Levenshtein-Distance-lee-a-.../app.py

#!/usr/bin/env python

#    This is the web interface for 'la_distancia_de_levenshtein_lee_a_cortazar';
#    it generates the PDF using WeasyPrint.

#    Copyright (C) 2021, Anais Berck
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details: <http://www.gnu.org/licenses/>.

import textwrap
from io import StringIO
from os.path import dirname
import re
import datetime

import os
from fcntl import lockf, LOCK_EX, LOCK_UN

from flask import Flask, Response, render_template, request, send_file
from weasyprint import HTML

from la_distancia_de_levenshtein_lee_a_cortazar import (
    calculate_distances, find_nearest_species, generate_in_between_species,
    generate_new_fragment, openfile, print_map, print_table, sort_distances)

BASEURL = '/levenshtein'
COUNTER_PATH = 'edition_counter.txt'

def strip_but_spaces_and_words(text):
  """Return *text* with every character removed that is neither a word
  character (letters, digits, underscore) nor whitespace."""
  unwanted_characters = re.compile(r'[^\w\s\d]')
  return unwanted_characters.sub('', text)

def wrap(text, width):
  """Re-wrap each line of *text* to at most *width* columns.

  Existing line breaks are kept: every input line is wrapped on its own and
  the results are joined with newlines again. Blank lines stay blank.
  """
  wrapped_lines = []
  for source_line in text.splitlines():
    # fill() is the joined form of textwrap.wrap() for a single paragraph.
    wrapped_lines.append(textwrap.fill(source_line, width=width))
  return '\n'.join(wrapped_lines)

def read_sources (*paths):
  """Read the given files and return them as wrapped text.

  Args:
    *paths: paths of the text files to read.

  Returns:
    A list of ``(path, text)`` tuples, in the order given, where ``text`` is
    the file content wrapped to 120 columns.
  """
  sources = []
  for path in paths:
    # Use a context manager so the file handle is closed right away instead
    # of being left open until garbage collection.
    with open(path, 'r') as source_file:
      sources.append((path, wrap(source_file.read(), 120)))
  return sources

def get_edition_count(path=None):
  """Increment the persistent edition counter and return the new value.

  The counter is stored as a decimal number in a text file. The file is
  created when it does not exist yet, and an exclusive lock is held while the
  value is read, incremented and written back, so concurrent callers each
  receive a distinct number.

  Args:
    path: location of the counter file; defaults to ``COUNTER_PATH``.

  Returns:
    The incremented edition number (1 for a new or empty counter file).

  Raises:
    ValueError: if the file holds something other than an integer.
  """
  if path is None:
    path = COUNTER_PATH

  fd = os.open(path, os.O_RDWR|os.O_CREAT)
  try:
    fo = os.fdopen(fd, 'r+', encoding='utf-8')
  except Exception:
    # The wrapper was never created, so the raw descriptor is still ours.
    os.close(fd)
    raise

  # From here on the file object owns the descriptor: leaving the `with`
  # block closes it exactly once. Calling os.close(fd) as well would close
  # the same descriptor number a second time when the file object is
  # collected, by which point it may already belong to another open file.
  with fo:
    lockf(fd, LOCK_EX)
    try:
      content = fo.read().strip()
      # An empty (or whitespace-only) file counts as "no edition yet".
      edition_count = (int(content) if content else 0) + 1
      fo.seek(0)
      fo.truncate()
      fo.write(str(edition_count))
      # Flush before unlocking so the next lock holder reads the new value.
      fo.flush()
    finally:
      lockf(fd, LOCK_UN)

  return edition_count


# Module-level setup: everything below runs once at import time and is shared
# by all requests.
app = Flask(__name__)
# Placeholder only: `trees` is rebound to a dictionary in step 1B below.
trees = []

## 1A. Open the text files and turn them into machine-readable lists

# Path of the Cortazar text fragment
txt = 'eucalipto_cortazar.txt'

# Plural and singular tree names; presumably parallel lists in the same
# order, since they are zipped together below (verify against the data files).
all_plural = openfile('arboles_plural.txt')
all_simple = openfile('arboles_simple.txt')

## 1B. Turn the lists of trees into a dictionary mapping singular to plural words
trees = dict(zip(all_simple, all_plural))

## 2. HOMEPAGE
## Load the Cortazar fragment
with open(txt, 'r') as source:
  fragment = source.read()

# Disabled: cover map based on the tree names instead of the fragment words.
# cover_distances = sort_distances(calculate_distances('eucalipto', all_simple))

# Words of the fragment (punctuation removed, lower-cased, split on
# whitespace) and their sorted distances to 'eucalipto'; used for the map on
# the cover of every generated PDF.
fragment_words = re.split(r'[\n\s]+', strip_but_spaces_and_words(fragment).lower())
fragment_word_distances = sort_distances(calculate_distances('eucalipto', fragment_words))

@app.route('/')
def index():
  """Render the home page, passing the tree dictionary and base URL to the template."""
  template_context = {'trees': trees, 'BASEURL': BASEURL}
  return render_template('index.html', **template_context)

@app.route('{}/generate'.format(BASEURL), methods=['POST'])
def generate():
  """Generate the PDF edition for the tree selected in the posted form.

  Reads the ``selected_tree`` form field. When its value is not one of the
  known singular tree names, the request is rejected with HTTP 400 and no
  edition number is used up. Otherwise a new edition number is taken, the
  texts and maps are generated, rendered through ``print.html`` and converted
  to PDF with WeasyPrint.

  Returns:
    A ``Response`` with the PDF as an attachment, or a plain-text response
    with status 400 for an unknown tree.
  """
  new_main_tree = str(request.form['selected_tree'])

  # Reject unknown trees with a real error status. Returning the bare string
  # '500' would be delivered to the client as a successful (200) response.
  if new_main_tree not in all_simple:
    return Response('Unknown tree: {}'.format(new_main_tree), status=400, mimetype='text/plain')

  # Take an edition number only once the request is known to be valid, so
  # rejected requests do not leave gaps in the numbering.
  edition_count = get_edition_count()

  # Generate map for the cover
  # cover_map = StringIO()
  # print_map(cover_distances, show_distances=False, file_out=cover_map)

  fragment_cover_map = StringIO()
  print_map(fragment_word_distances, show_distances=False, file_out=fragment_cover_map)

  ## 3. Count the similarity between the main tree and the other species in the forest
  similarities = calculate_distances(new_main_tree, all_simple)

  ## 4. Sort the similarities between the trees and the new main tree from the lowest to the highest counts
  sorted_similarities = sort_distances(similarities)

  # Find the nearest species
  near_species, nearest_species = find_nearest_species(sorted_similarities, trees)

  new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)

  ## 5. Compare the similarity between the main character tree and the main species in the forest
  repetitive_poetry = StringIO()
  in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=repetitive_poetry)

  forest_map = StringIO()
  print_map(sorted_similarities, file_out=forest_map)

  table_of_intermediary_species = StringIO()
  print_table(new_main_tree, near_species, in_between_species, table_of_intermediary_species)

  now = datetime.datetime.now()

  context = {
    'edition_count': edition_count,
    # 'cover_map': cover_map.getvalue(),
    'date': now.strftime('%d-%m-%Y'),
    'time': now.strftime('%H:%M:%S'),
    'fragment_cover_map': fragment_cover_map.getvalue(),
    'forest_map': forest_map.getvalue(),
    'new_fragment': wrap(new_fragment, 85),
    'repetitive_poetry': wrap(repetitive_poetry.getvalue(), 55),
    'table_of_intermediary_species': table_of_intermediary_species.getvalue(),
    'sources': read_sources('la_distancia_de_levenshtein_lee_a_cortazar.py'),
    'BASEDIR': dirname(__file__)
  }

  raw_html = render_template('print.html', **context)
  pdf = HTML(string=raw_html).write_pdf()

  # All buffers have been copied into the context by now; release them.
  fragment_cover_map.close()
  repetitive_poetry.close()
  forest_map.close()
  table_of_intermediary_species.close()

  # return send_file(pdf, attachment_filename='La distancia de Levenshtein {}.pdf'.format(edition_count), as_attachment=True)

  r = Response(pdf, mimetype='application/pdf')

  r.headers.extend({
    'Content-Disposition': 'attachment; filename="La distancia de Levenshtein {}.pdf"'.format(edition_count)
  })

  return r