#!/usr/bin/env python
# This is the webinterface for 'la_distancia_de_levenshtein_lee_a_cortazar'
# and generates the pdf using weasyprint.

# Copyright (C) 2021, Anais Berck

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <https://www.gnu.org/licenses/>.

import datetime
import os
import re
import textwrap
from fcntl import lockf, LOCK_EX, LOCK_UN
from io import StringIO
from os.path import dirname

from flask import Flask, Response, render_template, request, send_file
from weasyprint import HTML

from la_distancia_de_levenshtein_lee_a_cortazar import (
    calculate_distances,
    find_nearest_species,
    generate_in_between_species,
    generate_new_fragment,
    openfile,
    print_map,
    print_table,
    sort_distances)

# URL prefix under which the application is served.
BASEURL = '/levenshtein'
# File holding the number of the most recently generated edition.
COUNTER_PATH = 'edition_counter.txt'


def strip_but_spaces_and_words(text):
    """Return *text* with every character removed that is neither a word
    character, whitespace nor a digit (i.e. punctuation and symbols)."""
    return re.sub(r'[^\w\s\d]', '', text)


def wrap(text, width):
    """Hard-wrap every line of *text* to at most *width* columns.

    Existing line breaks are kept: each input line is wrapped on its own
    and the pieces are joined with newlines again. An empty input line
    stays an empty line; a trailing newline is not preserved.
    """
    return '\n'.join(
        '\n'.join(textwrap.wrap(line, width=width))
        for line in text.splitlines()
    )


def read_sources(*paths):
    """Read the files at *paths* and return a list of ``(path, text)``
    tuples, with each text wrapped to 120 columns."""
    sources = []
    for path in paths:
        # Context manager so the handle is closed deterministically.
        # UTF-8 is the default encoding of Python source files, which is
        # what the caller in this file passes in.
        with open(path, 'r', encoding='utf-8') as handle:
            sources.append((path, wrap(handle.read(), 120)))
    return sources


def get_edition_count():
    """Increment the persistent edition counter and return the new value.

    The counter lives in COUNTER_PATH, which is created when missing. An
    exclusive ``lockf`` lock is held for the whole read-modify-write
    cycle. An empty (or whitespace-only) file counts as 0, so the first
    edition is number 1.

    Raises:
        ValueError: if the file holds something that is not an integer.
        OSError: if the file cannot be opened or locked.
    """
    # 0o666 (minus umask) mirrors what the builtin open() uses; the
    # os.open() default of 0o777 would create an executable data file.
    fd = os.open(COUNTER_PATH, os.O_RDWR | os.O_CREAT, 0o666)

    # The file object takes ownership of fd: leaving the with-block closes
    # it exactly once. A separate os.close(fd) would close it a second time.
    with os.fdopen(fd, 'r+', encoding='utf-8') as fo:
        lockf(fd, LOCK_EX)
        try:
            content = fo.read().strip()
            edition_count = (int(content) if content else 0) + 1

            # Overwrite the old value in place.
            fo.seek(0)
            fo.truncate()
            fo.write(str(edition_count))
            # Flush while the lock is still held so that the next lock
            # holder reads the new value.
            fo.flush()
        finally:
            lockf(fd, LOCK_UN)

    return edition_count


app = Flask(__name__)

# Placeholder; rebound to a singular -> plural dictionary further down.
trees = []

## 1A.
# Open textfiles & turn textfiles into machine-readable lists
# Cortazar
txt = 'eucalipto_cortazar.txt'
all_plural = openfile('arboles_plural.txt')
all_simple = openfile('arboles_simple.txt')

## 1B. Turn list of trees into dictionary of single/plural words
trees = dict(zip(all_simple, all_plural))

## 2. HOMEPAGE
## load fragment Cortazar
with open(txt, 'r') as source:
    fragment = source.read()

# cover_distances = sort_distances(calculate_distances('eucalipto', all_simple))

# Lower-cased words of the fragment with punctuation removed.
# str.split() splits on runs of whitespace and, unlike re.split(), does not
# produce empty strings when the text starts or ends with whitespace, so no
# empty "word" ends up in the distance map.
fragment_words = strip_but_spaces_and_words(fragment).lower().split()
fragment_word_distances = sort_distances(calculate_distances('eucalipto', fragment_words))


@app.route('/')
def index():
    """Render the homepage with the tree selection."""
    return render_template('index.html', trees=trees, BASEURL=BASEURL)


@app.route('{}/generate'.format(BASEURL), methods=['POST'])
def generate():
    """Generate a new edition as PDF for the tree posted as 'selected_tree'.

    Returns the PDF as an attachment. When the posted tree is not one of
    the known singular tree names, a plain-text 400 response is returned
    and no edition number is consumed.
    """
    new_main_tree = str(request.form['selected_tree'])

    # Validate before touching the edition counter, so that rejected
    # requests do not leave gaps in the edition numbering.
    if new_main_tree not in all_simple:
        # A real error status instead of a 200 response with body '500'.
        return Response('400 Bad Request: unknown tree species',
                        status=400, mimetype='text/plain')

    edition_count = get_edition_count()

    # Generate map for the cover
    # cover_map = StringIO()
    # print_map(cover_distances, show_distances=False, file_out=cover_map)
    fragment_cover_map = StringIO()
    print_map(fragment_word_distances, show_distances=False, file_out=fragment_cover_map)

    ## 3. Count the similarity between the main tree and the other species in the forest
    similarities = calculate_distances(new_main_tree, all_simple)

    ## 4. Sort the similarities between the trees and the new main tree from the lowest to the highest counts
    sorted_similarities = sort_distances(similarities)

    # Find the nearest species
    near_species, nearest_species = find_nearest_species(sorted_similarities, trees)
    new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)

    ## 5. Compare the similarity between the main character tree and the main species in the forest,
    repetitive_poetry = StringIO()
    in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=repetitive_poetry)

    forest_map = StringIO()
    print_map(sorted_similarities, file_out=forest_map)

    table_of_intermediary_species = StringIO()
    print_table(new_main_tree, near_species, in_between_species, table_of_intermediary_species)

    now = datetime.datetime.now()

    context = {
        'edition_count': edition_count,
        # 'cover_map': cover_map.getvalue(),
        'date': now.strftime('%d-%m-%Y'),
        'time': now.strftime('%H:%M:%S'),
        'fragment_cover_map': fragment_cover_map.getvalue(),
        'forest_map': forest_map.getvalue(),
        'new_fragment': wrap(new_fragment, 85),
        'repetitive_poetry': wrap(repetitive_poetry.getvalue(), 55),
        'table_of_intermediary_species': table_of_intermediary_species.getvalue(),
        'sources': read_sources('la_distancia_de_levenshtein_lee_a_cortazar.py'),
        'BASEDIR': dirname(__file__)
    }

    raw_html = render_template('print.html', **context)
    pdf = HTML(string=raw_html).write_pdf()

    # All buffer contents were copied into the context above.
    fragment_cover_map.close()
    repetitive_poetry.close()
    forest_map.close()
    table_of_intermediary_species.close()

    # return send_file(pdf, attachment_filename='La distancia de Levenshtein {}.pdf'.format(edition_count), as_attachment=True)
    r = Response(pdf, mimetype='application/pdf')
    r.headers.extend({
        'Content-Disposition': 'attachment; filename="La distancia de Levenshtein {}.pdf"'.format(edition_count)
    })

    return r