You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

162 lines
5.3 KiB
Python

#!/usr/bin/env python
# This is the webinterface for 'la_distancia_de_levenshtein_lee_a_cortazar'
# and generates the pdf using weasyprint.
# Copyright (C) 2021, Anais Berck
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.
import textwrap
from io import StringIO
from os.path import dirname
import re
import datetime
import os
from fcntl import lockf, LOCK_EX, LOCK_UN
from flask import Flask, Response, render_template, request, send_file
from weasyprint import HTML
from la_distancia_de_levenshtein_lee_a_cortazar import (
calculate_distances, find_nearest_species, generate_in_between_species,
generate_new_fragment, openfile, print_map, print_table, sort_distances)
BASEURL = '/levenshtein'
COUNTER_PATH = 'edition_counter.txt'
def strip_but_spaces_and_words(text):
    r"""Strip punctuation and symbols from *text*, keeping word characters and whitespace.

    Used to reduce the Cortazar fragment to bare words before splitting.
    Note: ``\d`` is a subset of ``\w``, so the original ``[^\w\s\d]``
    class was redundant; ``[^\w\s]`` is equivalent.
    """
    return re.sub(r'[^\w\s]', '', text)
def wrap(text, width):
    """Hard-wrap each line of *text* to at most *width* characters.

    Existing newlines are preserved; every individual line is wrapped
    independently and the pieces are rejoined with newlines.
    """
    wrapped_lines = []
    for line in text.splitlines():
        wrapped_lines.append('\n'.join(textwrap.wrap(line, width=width)))
    return '\n'.join(wrapped_lines)
def read_sources(*paths):
    """Read each source file and return a list of (path, wrapped_text) pairs.

    Each file's content is wrapped to 120 columns for display in the PDF.
    Files are opened with a context manager so the handles are closed
    even if reading fails (the original leaked open file objects).
    """
    sources = []
    for path in paths:
        with open(path, 'r') as handle:
            sources.append((path, wrap(handle.read(), 120)))
    return sources
def get_edition_count(counter_path=None):
    """Atomically increment and return the persistent edition counter.

    The counter file is locked with an exclusive POSIX lock so concurrent
    requests cannot read and write the same count. The descriptor is now
    closed (and the lock released) even when parsing or writing raises —
    the original leaked both the lock and the fd on any exception, and
    also called ``os.close(fd)`` on a descriptor already owned by the
    ``os.fdopen`` file object.

    counter_path: path of the counter file; defaults to COUNTER_PATH.
    Returns the new (incremented) edition number as an int.
    """
    if counter_path is None:
        counter_path = COUNTER_PATH
    fd = os.open(counter_path, os.O_RDWR | os.O_CREAT)
    try:
        lockf(fd, LOCK_EX)
        fo = os.fdopen(fd, 'r+', encoding='utf-8')
    except Exception:
        os.close(fd)
        raise
    # From here on *fo* owns the descriptor: closing fo closes fd and
    # releases the lock, even if an exception propagates out of the body.
    with fo:
        content = fo.read()
        # An empty/new file starts the edition count at zero.
        edition_count = int(content.strip()) if content else 0
        edition_count += 1
        fo.seek(0)
        fo.truncate()
        fo.write(str(edition_count))
        fo.flush()
        lockf(fd, LOCK_UN)
    return edition_count
# Flask application serving the web interface.
app = Flask(__name__)

trees = []  # NOTE(review): dead initialiser — rebound to a dict below.

## 1A. Open textfiles & turn textfiles in machine readable lists
# Cortazar
txt = 'eucalipto_cortazar.txt'  # source fragment by Julio Cortazar
all_plural = openfile('arboles_plural.txt')  # tree names, plural forms
all_simple = openfile('arboles_simple.txt')  # tree names, singular forms

## 1B. Turn list of trees into dictionary of single/plural words
# Maps singular -> plural; assumes both lists are aligned index-by-index
# — TODO confirm against the two text files.
trees = dict(zip(all_simple, all_plural))

## 2. HOMEPAGE
## Load the Cortazar fragment once at startup.
with open(txt, 'r') as source:
    fragment = source.read()

# cover_distances = sort_distances(calculate_distances('eucalipto', all_simple))

# Split the fragment into lowercase words (punctuation stripped), then
# precompute each word's distance to 'eucalipto' — presumably Levenshtein
# distances sorted ascending (see project module); used for the cover map.
fragment_words = re.split(r'[\n\s]+', strip_but_spaces_and_words(fragment).lower())
fragment_word_distances = sort_distances(calculate_distances('eucalipto', fragment_words))
@app.route('/')
def index():
    """Serve the landing page with the list of selectable trees."""
    template_context = {'trees': trees, 'BASEURL': BASEURL}
    return render_template('index.html', **template_context)
@app.route('{}/generate'.format(BASEURL), methods=['POST'])
def generate():
    """Generate the edition PDF for the tree selected in the POST form.

    Renders 'print.html' with the generated maps, fragment and poetry,
    converts it to PDF with weasyprint, and returns it as an attachment.
    Returns an HTTP 500 when the submitted tree is unknown (the original
    returned the body '500' with a misleading 200 status).
    """
    new_main_tree = str(request.form['selected_tree'])
    # Guard clause: validate before incrementing the edition counter so
    # rejected requests no longer burn an edition number.
    if new_main_tree not in all_simple:
        return '500', 500

    edition_count = get_edition_count()

    # Map of the original fragment's words for the cover.
    fragment_cover_map = StringIO()
    print_map(fragment_word_distances, show_distances=False, file_out=fragment_cover_map)

    ## 3. Count the similarity between the main tree and the other species in the forest
    similarities = calculate_distances(new_main_tree, all_simple)
    ## 4. Sort the similarities from the lowest to the highest counts
    sorted_similarities = sort_distances(similarities)
    # Find the nearest species and rewrite the fragment around it.
    near_species, nearest_species = find_nearest_species(sorted_similarities, trees)
    new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)

    ## 5. Compare the main character tree with the main species in the forest.
    repetitive_poetry = StringIO()
    in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=repetitive_poetry)
    forest_map = StringIO()
    print_map(sorted_similarities, file_out=forest_map)
    table_of_intermediary_species = StringIO()
    print_table(new_main_tree, near_species, in_between_species, table_of_intermediary_species)

    now = datetime.datetime.now()
    context = {
        'edition_count': edition_count,
        # 'cover_map': cover_map.getvalue(),
        'date': now.strftime('%d-%m-%Y'),
        'time': now.strftime('%H:%M:%S'),
        'fragment_cover_map': fragment_cover_map.getvalue(),
        'forest_map': forest_map.getvalue(),
        'new_fragment': wrap(new_fragment, 85),
        'repetitive_poetry': wrap(repetitive_poetry.getvalue(), 55),
        'table_of_intermediary_species': table_of_intermediary_species.getvalue(),
        'sources': read_sources('la_distancia_de_levenshtein_lee_a_cortazar.py'),
        'BASEDIR': dirname(__file__),
    }
    raw_html = render_template('print.html', **context)
    pdf = HTML(string=raw_html).write_pdf()

    # Release all in-memory buffers (the original leaked fragment_cover_map).
    fragment_cover_map.close()
    repetitive_poetry.close()
    forest_map.close()
    table_of_intermediary_species.close()

    response = Response(pdf, mimetype='application/pdf')
    response.headers.extend({
        'Content-Disposition': 'attachment; filename="La distancia de Levenshtein {}.pdf"'.format(edition_count)
    })
    return response