You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
162 lines
5.3 KiB
Python
162 lines
5.3 KiB
Python
4 years ago
|
#!/usr/bin/env python
|
||
|
|
||
|
# This is the webinterface for 'la_distancia_de_levenshtein_lee_a_cortazar'
|
||
|
# and generates the pdf using weasyprint.
|
||
|
|
||
|
# Copyright (C) 2021, Anais Berck
|
||
|
# This program is free software: you can redistribute it and/or modify
|
||
|
# it under the terms of the GNU General Public License as published by
|
||
|
# the Free Software Foundation, either version 3 of the License, or
|
||
|
# (at your option) any later version.
|
||
|
|
||
|
# This program is distributed in the hope that it will be useful,
|
||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.
|
||
|
|
||
|
import textwrap
|
||
|
from io import StringIO
|
||
|
from os.path import dirname
|
||
|
import re
|
||
|
import datetime
|
||
|
|
||
|
import os
|
||
|
from fcntl import lockf, LOCK_EX, LOCK_UN
|
||
|
|
||
|
from flask import Flask, Response, render_template, request, send_file
|
||
|
from weasyprint import HTML
|
||
|
|
||
|
from la_distancia_de_levenshtein_lee_a_cortazar import (
|
||
|
calculate_distances, find_nearest_species, generate_in_between_species,
|
||
|
generate_new_fragment, openfile, print_map, print_table, sort_distances)
|
||
|
|
||
|
# URL prefix used to build the route of the generate endpoint; also handed
# to the templates so they can build matching links.
BASEURL = '/levenshtein'
# File in which get_edition_count() persists the running edition number.
COUNTER_PATH = 'edition_counter.txt'
|
||
|
|
||
|
def strip_but_spaces_and_words(text):
    """Return *text* without punctuation.

    Every character that is neither a word character (letters, digits,
    underscore, as matched by ``\\w``) nor whitespace is removed. Whitespace,
    including newlines, is left untouched.
    """
    # `\w` already matches digits, so no separate `\d` is needed.
    return re.sub(r'[^\w\s]', '', text)
|
||
|
|
||
|
def wrap(text, width):
    """Hard-wrap every line of *text* to at most *width* columns.

    Each input line is wrapped on its own with ``textwrap.wrap``, so existing
    line breaks are kept. An empty input line stays an empty line. Because the
    text is split with ``str.splitlines``, a trailing newline is not preserved.
    """
    return '\n'.join(
        '\n'.join(textwrap.wrap(line, width=width))
        for line in text.splitlines()
    )
|
||
|
|
||
|
def read_sources(*paths):
    """Read the files at *paths* and return them wrapped to 120 columns.

    Returns a list of ``(path, wrapped_text)`` tuples, in the order the paths
    were given.
    """
    sources = []
    for path in paths:
        # Close each file deterministically instead of relying on garbage
        # collection. UTF-8 is the default encoding of Python source files,
        # which is what the visible caller passes in.
        with open(path, 'r', encoding='utf-8') as source:
            sources.append((path, wrap(source.read(), 120)))
    return sources
|
||
|
|
||
|
def get_edition_count():
    """Increment the persistent edition counter and return the new value.

    The counter is stored as plain text in COUNTER_PATH, which is created if
    it does not exist. An exclusive ``lockf`` lock is held during the
    read-modify-write. An empty (or whitespace-only) file counts as zero, so
    the first edition is number 1.

    Raises:
        ValueError: if the file contains something that is not an integer.
        OSError: if the file cannot be opened or locked.
    """
    fd = os.open(COUNTER_PATH, os.O_RDWR | os.O_CREAT)
    # The file object takes ownership of fd; leaving the with-block closes it
    # exactly once. Calling os.close(fd) as well would close it a second time.
    with os.fdopen(fd, 'r+', encoding='utf-8') as counter_file:
        lockf(fd, LOCK_EX)
        try:
            content = counter_file.read().strip()
            edition_count = int(content) + 1 if content else 1

            # Overwrite the previous value in place.
            counter_file.seek(0)
            counter_file.truncate()
            counter_file.write(str(edition_count))
            # Push the new value to the file before the lock is released.
            counter_file.flush()
        finally:
            lockf(fd, LOCK_UN)

    return edition_count
|
||
|
|
||
|
|
||
|
app = Flask(__name__)

## 1A. Open text files & turn them into machine-readable lists

# Cortazar
txt = 'eucalipto_cortazar.txt'

all_plural = openfile('arboles_plural.txt')
all_simple = openfile('arboles_simple.txt')

## 1B. Turn list of trees into dictionary of single/plural words
trees = dict(zip(all_simple, all_plural))

## 2. HOMEPAGE
## Load fragment Cortazar
with open(txt, 'r') as source:
    fragment = source.read()

# cover_distances = sort_distances(calculate_distances('eucalipto', all_simple))

# Distances between 'eucalipto' and every word of the fragment (punctuation
# stripped, lowercased); generate() prints these as a map.
fragment_words = re.split(r'[\n\s]+', strip_but_spaces_and_words(fragment).lower())
fragment_word_distances = sort_distances(calculate_distances('eucalipto', fragment_words))
|
||
|
|
||
|
@app.route('/')
def index():
    """Render the homepage template with the tree dictionary and BASEURL."""
    # NOTE(review): this route is registered at '/' while generate() lives
    # under BASEURL; presumably a proxy strips the prefix — confirm.
    return render_template('index.html', trees=trees, BASEURL=BASEURL)
|
||
|
|
||
|
@app.route('{}/generate'.format(BASEURL), methods=['POST'])
def generate():
    """Generate a new edition as a PDF for the tree chosen in the form.

    Reads the ``selected_tree`` form field. If it is not one of the species in
    ``all_simple`` the request is rejected with body ``'500'`` and HTTP status
    500. Otherwise the edition counter is incremented, the maps, new fragment,
    poetry and table are produced, ``print.html`` is rendered, and the result
    is converted to PDF with weasyprint and returned as an attachment named
    ``La distancia de Levenshtein <edition>.pdf``.
    """
    new_main_tree = str(request.form['selected_tree'])

    # Reject unknown species before touching the counter, so failed requests
    # do not consume an edition number, and signal the failure in the status.
    if new_main_tree not in all_simple:
        return '500', 500

    edition_count = get_edition_count()

    # The helpers write their output to file-like objects; the with-statement
    # closes all four buffers once their contents have been copied out.
    with StringIO() as fragment_cover_map, \
            StringIO() as repetitive_poetry, \
            StringIO() as forest_map, \
            StringIO() as table_of_intermediary_species:

        # Generate map for the cover
        # cover_map = StringIO()
        # print_map(cover_distances, show_distances=False, file_out=cover_map)
        print_map(fragment_word_distances, show_distances=False, file_out=fragment_cover_map)

        ## 3. Count the similarity between the main tree and the other species in the forest
        similarities = calculate_distances(new_main_tree, all_simple)

        ## 4. Sort the similarities between the trees and the new main tree from the lowest to the highest counts
        sorted_similarities = sort_distances(similarities)

        # Find the nearest species
        near_species, nearest_species = find_nearest_species(sorted_similarities, trees)

        new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)

        ## 5. Compare the similarity between the main character tree and the main species in the forest
        in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=repetitive_poetry)

        print_map(sorted_similarities, file_out=forest_map)

        print_table(new_main_tree, near_species, in_between_species, table_of_intermediary_species)

        now = datetime.datetime.now()

        context = {
            'edition_count': edition_count,
            # 'cover_map': cover_map.getvalue(),
            'date': now.strftime('%d-%m-%Y'),
            'time': now.strftime('%H:%M:%S'),
            'fragment_cover_map': fragment_cover_map.getvalue(),
            'forest_map': forest_map.getvalue(),
            'new_fragment': wrap(new_fragment, 85),
            'repetitive_poetry': wrap(repetitive_poetry.getvalue(), 55),
            'table_of_intermediary_species': table_of_intermediary_species.getvalue(),
            'sources': read_sources('la_distancia_de_levenshtein_lee_a_cortazar.py'),
            'BASEDIR': dirname(__file__),
        }

    raw_html = render_template('print.html', **context)
    pdf = HTML(string=raw_html).write_pdf()

    # return send_file(pdf, attachment_filename='La distancia de Levenshtein {}.pdf'.format(edition_count), as_attachment=True)

    r = Response(pdf, mimetype='application/pdf')
    r.headers['Content-Disposition'] = (
        'attachment; filename="La distancia de Levenshtein {}.pdf"'.format(edition_count)
    )
    return r
|