From 4fd0ac3c34a6d43933b6e229c272fb05581730f3 Mon Sep 17 00:00:00 2001 From: Gijs Date: Mon, 4 Oct 2021 20:55:00 +0200 Subject: [PATCH] Started an english version --- scripts/app.py | 153 +++++++++++++++++++++++++++++++-- scripts/medialab.py | 11 ++- scripts/paseo.py | 4 +- scripts/templates/book.html | 2 +- scripts/templates/book_en.html | 2 +- 5 files changed, 156 insertions(+), 16 deletions(-) diff --git a/scripts/app.py b/scripts/app.py index 1b5320c..d2cf258 100644 --- a/scripts/app.py +++ b/scripts/app.py @@ -1,9 +1,23 @@ #!/usr/bin/env/ python -from flask import Flask, render_template, request, Response +# Copyright (C) 2021, Anais Berck +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details: . + +from flask import Flask, render_template, request, Response, session # from weasyprint import HTML from pagedjs import make_pdf -from settings import DEBUG, BASEURL +from settings import DEBUG, BASEURL, DEFAULT_LANGUAGE, SECRET_KEY + +import os +from fcntl import lockf, LOCK_EX, LOCK_UN # Spacy tries to import CUDA, do not break when it fails try: @@ -16,17 +30,89 @@ import os.path basepath = os.path.dirname(__file__) app = Flask(__name__) +app.secret_key = SECRET_KEY # Book HTML is loaded through filesystem, in a tmp dir, make path absolute. 
PAGEDJS_STATIC_DIR = os.path.join(basepath, 'static') +COUNTER_PATH_ES = 'edition_counter.txt' +COUNTER_PATH_EN = 'edition_counter_en.txt' + +def get_edition_count_es(): + fd = os.open(COUNTER_PATH_ES, os.O_RDWR|os.O_CREAT) + lockf(fd, LOCK_EX) + fo = os.fdopen(fd, 'r+', encoding='utf-8') + content = fo.read() + if not content: + edition_count = 0 + else: + edition_count = int(content.strip()) + edition_count += 1 + fo.seek(0) + fo.truncate() + fo.write(str(edition_count)) + fo.flush() + lockf(fd, LOCK_UN) + os.close(fd) + + return edition_count + +def get_edition_count_en(): + fd = os.open(COUNTER_PATH_EN, os.O_RDWR|os.O_CREAT) + lockf(fd, LOCK_EX) + fo = os.fdopen(fd, 'r+', encoding='utf-8') + content = fo.read() + if not content: + edition_count = 0 + else: + edition_count = int(content.strip()) + edition_count += 1 + fo.seek(0) + fo.truncate() + fo.write(str(edition_count)) + fo.flush() + lockf(fd, LOCK_UN) + os.close(fd) + + return edition_count + +def get_language(): + if 'LANGUAGE' in session: + return session['LANGUAGE'] + else: + return DEFAULT_LANGUAGE + +def set_language(language): + session['LANGUAGE'] = language + session.modified = True + +def index_es(): + return render_template('index.html') + +def index_en(): + return render_template('index_en.html') + +@app.route('{}/en'.format(BASEURL)) +def en(): + set_language('en') + return index() + +@app.route('{}/es'.format(BASEURL)) +def es(): + set_language('es') + return index() + @app.route('{}/'.format(BASEURL)) def index(): - return render_template('index.html') + if get_language() == 'es': + return index_es() + else: + return index_en() -@app.route('{}/book'.format(BASEURL), methods=['POST']) -def book(): - fragment = max(0, min(1, int(request.form['fragment']))) +def book_es (): + edition_count = get_edition_count_es() + + fragment = max(0, min(1, int(request.form['fragment']))) first_word = 'un' @@ -39,14 +125,15 @@ def book(): author = 'Benito Pérez Gáldos' # Non breaking spaces title = 'Miau' - 
path = crear_camino(novel, first_word) + path = crear_camino(novel, first_word, 'es') context = { 'title': title, 'author': author, 'path': path, 'STATIC_DIR': '/static' if DEBUG else PAGEDJS_STATIC_DIR, - 'DEBUG': DEBUG + 'DEBUG': DEBUG, + 'edition_count': edition_count, } html = render_template('book.html', **context) @@ -59,7 +146,55 @@ def book(): r = Response(pdf, mimetype='application/pdf') r.headers.extend({ - 'Content-Disposition': 'attachment; filename="Paseo por arboles de madrid.pdf"' + 'Content-Disposition': 'attachment; filename="Paseo por arboles de Madrid.pdf"' }) return r + +def book_en (): + edition_count = get_edition_count_en() + + fragment = max(0, min(1, int(request.form['fragment']))) + + first_word = 'a' + + if fragment == 0: + novel = os.path.join(basepath, '../data/emilia_english.txt') + author = 'Emilia Pardo Bazán' # Non breaking spaces + title = 'The Swan of Vila Morta' # Non breaking spaces + else: + novel = os.path.join(basepath, '../data/benito_english.txt') + author = 'Benito Pérez Gáldos' # Non breaking spaces + title = 'Marianela' + + path = crear_camino(novel, first_word, 'en') + + context = { + 'title': title, + 'author': author, + 'path': path, + 'STATIC_DIR': '/static' if DEBUG else PAGEDJS_STATIC_DIR, + 'DEBUG': DEBUG, + 'edition_count': edition_count, + } + + html = render_template('book_en.html', **context) + + if (DEBUG): + return html + else: + pdf = make_pdf(html) + + r = Response(pdf, mimetype='application/pdf') + + r.headers.extend({ + 'Content-Disposition': 'attachment; filename="Walk along the trees of Madrid.pdf"' + }) + + return r +@app.route('{}/book'.format(BASEURL), methods=['POST']) +def book(): + if get_language() == 'es': + return book_es() + else: + return book_en() diff --git a/scripts/medialab.py b/scripts/medialab.py index 3008523..0cf7940 100644 --- a/scripts/medialab.py +++ b/scripts/medialab.py @@ -16,6 +16,7 @@ import random # Cargar tokenizer en español nlp = spacy.load("es_core_news_sm") +nlp_en = 
spacy.load("en_core_web_sm") #FUNCIONES_________________________________________________________________ @@ -26,7 +27,7 @@ def limpiar_texto(fragmento): fragmento_limpio = ' '.join(fragmento_limpio) return fragmento_limpio -def crear_base_datos(nombre_texto): +def crear_base_datos(nombre_texto, lenguaje='es'): # Abrir el archivo de texto para crear la base de datos archivo = open(nombre_texto, 'r') fragmento = archivo.read() @@ -34,8 +35,12 @@ def crear_base_datos(nombre_texto): fragmento_limpio = limpiar_texto(fragmento) # Tokenización del fragmento de texto - doc = nlp(fragmento_limpio) - doc_len = len(doc) + if lenguaje == 'es': + doc = nlp(fragmento_limpio) + doc_len = len(doc) + else: + doc = nlp_en(fragmento_limpio) + doc_len = len(doc) palabras_arboles = {} #Verbos, sustantivos, adverbios y adjetivos palabras_camino = {} #El resto de palabras diff --git a/scripts/paseo.py b/scripts/paseo.py index e247c76..c164bb9 100644 --- a/scripts/paseo.py +++ b/scripts/paseo.py @@ -77,12 +77,12 @@ def path(word, words_tree, words_path, trees): return itinerary # Genera un camino a partir de un texto y una palabra del texto -def crear_camino(nombre_archivo, palabra_inicial): +def crear_camino(nombre_archivo, palabra_inicial, lenguaje='es'): trees = load_trees_from_json() shuffle(trees) #print("Starting to read text") - (palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo) + (palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo, lenguaje) #print("Amount of tree words: ", len(palabras_arboles)) diff --git a/scripts/templates/book.html b/scripts/templates/book.html index 7fdd308..41e6cb1 100644 --- a/scripts/templates/book.html +++ b/scripts/templates/book.html @@ -769,7 +769,7 @@ Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.

La copia de este libro es única y el tiraje es por definición infinito.
- Esta copia es el número XXX de copias descargadas.
+ Esta copia es el número {{ edition_count }} de copias descargadas.
Condiciones colectivas de (re)uso (CC4r), 2021
Copyleft con una diferencia: Se le invita a copiar, distribuir y modificar esta obra bajo los términos de la CC4r.

diff --git a/scripts/templates/book_en.html b/scripts/templates/book_en.html index 4c11a3c..906832a 100644 --- a/scripts/templates/book_en.html +++ b/scripts/templates/book_en.html @@ -570,7 +570,7 @@
  • the human beings Emilia Pardo Bazán, Benito Pérez Gáldos, Jaime Munárriz, Luis Morell, An Mertens, Eva Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.
  • The copy of this book is unique and the print run is by definition infinite.
    - This copy is the XXX number of copies downloaded.

    + This copy is number {{ edition_count }} of the copies downloaded.

    Collective terms of (re)use (CC4r), 2021
    Copyleft with a difference: You are invited to copy, distribute, and modify this work under the terms of the work under the terms of the CC4r.