Started an english version

master
Gijs 3 years ago
parent a0e720668f
commit 4fd0ac3c34

@ -1,9 +1,23 @@
#!/usr/bin/env python
from flask import Flask, render_template, request, Response
# Copyright (C) 2021, Anais Berck
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.
from flask import Flask, render_template, request, Response, session
# from weasyprint import HTML
from pagedjs import make_pdf
from settings import DEBUG, BASEURL
from settings import DEBUG, BASEURL, DEFAULT_LANGUAGE, SECRET_KEY
import os
from fcntl import lockf, LOCK_EX, LOCK_UN
# Spacy tries to import CUDA, do not break when it fails
try:
@ -16,17 +30,89 @@ import os.path
basepath = os.path.dirname(__file__)
app = Flask(__name__)
app.secret_key = SECRET_KEY
# Book HTML is loaded through filesystem, in a tmp dir, make path absolute.
PAGEDJS_STATIC_DIR = os.path.join(basepath, 'static')
# Counter files, one per language. The paths are relative, so they are
# resolved against the working directory of the running process.
COUNTER_PATH_ES = 'edition_counter.txt'
COUNTER_PATH_EN = 'edition_counter_en.txt'


def _next_edition_count(counter_path):
    """Increment the counter stored in *counter_path* and return the new value.

    The file is created when it does not exist yet. An exclusive lock is held
    on it for the whole read-increment-write cycle so that concurrent callers
    each receive a distinct number.

    Args:
        counter_path: path of the text file holding the counter.

    Returns:
        The incremented count; 1 when the file was empty or only whitespace.

    Raises:
        ValueError: if the file holds text that is not an integer.
        OSError: if the file cannot be opened or locked.
    """
    fd = os.open(counter_path, os.O_RDWR | os.O_CREAT)
    # os.fdopen takes ownership of the descriptor: leaving the with-block
    # closes it exactly once, also when an exception is raised.
    with os.fdopen(fd, 'r+', encoding='utf-8') as counter_file:
        lockf(fd, LOCK_EX)
        try:
            content = counter_file.read().strip()
            edition_count = int(content) + 1 if content else 1
            counter_file.seek(0)
            counter_file.truncate()
            counter_file.write(str(edition_count))
            # Flush before unlocking so the next lock holder reads the new value.
            counter_file.flush()
        finally:
            lockf(fd, LOCK_UN)
    return edition_count


def get_edition_count_es():
    """Increment and return the edition counter stored in COUNTER_PATH_ES."""
    return _next_edition_count(COUNTER_PATH_ES)


def get_edition_count_en():
    """Increment and return the edition counter stored in COUNTER_PATH_EN."""
    return _next_edition_count(COUNTER_PATH_EN)
def get_language():
    """Return the language stored in the session, or DEFAULT_LANGUAGE if none is stored."""
    return session['LANGUAGE'] if 'LANGUAGE' in session else DEFAULT_LANGUAGE
def set_language(language):
    """Store *language* under the session key 'LANGUAGE' and flag the session as modified."""
    session['LANGUAGE'] = language
    session.modified = True
def index_es():
    """Render the landing page from the template 'index.html'."""
    return render_template('index.html')
def index_en():
    """Render the landing page from the template 'index_en.html'."""
    return render_template('index_en.html')
@app.route('{}/en'.format(BASEURL))
def en():
    """Switch the session language to 'en', then serve the landing page."""
    set_language('en')
    return index()
@app.route('{}/es'.format(BASEURL))
def es():
    """Switch the session language to 'es', then serve the landing page."""
    set_language('es')
    return index()
@app.route('{}/'.format(BASEURL))
def index():
    """Serve the landing page in the language returned by get_language().

    'es' selects index_es(); any other value selects index_en().
    """
    # A leftover unconditional `return render_template('index.html')` used to
    # sit here and made the language dispatch below unreachable.
    if get_language() == 'es':
        return index_es()
    return index_en()
@app.route('{}/book'.format(BASEURL), methods=['POST'])
def book():
fragment = max(0, min(1, int(request.form['fragment'])))
def book_es ():
edition_count = get_edition_count_es()
fragment = max(0, min(1, int(request.form['fragment'])))
first_word = 'un'
@ -39,14 +125,15 @@ def book():
author = 'Benito Pérez Gáldos' # Non breaking spaces
title = 'Miau'
path = crear_camino(novel, first_word)
path = crear_camino(novel, first_word, 'es')
context = {
'title': title,
'author': author,
'path': path,
'STATIC_DIR': '/static' if DEBUG else PAGEDJS_STATIC_DIR,
'DEBUG': DEBUG
'DEBUG': DEBUG,
'edition_count': edition_count,
}
html = render_template('book.html', **context)
@ -59,7 +146,55 @@ def book():
r = Response(pdf, mimetype='application/pdf')
r.headers.extend({
'Content-Disposition': 'attachment; filename="Paseo por arboles de madrid.pdf"'
'Content-Disposition': 'attachment; filename="Paseo por arboles de Madrid.pdf"'
})
return r
def book_en ():
    """Build the English edition of the book for the current request.

    Increments the English edition counter, selects the source novel from the
    posted form field 'fragment' (clamped to 0 or 1), generates a path with
    crear_camino and renders 'book_en.html'.

    Returns:
        The rendered HTML when DEBUG is truthy, otherwise a PDF response
        served as an attachment.
    """
    # The counter is bumped first, before the form data is parsed.
    edition_count = get_edition_count_en()

    requested_fragment = int(request.form['fragment'])
    fragment = max(0, min(1, requested_fragment))
    first_word = 'a'

    if fragment != 0:
        novel = os.path.join(basepath, '../data/benito_english.txt')
        author = 'Benito Pérez Gáldos' # Non breaking spaces
        title = 'Marianela'
    else:
        novel = os.path.join(basepath, '../data/emilia_english.txt')
        author = 'Emilia Pardo Bazán' # Non breaking spaces
        title = 'The Swan of Vila Morta' # Non breaking spaces

    path = crear_camino(novel, first_word, 'en')

    # In DEBUG the template loads assets over HTTP; otherwise it gets the
    # absolute static directory.
    static_dir = '/static' if DEBUG else PAGEDJS_STATIC_DIR
    context = {
        'title': title,
        'author': author,
        'path': path,
        'STATIC_DIR': static_dir,
        'DEBUG': DEBUG,
        'edition_count': edition_count,
    }
    html = render_template('book_en.html', **context)

    if DEBUG:
        return html

    pdf = make_pdf(html)
    r = Response(pdf, mimetype='application/pdf')
    r.headers.extend({
        'Content-Disposition': 'attachment; filename="Walk along the trees of Madrid.pdf"'
    })
    return r
@app.route('{}/book'.format(BASEURL), methods=['POST'])
def book():
    """Generate the book with book_es() when the language is 'es', else with book_en()."""
    make_book = book_es if get_language() == 'es' else book_en
    return make_book()

@ -16,6 +16,7 @@ import random
# Cargar tokenizer en español
nlp = spacy.load("es_core_news_sm")
nlp_en = spacy.load("en_core_web_sm")
#FUNCIONES_________________________________________________________________
@ -26,7 +27,7 @@ def limpiar_texto(fragmento):
fragmento_limpio = ' '.join(fragmento_limpio)
return fragmento_limpio
def crear_base_datos(nombre_texto):
def crear_base_datos(nombre_texto, lenguaje='es'):
# Abrir el archivo de texto para crear la base de datos
archivo = open(nombre_texto, 'r')
fragmento = archivo.read()
@ -34,8 +35,12 @@ def crear_base_datos(nombre_texto):
fragmento_limpio = limpiar_texto(fragmento)
# Tokenización del fragmento de texto
doc = nlp(fragmento_limpio)
doc_len = len(doc)
if lenguaje == 'es':
doc = nlp(fragmento_limpio)
doc_len = len(doc)
else:
doc = nlp_en(fragmento_limpio)
doc_len = len(doc)
palabras_arboles = {} #Verbos, sustantivos, adverbios y adjetivos
palabras_camino = {} #El resto de palabras

@ -77,12 +77,12 @@ def path(word, words_tree, words_path, trees):
return itinerary
# Genera un camino a partir de un texto y una palabra del texto
def crear_camino(nombre_archivo, palabra_inicial):
def crear_camino(nombre_archivo, palabra_inicial, lenguaje='es'):
trees = load_trees_from_json()
shuffle(trees)
#print("Starting to read text")
(palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo)
(palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo, lenguaje)
#print("Amount of tree words: ", len(palabras_arboles))

@ -769,7 +769,7 @@
Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.</li>
</ul>
<p>La copia de este libro es única y el tiraje es por definición infinito.<br>
Esta copia es el número XXX de copias descargadas.<br> <!-- VOEG NUMMER KOPIJ TOE-->
Esta copia es el número {{ edition_count }} de copias descargadas.<br>
Condiciones colectivas de (re)uso (CC4r), 2021<br>Copyleft con una diferencia: Se le
invita a copiar, distribuir y modificar esta obra bajo los términos de la <a
href="https://gitlab.constantvzw.org/unbound/cc4r">CC4r</a>.</p>

@ -570,7 +570,7 @@
<li>the human beings Emilia Pardo Bazán, Benito Pérez Galdós, Jaime Munárriz, Luis Morell, An Mertens, Eva Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.</li>
</ul>
<p>The copy of this book is unique and the print run is by definition infinite.<br>
This copy is the XXX number of copies downloaded. </p> <!-- VOEG NUMMER KOPIJ TOE-->
This copy is number {{ edition_count }} of the copies downloaded. </p>
<p>Collective terms of (re)use (CC4r), 2021<br>Copyleft with a difference: You are invited to copy, distribute, and modify this work under the terms of the <a href="https://gitlab.constantvzw.org/unbound/cc4r">CC4r</a>.</p>
</section>

Loading…
Cancel
Save