From 4fd0ac3c34a6d43933b6e229c272fb05581730f3 Mon Sep 17 00:00:00 2001
From: Gijs
Date: Mon, 4 Oct 2021 20:55:00 +0200
Subject: [PATCH] Started an English version
---
scripts/app.py | 153 +++++++++++++++++++++++++++++++--
scripts/medialab.py | 11 ++-
scripts/paseo.py | 4 +-
scripts/templates/book.html | 2 +-
scripts/templates/book_en.html | 2 +-
5 files changed, 156 insertions(+), 16 deletions(-)
diff --git a/scripts/app.py b/scripts/app.py
index 1b5320c..d2cf258 100644
--- a/scripts/app.py
+++ b/scripts/app.py
@@ -1,9 +1,23 @@
#!/usr/bin/env/ python
-from flask import Flask, render_template, request, Response
+# Copyright (C) 2021, Anais Berck
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details: <https://www.gnu.org/licenses/>.
+
+from flask import Flask, render_template, request, Response, session
# from weasyprint import HTML
from pagedjs import make_pdf
-from settings import DEBUG, BASEURL
+from settings import DEBUG, BASEURL, DEFAULT_LANGUAGE, SECRET_KEY
+
+import os
+from fcntl import lockf, LOCK_EX, LOCK_UN
# Spacy tries to import CUDA, do not break when it fails
try:
@@ -16,17 +30,89 @@ import os.path
basepath = os.path.dirname(__file__)
app = Flask(__name__)
+app.secret_key = SECRET_KEY
# Book HTML is loaded through filesystem, in a tmp dir, make path absolute.
PAGEDJS_STATIC_DIR = os.path.join(basepath, 'static')
+COUNTER_PATH_ES = 'edition_counter.txt'
+COUNTER_PATH_EN = 'edition_counter_en.txt'
+
+def get_edition_count_es():
+ fd = os.open(COUNTER_PATH_ES, os.O_RDWR|os.O_CREAT)
+ lockf(fd, LOCK_EX)
+ fo = os.fdopen(fd, 'r+', encoding='utf-8')
+ content = fo.read()
+ if not content:
+ edition_count = 0
+ else:
+ edition_count = int(content.strip())
+ edition_count += 1
+ fo.seek(0)
+ fo.truncate()
+ fo.write(str(edition_count))
+ fo.flush()
+ lockf(fd, LOCK_UN)
+ os.close(fd)
+
+ return edition_count
+
+def get_edition_count_en():
+ fd = os.open(COUNTER_PATH_EN, os.O_RDWR|os.O_CREAT)
+ lockf(fd, LOCK_EX)
+ fo = os.fdopen(fd, 'r+', encoding='utf-8')
+ content = fo.read()
+ if not content:
+ edition_count = 0
+ else:
+ edition_count = int(content.strip())
+ edition_count += 1
+ fo.seek(0)
+ fo.truncate()
+ fo.write(str(edition_count))
+ fo.flush()
+ lockf(fd, LOCK_UN)
+ os.close(fd)
+
+ return edition_count
+
+def get_language():
+ if 'LANGUAGE' in session:
+ return session['LANGUAGE']
+ else:
+ return DEFAULT_LANGUAGE
+
+def set_language(language):
+ session['LANGUAGE'] = language
+ session.modified = True
+
+def index_es():
+ return render_template('index.html')
+
+def index_en():
+ return render_template('index_en.html')
+
+@app.route('{}/en'.format(BASEURL))
+def en():
+ set_language('en')
+ return index()
+
+@app.route('{}/es'.format(BASEURL))
+def es():
+ set_language('es')
+ return index()
+
@app.route('{}/'.format(BASEURL))
def index():
- return render_template('index.html')
+ if get_language() == 'es':
+ return index_es()
+ else:
+ return index_en()
-@app.route('{}/book'.format(BASEURL), methods=['POST'])
-def book():
- fragment = max(0, min(1, int(request.form['fragment'])))
+def book_es ():
+ edition_count = get_edition_count_es()
+
+ fragment = max(0, min(1, int(request.form['fragment'])))
first_word = 'un'
@@ -39,14 +125,15 @@ def book():
author = 'Benito Pérez Gáldos' # Non breaking spaces
title = 'Miau'
- path = crear_camino(novel, first_word)
+ path = crear_camino(novel, first_word, 'es')
context = {
'title': title,
'author': author,
'path': path,
'STATIC_DIR': '/static' if DEBUG else PAGEDJS_STATIC_DIR,
- 'DEBUG': DEBUG
+ 'DEBUG': DEBUG,
+ 'edition_count': edition_count,
}
html = render_template('book.html', **context)
@@ -59,7 +146,55 @@ def book():
r = Response(pdf, mimetype='application/pdf')
r.headers.extend({
- 'Content-Disposition': 'attachment; filename="Paseo por arboles de madrid.pdf"'
+ 'Content-Disposition': 'attachment; filename="Paseo por arboles de Madrid.pdf"'
})
return r
+
+def book_en ():
+ edition_count = get_edition_count_en()
+
+ fragment = max(0, min(1, int(request.form['fragment'])))
+
+ first_word = 'a'
+
+ if fragment == 0:
+ novel = os.path.join(basepath, '../data/emilia_english.txt')
+ author = 'Emilia Pardo Bazán' # Non breaking spaces
+ title = 'The Swan of Vila Morta' # Non breaking spaces
+ else:
+ novel = os.path.join(basepath, '../data/benito_english.txt')
+ author = 'Benito Pérez Gáldos' # Non breaking spaces
+ title = 'Marianela'
+
+ path = crear_camino(novel, first_word, 'en')
+
+ context = {
+ 'title': title,
+ 'author': author,
+ 'path': path,
+ 'STATIC_DIR': '/static' if DEBUG else PAGEDJS_STATIC_DIR,
+ 'DEBUG': DEBUG,
+ 'edition_count': edition_count,
+ }
+
+ html = render_template('book_en.html', **context)
+
+ if (DEBUG):
+ return html
+ else:
+ pdf = make_pdf(html)
+
+ r = Response(pdf, mimetype='application/pdf')
+
+ r.headers.extend({
+ 'Content-Disposition': 'attachment; filename="Walk along the trees of Madrid.pdf"'
+ })
+
+ return r
+@app.route('{}/book'.format(BASEURL), methods=['POST'])
+def book():
+ if get_language() == 'es':
+ return book_es()
+ else:
+ return book_en()
diff --git a/scripts/medialab.py b/scripts/medialab.py
index 3008523..0cf7940 100644
--- a/scripts/medialab.py
+++ b/scripts/medialab.py
@@ -16,6 +16,7 @@ import random
# Cargar tokenizer en español
nlp = spacy.load("es_core_news_sm")
+nlp_en = spacy.load("en_core_web_sm")
#FUNCIONES_________________________________________________________________
@@ -26,7 +27,7 @@ def limpiar_texto(fragmento):
fragmento_limpio = ' '.join(fragmento_limpio)
return fragmento_limpio
-def crear_base_datos(nombre_texto):
+def crear_base_datos(nombre_texto, lenguaje='es'):
# Abrir el archivo de texto para crear la base de datos
archivo = open(nombre_texto, 'r')
fragmento = archivo.read()
@@ -34,8 +35,12 @@ def crear_base_datos(nombre_texto):
fragmento_limpio = limpiar_texto(fragmento)
# Tokenización del fragmento de texto
- doc = nlp(fragmento_limpio)
- doc_len = len(doc)
+ if lenguaje == 'es':
+ doc = nlp(fragmento_limpio)
+ doc_len = len(doc)
+ else:
+ doc = nlp_en(fragmento_limpio)
+ doc_len = len(doc)
palabras_arboles = {} #Verbos, sustantivos, adverbios y adjetivos
palabras_camino = {} #El resto de palabras
diff --git a/scripts/paseo.py b/scripts/paseo.py
index e247c76..c164bb9 100644
--- a/scripts/paseo.py
+++ b/scripts/paseo.py
@@ -77,12 +77,12 @@ def path(word, words_tree, words_path, trees):
return itinerary
# Genera un camino a partir de un texto y una palabra del texto
-def crear_camino(nombre_archivo, palabra_inicial):
+def crear_camino(nombre_archivo, palabra_inicial, lenguaje='es'):
trees = load_trees_from_json()
shuffle(trees)
#print("Starting to read text")
- (palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo)
+ (palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo, lenguaje)
#print("Amount of tree words: ", len(palabras_arboles))
diff --git a/scripts/templates/book.html b/scripts/templates/book.html
index 7fdd308..41e6cb1 100644
--- a/scripts/templates/book.html
+++ b/scripts/templates/book.html
@@ -769,7 +769,7 @@
Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.
La copia de este libro es única y el tiraje es por definición infinito.
- Esta copia es el número XXX de copias descargadas.
+ Esta copia es el número {{ edition_count }} de copias descargadas.
Condiciones colectivas de (re)uso (CC4r), 2021
Copyleft con una diferencia: Se le
invita a copiar, distribuir y modificar esta obra bajo los términos de la CC4r.
diff --git a/scripts/templates/book_en.html b/scripts/templates/book_en.html
index 4c11a3c..906832a 100644
--- a/scripts/templates/book_en.html
+++ b/scripts/templates/book_en.html
@@ -570,7 +570,7 @@
the human beings Emilia Pardo Bazán, Benito Pérez Gáldos, Jaime Munárriz, Luis Morell, An Mertens, Eva Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.
The copy of this book is unique and the print run is by definition infinite.
- This copy is the XXX number of copies downloaded.
+ This copy is number {{ edition_count }} of the copies downloaded.
Collective terms of (re)use (CC4r), 2021
Copyleft with a difference: You are invited to copy, distribute, and modify this work under the terms of the work under the terms of the CC4r.