From 4f4d2802f0695454083df2b9068163ab82410b83 Mon Sep 17 00:00:00 2001 From: Gijs Date: Sat, 5 Jun 2021 15:05:40 +0200 Subject: [PATCH] First layout --- scripts/app.py | 49 +++++++++- scripts/medialab.py | 1 + scripts/pagedjs.py | 82 +++++++++++++++++ scripts/paseo.py | 98 ++++++++++---------- scripts/templates/book.html | 173 +++++++++++++++++++++++++++++++++++ scripts/templates/index.html | 8 +- 6 files changed, 362 insertions(+), 49 deletions(-) create mode 100644 scripts/pagedjs.py create mode 100644 scripts/templates/book.html diff --git a/scripts/app.py b/scripts/app.py index f422af6..203bc23 100644 --- a/scripts/app.py +++ b/scripts/app.py @@ -1,10 +1,55 @@ #!/usr/bin/env/ python -from flask import Flask, render_template +from flask import Flask, render_template, request, Response +from weasyprint import HTML +from pagedjs import make_pdf + +try: + from paseo import crear_camino +except ModuleNotFoundError: + pass + +import os.path + +basepath = os.path.dirname(__file__) BASEURL = '' app = Flask(__name__) @app.route('{}/'.format(BASEURL)) def index(): - return 'Hello world' + return render_template('index.html') + +@app.route('{}/book'.format(BASEURL), methods=['POST']) +def book(): + fragment = max(0, min(1, int(request.form['fragment']))) + + first_word = 'un' + + if fragment == 0: + novel = os.path.join(basepath, '../data/emilia_prueba.txt') + author = 'Emilia Pardo Bazán' + title = 'La Madre Naturaleza' + else: + novel = os.path.join(basepath, '../data/prueba.txt') + author = 'Benito Pérez Gáldos' + title = 'Miau' + + path = crear_camino(novel, first_word) + + context = { 'title': title, 'author': author, 'path': path } + html = render_template('book.html', **context) + + # pdf = HTML(string=html).write_pdf() + + # Use pagedjs as weasyprint does not seem to support our layout. + pdf = make_pdf(html) + + r = Response(pdf, mimetype='application/pdf') + + r.headers.extend({ + 'Content-Disposition': 'attachment; filename="Paseo por arboles de madrid.pdf"' + }) + + return r + # return html diff --git a/scripts/medialab.py b/scripts/medialab.py index df3a1da..3008523 100644 --- a/scripts/medialab.py +++ b/scripts/medialab.py @@ -13,6 +13,7 @@ Created on Fri May 28 17:00:51 2021 import spacy import random + # Cargar tokenizer en español nlp = spacy.load("es_core_news_sm") diff --git a/scripts/pagedjs.py b/scripts/pagedjs.py new file mode 100644 index 0000000..6fb6774 --- /dev/null +++ b/scripts/pagedjs.py @@ -0,0 +1,82 @@ +import subprocess +import tempfile +import os.path + +basepath = os.path.abspath(os.path.dirname(__file__)) +paged_bin = 'node_modules/pagedjs-cli/bin/paged' + +def run_pagedjs (path_html, path_pdf, cwd=None, extra_scripts=[]): + args = [ + paged_bin + ] + + for script in extra_scripts: + args.extend([ + '--additional-script', + script + ]) + + args.extend([ + '-o', path_pdf, + path_html + ]) + + try: + return subprocess.check_output(args, cwd=cwd, stderr=subprocess.STDOUT).decode() + except subprocess.CalledProcessError as e: + return 'Error:\n{}'.format(e.output.decode()) + +""" + Generate a PDF based on provided HTML using pagedjs and returns the contents of + the generated PDF. + + If optional path_out is provided the PDF is written there and the function returns the path. + + Optional extra_scripts is a list of strings with javascript. + Scripts are sent in the same order to paged.js +""" +def make_pdf (html, path_out=None, extra_scripts=[]): + with tempfile.TemporaryDirectory(prefix='algoliterary_publishing_house_') as tempdir: + with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.html', delete=False) as temphtml: + # Store html in a temporary file + temphtml.write(html) + temphtml.close() + + name_in = temphtml.name + + extra_scripts_tmp = [] + + for script in extra_scripts: + with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.js', delete=False) as tempjs: + tempjs.write(script) + tempjs.close() + extra_scripts_tmp.append(tempjs.name) + + # Make a temporary file for the generated PDF + with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.pdf', delete=False) as temppdf: + temppdf.close() + name_out = temppdf.name + + # Make the pdf + run_pagedjs(name_in, name_out, cwd=basepath, extra_scripts=extra_scripts_tmp) + + if path_out: + import shutil + shutil.copy(name_out, path_out) + return path_out + else: + with open(name_out, 'rb') as generated_pdf: + return generated_pdf.read() + +if __name__ == '__main__': + + + with open(os.path.join(basepath, 'index.html'), 'r') as file_input: + html = file_input.read() + + with open(os.path.join(basepath, 'delayedLoading.js'), 'r') as js_input: + js = js_input.read() + + make_pdf(html, os.path.join(basepath, 'generated.pdf'), [ js ]) + + \ No newline at end of file diff --git a/scripts/paseo.py b/scripts/paseo.py index 7acdf85..e247c76 100644 --- a/scripts/paseo.py +++ b/scripts/paseo.py @@ -8,7 +8,8 @@ def path(word, words_tree, words_path, trees): tree_index = {} itinerary = [] current_step = word.capitalize() + ' ' - markov_decision_traces = [ ( word, -1, []) ] + previous_steps = '' + markov_decision_traces = [ ( word, 0, [word]) ] posibilities, dice, next_word = paso(word, words_tree, words_path) @@ -44,7 +45,7 @@ def path(word, words_tree, words_path, trees): # is interpunction, add it to the current step # but first remove trailing space if next_word in '.,:;!?\)': - current_step = current_step[:-1] + next_word + current_step = current_step[:-1] + next_word + ' ' # Request a new next word to continue generation markov_decision_traces.append(( next_word, dice, posibilities )) @@ -55,13 +56,17 @@ def path(word, words_tree, words_path, trees): word = next_word posibilities, dice, next_word = paso(word, words_tree, words_path) + # Add the current step, and the tree to the itinerary itinerary.append(( current_step, + previous_steps, tree, markov_decision_traces )) + previous_steps += current_step + # Clear the current step current_step = '' markov_decision_traces = [] @@ -83,48 +88,49 @@ def crear_camino(nombre_archivo, palabra_inicial): return path(palabra_inicial, palabras_arboles, palabras_camino, trees) -import os.path - -basepath = os.path.dirname(__file__) - -#EJECUCIÓN__________________________________________________________________ -print('Puedes elegir una novela para crear tu Paseo por árboles de Madrid.') -print('Opción 1: La novela "La madre naturaleza" de la escritora feminista Emilia Pardo Bazán \ -fue publicada en 1887. Usa en esta obra una prosa poética y descriptiva, y en sus páginas se \ -siente el amor que profesa al paisaje gallego, con un conocimiento de la botánica y de \ -las costumbres rurales muy superior al de sus contemporáneos.') -print('Opción 2: La novela "Miau" del escritor Benito Pérez Galdós fue publicada en 1888. \ -Enmarcada en el género realista, satiriza el Madrid burocrático de finales del siglo XIX \ -a partir de las vicisitudes vitales de su protagonista, Ramón Villaamil, \ -un competente exempleado del Ministerio de Hacienda, al que una serie de intrigas \ -han dejado cesante.') -novel = input('Por favor, marca 1 o 2: ') - -first_word = 'un' -if novel == '1': - novel = os.path.join(basepath, '../data/emilia_prueba.txt') - author = 'Emilia Pardo Bazán' - title = 'La Madre Naturaleza' -else: - novel = os.path.join(basepath, '../data/prueba.txt') - author = 'Benito Pérez Gáldos' - title = 'Miau' - -# Create title/subtitle -print('\nPaseo por los árboles de Madrid con', author, 'y', title, '\n') -print('-------------------------------------------------------------------------------------------\n') - -# Create chapters -path = crear_camino(novel, first_word) - -sentences = [] -for sentence, tree, traces in path: - for word, dice, options in traces: - print('Dice rolled - {} -'.format(dice)) - print('New word - {} - chosen from {}'.format(word, options)) - print('') - sentences.append(sentence) - print('Itinerary:\n{} \n'.format(''.join(sentences))) - print('Tree linked to last word :', tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'], '\n') - print('\n') +if __name__ == '__main__': + import os.path + + basepath = os.path.dirname(__file__) + + #EJECUCIÓN__________________________________________________________________ + print('Puedes elegir una novela para crear tu Paseo por árboles de Madrid.') + print('Opción 1: La novela "La madre naturaleza" de la escritora feminista Emilia Pardo Bazán \ + fue publicada en 1887. Usa en esta obra una prosa poética y descriptiva, y en sus páginas se \ + siente el amor que profesa al paisaje gallego, con un conocimiento de la botánica y de \ + las costumbres rurales muy superior al de sus contemporáneos.') + print('Opción 2: La novela "Miau" del escritor Benito Pérez Galdós fue publicada en 1888. \ + Enmarcada en el género realista, satiriza el Madrid burocrático de finales del siglo XIX \ + a partir de las vicisitudes vitales de su protagonista, Ramón Villaamil, \ + un competente exempleado del Ministerio de Hacienda, al que una serie de intrigas \ + han dejado cesante.') + novel = input('Por favor, marca 1 o 2: ') + + first_word = 'un' + if novel == '1': + novel = os.path.join(basepath, '../data/emilia_prueba.txt') + author = 'Emilia Pardo Bazán' + title = 'La Madre Naturaleza' + else: + novel = os.path.join(basepath, '../data/prueba.txt') + author = 'Benito Pérez Gáldos' + title = 'Miau' + + # Create title/subtitle + print('\nPaseo por los árboles de Madrid con', author, 'y', title, '\n') + print('-------------------------------------------------------------------------------------------\n') + + # Create chapters + path = crear_camino(novel, first_word) + + sentences = [] + for sentence, concatenated_steps, tree, traces in path: + for word, dice, options in traces: + print('Dice rolled - {} -'.format(dice)) + print('New word - {} - chosen from {}'.format(word, options)) + print('') + sentences.append(sentence) + print('Itinerary:\n{} \n'.format(''.join(sentences))) + print('Tree linked to last word :', tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'], '\n') + print('\n') diff --git a/scripts/templates/book.html b/scripts/templates/book.html new file mode 100644 index 0000000..f8a5351 --- /dev/null +++ b/scripts/templates/book.html @@ -0,0 +1,173 @@ + + + + + + + Document + + + +

Paseo por los árboles de Madrid con {{ author }} y {{ title }}

+ + {% for sentence, previous_steps, tree, traces in path %} +
+
+ {{ previous_steps }} +
+ +
+ {{ tree.properties.NOMBRE_COMUN }} en {{ tree.properties.MINTDIRECCIONAUX }} +
+
+ {% endfor %} + + \ No newline at end of file diff --git a/scripts/templates/index.html b/scripts/templates/index.html index 6116828..ddac7e2 100644 --- a/scripts/templates/index.html +++ b/scripts/templates/index.html @@ -1 +1,7 @@ -

Hello world

\ No newline at end of file +

Hello world

+ +
+
+
+ +
\ No newline at end of file