From 4f4d2802f0695454083df2b9068163ab82410b83 Mon Sep 17 00:00:00 2001
From: Gijs <gijs@de-heij.com>
Date: Sat, 5 Jun 2021 15:05:40 +0200
Subject: [PATCH] First layout

---
 scripts/app.py               |  49 +++++++++-
 scripts/medialab.py          |   1 +
 scripts/pagedjs.py           |  82 +++++++++++++++++
 scripts/paseo.py             |  98 ++++++++++----------
 scripts/templates/book.html  | 173 +++++++++++++++++++++++++++++++++++
 scripts/templates/index.html |   8 +-
 6 files changed, 362 insertions(+), 49 deletions(-)
 create mode 100644 scripts/pagedjs.py
 create mode 100644 scripts/templates/book.html

diff --git a/scripts/app.py b/scripts/app.py
index f422af6..203bc23 100644
--- a/scripts/app.py
+++ b/scripts/app.py
@@ -1,10 +1,55 @@
 #!/usr/bin/env/ python
 
-from flask import Flask, render_template
+from flask import Flask, render_template, request, Response
+from weasyprint import HTML
+from pagedjs import make_pdf
+
+try:
+  from paseo import crear_camino
+except ModuleNotFoundError:
+  pass  # NOTE(review): crear_camino stays undefined in this case, so book() would fail with a NameError -- confirm this is intended
+
+import os.path
+
+basepath = os.path.dirname(__file__)
 
 BASEURL = ''
 app = Flask(__name__)
 
 @app.route('{}/'.format(BASEURL))
 def index():
-  return 'Hello world'
+  return render_template('index.html')
+
+@app.route('{}/book'.format(BASEURL), methods=['POST'])
+def book():
+  """
+    Generate the book for the chosen novel fragment and send it as a PDF download.
+
+    Reads the form field 'fragment' (0 or 1) from the POST request, builds a
+    path with crear_camino, renders it with the book.html template and turns
+    the resulting HTML into a PDF with make_pdf.
+  """
+  # type=int makes a missing or non-numeric value fall back to the default
+  # instead of raising; any other number is clamped to the range 0..1.
+  fragment = max(0, min(1, request.form.get('fragment', default=0, type=int)))
+
+  first_word = 'un'
+
+  if fragment == 0:
+    novel = os.path.join(basepath, '../data/emilia_prueba.txt')
+    author = 'Emilia Pardo Bazán'
+    title = 'La Madre Naturaleza'
+  else:
+    novel = os.path.join(basepath, '../data/prueba.txt')
+    author = 'Benito Pérez Galdós'
+    title = 'Miau'
+
+  path = crear_camino(novel, first_word)
+  html = render_template('book.html', title=title, author=author, path=path)
+
+  # Use pagedjs as weasyprint does not seem to support our layout.
+  pdf = make_pdf(html)
+
+  r = Response(pdf, mimetype='application/pdf')
+  r.headers['Content-Disposition'] = 'attachment; filename="Paseo por arboles de madrid.pdf"'
+  return r
diff --git a/scripts/medialab.py b/scripts/medialab.py
index df3a1da..3008523 100644
--- a/scripts/medialab.py
+++ b/scripts/medialab.py
@@ -13,6 +13,7 @@ Created on Fri May 28 17:00:51 2021
 import spacy
 import random
 
+
 # Cargar tokenizer en español
 nlp = spacy.load("es_core_news_sm")
 
diff --git a/scripts/pagedjs.py b/scripts/pagedjs.py
new file mode 100644
index 0000000..6fb6774
--- /dev/null
+++ b/scripts/pagedjs.py
@@ -0,0 +1,82 @@
+import subprocess
+import tempfile
+import os.path
+
+basepath = os.path.abspath(os.path.dirname(__file__))
+paged_bin = 'node_modules/pagedjs-cli/bin/paged'
+
+def run_pagedjs (path_html, path_pdf, cwd=None, extra_scripts=None):
+  """
+    Run the pagedjs command line tool on path_html, writing the PDF to path_pdf.
+
+    cwd is the working directory of the process. extra_scripts is a list of paths
+    to javascript files, each passed as --additional-script in the given order.
+
+    Returns the combined stdout and stderr as a string. A non-zero exit status
+    does not raise: the output is returned prefixed with 'Error:'.
+  """
+  args = [ paged_bin ]
+  # Default is None rather than a mutable [] that would be shared between calls.
+  for script in extra_scripts or ():
+    args.extend([ '--additional-script', script ])
+  args.extend([ '-o', path_pdf, path_html ])
+
+  try:
+    return subprocess.check_output(args, cwd=cwd, stderr=subprocess.STDOUT).decode()
+  except subprocess.CalledProcessError as e:
+    return 'Error:\n{}'.format(e.output.decode())
+
+def make_pdf (html, path_out=None, extra_scripts=None):
+  """
+    Generate a PDF from the provided HTML using pagedjs and return the contents
+    of the generated PDF as bytes.
+
+    If optional path_out is provided the PDF is copied there and the path is returned instead.
+
+    Optional extra_scripts is a list of strings with javascript.
+    Scripts are sent in the same order to paged.js
+  """
+  # All intermediate files live in one temporary directory that is cleaned up
+  # on exit. delete=False keeps each file on disk after it has been closed, so
+  # pagedjs can open it by name.
+  with tempfile.TemporaryDirectory(prefix='algoliterary_publishing_house_') as tempdir:
+    # Store html in a temporary file. The encoding is fixed to UTF-8 instead of
+    # the locale default, which may not be able to encode accented characters.
+    with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.html', delete=False, encoding='utf-8') as temphtml:
+      temphtml.write(html)
+    name_in = temphtml.name
+
+    extra_scripts_tmp = []
+    for script in extra_scripts or ():
+      with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.js', delete=False, encoding='utf-8') as tempjs:
+        tempjs.write(script)
+      extra_scripts_tmp.append(tempjs.name)
+
+    # Reserve a file name for the generated PDF
+    with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.pdf', delete=False) as temppdf:
+      name_out = temppdf.name
+
+    # Make the pdf. NOTE(review): the output returned by run_pagedjs is ignored,
+    # so a failed run is not reported here -- confirm whether that is intended.
+    run_pagedjs(name_in, name_out, cwd=basepath, extra_scripts=extra_scripts_tmp)
+
+    if path_out:
+      import shutil
+      shutil.copy(name_out, path_out)
+      return path_out
+
+    with open(name_out, 'rb') as generated_pdf:
+      return generated_pdf.read()
+
+if __name__ == '__main__':
+  # Manual test run: convert the index.html next to this script into
+  # generated.pdf, handing the contents of delayedLoading.js to make_pdf as
+  # an extra script.
+  with open(os.path.join(basepath, 'index.html'), 'r') as file_input:
+    html = file_input.read()
+
+  with open(os.path.join(basepath, 'delayedLoading.js'), 'r') as js_input:
+    js = js_input.read()
+
+  # With an output path given, make_pdf writes the file and returns the path.
+  make_pdf(html, os.path.join(basepath, 'generated.pdf'), [ js ])
\ No newline at end of file
diff --git a/scripts/paseo.py b/scripts/paseo.py
index 7acdf85..e247c76 100644
--- a/scripts/paseo.py
+++ b/scripts/paseo.py
@@ -8,7 +8,8 @@ def path(word, words_tree, words_path, trees):
 	tree_index = {}
 	itinerary = []
 	current_step = word.capitalize() + ' '
-	markov_decision_traces = [ ( word, -1, []) ]
+	previous_steps = ''
+	markov_decision_traces = [ ( word, 0, [word]) ]
 
 	posibilities, dice, next_word = paso(word, words_tree, words_path)
 	
@@ -44,7 +45,7 @@ def path(word, words_tree, words_path, trees):
 			# is interpunction, add it to the current step
 			# but first remove trailing space
 			if next_word in '.,:;!?\)':
-				current_step = current_step[:-1] + next_word
+				current_step = current_step[:-1] + next_word + ' '
 				# Request a new next word to continue generation  
 				markov_decision_traces.append(( next_word, dice, posibilities ))
 
@@ -55,13 +56,17 @@ def path(word, words_tree, words_path, trees):
 					word = next_word
 					posibilities, dice, next_word = paso(word, words_tree, words_path)
 
+
 			# Add the current step, and the tree to the itinerary
 			itinerary.append((
 				current_step,
+				previous_steps,
 				tree,
 				markov_decision_traces
 			))
 
+			previous_steps += current_step
+
 			# Clear the current step
 			current_step = ''
 			markov_decision_traces = []
@@ -83,48 +88,49 @@ def crear_camino(nombre_archivo, palabra_inicial):
 
 	return path(palabra_inicial, palabras_arboles, palabras_camino, trees)
 
-import os.path
-
-basepath = os.path.dirname(__file__)
-
-#EJECUCIÓN__________________________________________________________________
-print('Puedes elegir una novela para crear tu Paseo por árboles de Madrid.')
-print('Opción 1: La novela "La madre naturaleza" de la escritora feminista Emilia Pardo Bazán \
-fue publicada en 1887. Usa en esta obra una prosa poética y descriptiva, y en sus páginas se \
-siente el amor que profesa al paisaje gallego, con un conocimiento de la botánica y de \
-las costumbres rurales muy superior al de sus contemporáneos.')
-print('Opción 2: La novela "Miau" del escritor Benito Pérez Galdós fue publicada en 1888. \
-Enmarcada en el género realista, satiriza el Madrid burocrático de finales del siglo XIX \
-a partir de las vicisitudes vitales de su protagonista, Ramón Villaamil, \
-un competente exempleado del Ministerio de Hacienda, al que una serie de intrigas \
-han dejado cesante.')
-novel = input('Por favor, marca 1 o 2: ')
-
-first_word = 'un'
-if novel == '1':
-	novel = os.path.join(basepath, '../data/emilia_prueba.txt')
-	author = 'Emilia Pardo Bazán'
-	title = 'La Madre Naturaleza'
-else:
-	novel = os.path.join(basepath, '../data/prueba.txt')
-	author = 'Benito Pérez Gáldos'
-	title = 'Miau'
-
-# Create title/subtitle
-print('\nPaseo por los árboles de Madrid con', author, 'y', title, '\n')
-print('-------------------------------------------------------------------------------------------\n')
-
-# Create chapters
-path = crear_camino(novel, first_word)
-
-sentences = []
-for sentence, tree, traces in path:
-	for word, dice, options in traces:
-		print('Dice rolled - {} -'.format(dice))
-		print('New word - {} - chosen from {}'.format(word, options))
-		print('')
-	sentences.append(sentence)
-	print('Itinerary:\n{} \n'.format(''.join(sentences)))
-	print('Tree linked to last word :', tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'], '\n')
-	print('\n')
+if __name__ == '__main__':
+	import os.path
+
+	basepath = os.path.dirname(__file__)
+	# EXECUTION__________________________________________________________________
+	# Long texts use adjacent string literals: a backslash continuation would embed the indentation tabs in the output.
+	print('Puedes elegir una novela para crear tu Paseo por árboles de Madrid.')
+	print('Opción 1: La novela "La madre naturaleza" de la escritora feminista Emilia Pardo Bazán '
+		'fue publicada en 1887. Usa en esta obra una prosa poética y descriptiva, y en sus páginas se '
+		'siente el amor que profesa al paisaje gallego, con un conocimiento de la botánica y de '
+		'las costumbres rurales muy superior al de sus contemporáneos.')
+	print('Opción 2: La novela "Miau" del escritor Benito Pérez Galdós fue publicada en 1888. '
+		'Enmarcada en el género realista, satiriza el Madrid burocrático de finales del siglo XIX '
+		'a partir de las vicisitudes vitales de su protagonista, Ramón Villaamil, '
+		'un competente exempleado del Ministerio de Hacienda, al que una serie de intrigas '
+		'han dejado cesante.')
+	novel = input('Por favor, marca 1 o 2: ')
+
+	first_word = 'un'
+	if novel == '1':
+		novel = os.path.join(basepath, '../data/emilia_prueba.txt')
+		author = 'Emilia Pardo Bazán'
+		title = 'La Madre Naturaleza'
+	else:
+		novel = os.path.join(basepath, '../data/prueba.txt')
+		author = 'Benito Pérez Galdós'
+		title = 'Miau'
+
+	# Create title/subtitle
+	print('\nPaseo por los árboles de Madrid con', author, 'y', title, '\n')
+	print('-------------------------------------------------------------------------------------------\n')
+
+	# Create chapters
+	path = crear_camino(novel, first_word)
+
+	sentences = []
+	for sentence, concatenated_steps, tree, traces in path:
+		for word, dice, options in traces:
+			print('Dice rolled - {} -'.format(dice))
+			print('New word - {} - chosen from {}'.format(word, options))
+			print('')
+		sentences.append(sentence)
+		print('Itinerary:\n{} \n'.format(''.join(sentences)))
+		print('Tree linked to last word :', tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'], '\n')
+		print('\n')
 
diff --git a/scripts/templates/book.html b/scripts/templates/book.html
new file mode 100644
index 0000000..f8a5351
--- /dev/null
+++ b/scripts/templates/book.html
@@ -0,0 +1,173 @@
+<!DOCTYPE html>
+<html lang="es">
+<head>
+  <meta charset="UTF-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Document</title>
+  <style>
+    
+    @page {
+      size: 210mm 297mm;
+      margin: 10mm 10mm 15mm 10mm;
+    }
+
+    [data-picked] {
+      text-decoration: underline;
+      position: relative;
+    }
+
+    [data-picked]::after {
+      content: ' → ';
+      text-decoration: none;
+      position: absolute;
+      left: calc(100% + 1.5em);
+      top: 0;
+      height: 1.2em;
+      display: block;
+    }
+    
+    .traces > :last-child [data-picked]::after {
+      display: none;
+    }
+
+    @page {
+      size: a4;
+    }
+
+    @page title {
+      background: black;
+    }
+
+    @page:left {
+      @bottom-left {
+        text-align: left;
+        content: counter(page);
+      }
+    }
+
+    @page:right {
+      @bottom-right {
+        text-align: right;
+        content: counter(page);
+      }
+    }
+
+    @page:empty {
+      @bottom-right {
+        content: '';
+      }
+
+      @bottom-left {
+        content: '';
+      }
+    }
+
+    @page title {
+      @bottom-right {
+        content: '';
+      }
+
+      @bottom-left {
+        content: '';
+      }
+    }
+
+    ul {
+      margin: 0;
+      padding: 0;
+    }
+
+    h1 {
+      page: title;
+      color: white;
+      page-break-after: right;
+    }
+
+    section.step {
+      page-break-before: always;
+      text-align: center;
+      display: flex;
+      flex-direction: column;
+      height: 252mm;
+      align-items: center;
+      justify-content: space-between;
+      overflow: hidden;
+      position: relative;
+      margin: 0;
+      padding: 0;
+    }
+
+    .traces {
+      list-style-type: none;
+      display: flex;
+      flex-direction: row;
+      position: absolute;
+	    top: 50%;
+      line-height: 1.2em;
+      margin: 0;
+      padding: 0;
+      vertical-align: text-bottom;
+    }
+
+    .options {
+      list-style-type: none;
+      margin: 0 4em 0 0;
+    }
+
+    .options li {
+      line-height: 1.2em;
+      height: 1.2em;
+      margin: 0;
+      padding: 0;
+    }
+
+    .sentence {
+      z-index: 1;
+      position: relative;
+      background: linear-gradient(to top, rgba(255,255,255,0), white 2.5em);
+      padding: 0.5em 20mm 3em 20mm;
+      width: 100%;
+      box-sizing: border-box;
+    }
+  
+
+    .tree {
+      z-index: 1;
+      position: relative;
+      background: linear-gradient(to bottom, rgba(255,255,255,0), white 2.5em);
+      padding: 3em 20mm 0.5em 20mm;
+      width: 100%;
+      box-sizing: border-box;
+    }
+  </style>
+</head>
+<body>
+  <h1>Paseo por los árboles de Madrid con {{ author }} y {{ title }}</h1>
+
+  {% for sentence, previous_steps, tree, traces in path %}
+    <section class="step">
+      <section class="sentence">
+        {{ previous_steps }}
+      </section>
+      <ul class="traces">
+        {% for word, dice, options in traces %}
+        <li style="margin-top: calc({{ dice }} * -1.2em)">
+          <ul class="options">
+            {% for option in options %}
+            <li {% if loop.index0 == dice %}data-picked{% endif %}>
+              {{ option }}
+            </li>
+            {% endfor %}
+          </ul>
+          <!-- Rolled: {{ dice }} -->
+        </li>
+        {% endfor %}
+      </ul>
+      <section class="tree">
+        {{ tree.properties.NOMBRE_COMUN }} en {{ tree.properties.MINTDIRECCIONAUX }}
+      </section>
+    </section>
+  {% endfor %}
+</body>
+</html>
\ No newline at end of file
diff --git a/scripts/templates/index.html b/scripts/templates/index.html
index 6116828..ddac7e2 100644
--- a/scripts/templates/index.html
+++ b/scripts/templates/index.html
@@ -1 +1,7 @@
-<h1>Hello world</h1>
\ No newline at end of file
+<h1>Hello world</h1>
+
+<form method="POST" action="/book">
+  <label><input type="radio" name="fragment" value="0" checked> Fragment 1</label><br />
+  <label><input type="radio" name="fragment" value="1"> Fragment 2</label><br />
+  <button type="submit">Generate</button>
+</form>
\ No newline at end of file