commit dbe02f7ead90764e0d13f4753481f248d3136025 Author: Gijs Date: Thu Jun 3 16:33:52 2021 +0200 First test to run pagedjs from python. diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..977382d --- /dev/null +++ b/Readme.md @@ -0,0 +1,6 @@ +# Grafting a tree + +# Installation + +The prototype uses pagedjs-cli to convert the HTML into PDF. In a terminal `cd` into the cloned folder and run: +`npm install pagedjs-cli` \ No newline at end of file diff --git a/delayedLoading.js b/delayedLoading.js new file mode 100644 index 0000000..e218b0e --- /dev/null +++ b/delayedLoading.js @@ -0,0 +1,18 @@ +class delayedLoading extends Paged.Handler { + // this lets us call the methods from the chunker, the polisher and the caller for the rest of the script + constructor(chunker, polisher, caller) { + super(chunker, polisher, caller); + } + + beforeParsed (_) { + document.body.appendChild(document.createTextNode('this text was inserted before the slow resolve')); + return new Promise(function (resolve) { + setTimeout(function () { + document.body.appendChild(document.createTextNode('this text was inserted before pagedjs is loaded')); + resolve(); + }, 2000); + }); + } +} + +Paged.registerHandlers(delayedLoading); \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..a20eae2 --- /dev/null +++ b/index.html @@ -0,0 +1,73 @@ + + + + + + Hello world + + + +

+ Hello world. +

This document started as a website.

This is a second paragraph.

# test.py -- drive pagedjs-cli from Python to turn HTML into a PDF.
import os.path
import shutil
import subprocess
import tempfile

# Directory containing this file. It is used as the working directory when
# pagedjs is started, so the relative `paged_bin` path resolves against it.
basepath = os.path.abspath(os.path.dirname(__file__))
# Location of the pagedjs-cli executable (installed with
# `npm install pagedjs-cli`), relative to the working directory.
paged_bin = 'node_modules/pagedjs-cli/bin/paged'


def run_pagedjs(path_html, path_pdf, cwd=None, extra_scripts=None):
    """Run pagedjs-cli to convert an HTML file into a PDF file.

    path_html is the HTML file to convert, path_pdf is where the PDF is
    written (passed to the tool with `-o`).

    Optional cwd is the working directory for the subprocess.

    Optional extra_scripts is a list of paths to javascript files; each one is
    passed with `--additional-script`, in the given order.

    Returns the decoded combined stdout/stderr of the tool. When the tool exits
    with a non-zero status the output is returned prefixed with 'Error:\\n'
    instead of raising.
    """
    args = [paged_bin]

    # `None` instead of a mutable `[]` default; both mean "no extra scripts".
    for script in extra_scripts or []:
        args.extend(['--additional-script', script])

    args.extend(['-o', path_pdf, path_html])

    try:
        return subprocess.check_output(args, cwd=cwd, stderr=subprocess.STDOUT).decode()
    except subprocess.CalledProcessError as e:
        return 'Error:\n{}'.format(e.output.decode())


def make_pdf(html, path_out=None, extra_scripts=None):
    """Generate a PDF from the provided HTML using pagedjs.

    Returns the contents of the generated PDF as bytes.

    If optional path_out is provided the PDF is written there and the function
    returns the path.

    Optional extra_scripts is a list of strings with javascript.
    Scripts are sent in the same order to paged.js
    """
    with tempfile.TemporaryDirectory(prefix='algoliterary_publishing_house_') as tempdir:
        # Store html in a temporary file. delete=False keeps the file on disk
        # after the `with` closes it; the temporary directory cleans it up.
        with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.html', delete=False) as temphtml:
            temphtml.write(html)
        name_in = temphtml.name

        # pagedjs takes scripts as files, so write each script string to disk.
        extra_scripts_tmp = []
        for script in extra_scripts or []:
            with tempfile.NamedTemporaryFile(dir=tempdir, mode='w', suffix='.js', delete=False) as tempjs:
                tempjs.write(script)
            extra_scripts_tmp.append(tempjs.name)

        # Reserve a temporary file name for the generated PDF
        with tempfile.NamedTemporaryFile(dir=tempdir, suffix='.pdf', delete=False) as temppdf:
            name_out = temppdf.name

        # Make the pdf.
        # NOTE: run_pagedjs reports a failed conversion through its return
        # value, which is not inspected here; after a failed run the PDF file
        # is still the empty file created above.
        run_pagedjs(name_in, name_out, cwd=basepath, extra_scripts=extra_scripts_tmp)

        if path_out:
            shutil.copy(name_out, path_out)
            return path_out

        # A PDF is binary data: read it as bytes, decoding it as text would
        # fail or corrupt the contents.
        with open(name_out, 'rb') as generated_pdf:
            return generated_pdf.read()


if __name__ == '__main__':
    with open(os.path.join(basepath, 'index.html'), 'r') as file_input:
        html = file_input.read()

    with open(os.path.join(basepath, 'delayedLoading.js'), 'r') as js_input:
        js = js_input.read()

    make_pdf(html, os.path.join(basepath, 'generated.pdf'), [js])