Levenshtein-Distance-lee-a-.../app.py

#!/usr/bin/env python

#    This is the web interface for 'la_distancia_de_levenshtein_lee_a_cortazar';
#    it generates the PDF using WeasyPrint.

#    Copyright (C) 2021, Anais Berck
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details: <http://www.gnu.org/licenses/>.

import textwrap
from io import StringIO
from os.path import dirname
import re
import datetime

import os
from fcntl import lockf, LOCK_EX, LOCK_UN

from flask import Flask, Response, render_template, request, send_file
from weasyprint import HTML

from la_distancia_de_levenshtein_lee_a_cortazar import (
    calculate_distances, find_nearest_species, generate_in_between_species,
    generate_new_fragment, openfile, print_map, print_table, sort_distances)

BASEURL = '/levenshtein'
COUNTER_PATH = 'edition_counter.txt'

def strip_but_spaces_and_words(text):
  """Return *text* without any character that is neither a word character,
  whitespace nor a digit (i.e. punctuation and symbols are dropped)."""
  unwanted_characters = re.compile(r'[^\w\s\d]')
  return unwanted_characters.sub('', text)

def wrap(text, width):
  """Re-wrap every line of *text* to at most *width* columns.

  Each input line is wrapped on its own, so existing line breaks are kept;
  a blank input line stays a blank line in the result. The wrapped lines are
  joined with single newlines (no trailing newline is added).
  """
  wrapped_lines = (textwrap.fill(line, width=width) for line in text.splitlines())
  return '\n'.join(wrapped_lines)

def read_sources(*paths):
  """Read the files at *paths* and return them wrapped for display.

  Returns a list of ``(path, text)`` tuples, in the order the paths were
  given, where ``text`` is the file content passed through ``wrap`` with a
  width of 120 columns.
  """
  sources = []
  for p in paths:
    # Use a context manager so the handle is closed as soon as the file has
    # been read, instead of relying on garbage collection.
    with open(p, 'r') as source_file:
      sources.append((p, wrap(source_file.read(), 120)))
  return sources

def get_edition_count(path=None):
  """Increment the edition counter stored on disk and return the new value.

  The counter file is created when it does not exist. An empty (or
  whitespace-only) file counts as 0, so the first call returns 1. An
  exclusive ``lockf`` lock is held on the file while it is read and
  rewritten.

  path -- counter file to use; defaults to ``COUNTER_PATH``.

  Raises ValueError when the file holds something that is not an integer.
  """
  if path is None:
    path = COUNTER_PATH

  fd = os.open(path, os.O_RDWR | os.O_CREAT)

  # The file object takes ownership of the descriptor: leaving the `with`
  # block closes it exactly once, also when an exception is raised.
  with os.fdopen(fd, 'r+', encoding='utf-8') as fo:
    lockf(fd, LOCK_EX)
    try:
      content = fo.read().strip()
      edition_count = int(content) + 1 if content else 1

      # Rewind and empty the file before writing the new value.
      fo.seek(0)
      fo.truncate()
      fo.write(str(edition_count))
      # Push the new value to disk before the lock is released.
      fo.flush()
    finally:
      lockf(fd, LOCK_UN)

  return edition_count


app = Flask(__name__)

## 1A. Open the text files & turn them into machine-readable lists

# Fragment by Cortazar
txt = 'eucalipto_cortazar.txt'

all_plural = openfile('arboles_plural.txt')
all_simple = openfile('arboles_simple.txt')

## 1B. Turn the lists of trees into a dictionary of single/plural words
# zip pairs the entries by position.
# NOTE(review): presumably both files list the same species in the same
# order; verify against the data files.
trees = dict(zip(all_simple, all_plural))

## 2. HOMEPAGE
## Load the Cortazar fragment
with open(txt, 'r') as source:
  fragment = source.read()

# cover_distances = sort_distances(calculate_distances('eucalipto', all_simple))

# Lower-cased words of the fragment (punctuation stripped) and their sorted
# distances to 'eucalipto'. Computed once at import time because they do not
# depend on the request.
fragment_words = re.split(r'[\n\s]+', strip_but_spaces_and_words(fragment).lower())
fragment_word_distances = sort_distances(calculate_distances('eucalipto', fragment_words))

@app.route('/')
def index():
  """Render the homepage template with the tree dictionary and the base URL."""
  template_context = {'trees': trees, 'BASEURL': BASEURL}
  return render_template('index.html', **template_context)

@app.route('{}/generate'.format(BASEURL), methods=['POST'])
def generate():
  """Generate the PDF edition for the tree selected in the posted form.

  Reads the form field ``selected_tree``. When it names a tree from
  ``all_simple``, the edition counter is incremented, the maps, the new
  fragment, the repetitive poetry and the table of intermediary species are
  generated, rendered through ``print.html`` and converted to a PDF, which
  is returned as an attachment.

  When the posted tree is unknown the response body is '500' with HTTP
  status 500, and the edition counter is left untouched.
  """
  new_main_tree = str(request.form['selected_tree'])

  # Validate before touching the counter, so a rejected request does not
  # consume an edition number.
  if new_main_tree not in all_simple:
    # Send a real error status along with the body; a bare string would be
    # delivered as 200 OK.
    # NOTE(review): a 400 would describe "unknown tree" more accurately.
    return '500', 500

  edition_count = get_edition_count()

  # All text buffers live in one `with`, so every one of them is closed once
  # its content has been copied into the template context.
  with StringIO() as fragment_cover_map, \
       StringIO() as repetitive_poetry, \
       StringIO() as forest_map, \
       StringIO() as table_of_intermediary_species:

    # Generate map for the cover
    # cover_map = StringIO()
    # print_map(cover_distances, show_distances=False, file_out=cover_map)
    print_map(fragment_word_distances, show_distances=False, file_out=fragment_cover_map)

    ## 3. Count the similarity between the main tree and the other species in the forest
    similarities = calculate_distances(new_main_tree, all_simple)

    ## 4. Sort the similarities between the trees and the new main tree from the lowest to the highest counts
    sorted_similarities = sort_distances(similarities)

    # Find the nearest species
    near_species, nearest_species = find_nearest_species(sorted_similarities, trees)

    new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)

    ## 5. Compare the similarity between the main character tree and the main species in the forest
    in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=repetitive_poetry)

    print_map(sorted_similarities, file_out=forest_map)

    print_table(new_main_tree, near_species, in_between_species, table_of_intermediary_species)

    now = datetime.datetime.now()

    context = {
      'edition_count': edition_count,
      # 'cover_map': cover_map.getvalue(),
      'date': now.strftime('%d-%m-%Y'),
      'time': now.strftime('%H:%M:%S'),
      'fragment_cover_map': fragment_cover_map.getvalue(),
      'forest_map': forest_map.getvalue(),
      'new_fragment': wrap(new_fragment, 85),
      'repetitive_poetry': wrap(repetitive_poetry.getvalue(), 55),
      'table_of_intermediary_species': table_of_intermediary_species.getvalue(),
      'sources': read_sources('la_distancia_de_levenshtein_lee_a_cortazar.py'),
      'BASEDIR': dirname(__file__)
    }

  raw_html = render_template('print.html', **context)
  pdf = HTML(string=raw_html).write_pdf()

  # Deliver the PDF as a download named after the edition number.
  return Response(pdf, mimetype='application/pdf', headers={
    'Content-Disposition': 'attachment; filename="La distancia de Levenshtein {}.pdf"'.format(edition_count)
  })
functional copy of the folder in Algolit repository -Algoliterary Publishing- where this project is still saved. 3 years ago			`#!/usr/bin/env/ python`

			`# This is the webinterface for 'la_distancia_de_levenshtein_lee_a_cortazar'`
			`# and generates the pdf using weasyprint.`

			`# Copyright (C) 2021, Anais Berck`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`

			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details: <http://www.gnu.org/licenses/>.`

			`import textwrap`
			`from io import StringIO`
			`from os.path import dirname`
			`import re`
			`import datetime`

			`import os`
			`from fcntl import lockf, LOCK_EX, LOCK_UN`

			`from flask import Flask, Response, render_template, request, send_file`
			`from weasyprint import HTML`

			`from la_distancia_de_levenshtein_lee_a_cortazar import (`
			`calculate_distances, find_nearest_species, generate_in_between_species,`
			`generate_new_fragment, openfile, print_map, print_table, sort_distances)`

			`BASEURL = '/levenshtein'`
			`COUNTER_PATH = 'edition_counter.txt'`

			`def strip_but_spaces_and_words (text ):`
			`return re.sub(r'[^\w\s\d]', '', text)`

			`def wrap (text, width):`
			`return'\n'.join(['\n'.join(textwrap.wrap(line, width=width)) for line in text.splitlines()])`

			`def read_sources (*paths):`
			`return [ (p, wrap(open(p, 'r').read(), 120)) for p in paths ]`

			`def get_edition_count():`
			`fd = os.open(COUNTER_PATH, os.O_RDWR\|os.O_CREAT)`
			`lockf(fd, LOCK_EX)`
			`fo = os.fdopen(fd, 'r+', encoding='utf-8')`
			`content = fo.read()`
			`if not content:`
			`edition_count = 0`
			`else:`
			`edition_count = int(content.strip())`
			`edition_count += 1`
			`fo.seek(0)`
			`fo.truncate()`
			`fo.write(str(edition_count))`
			`fo.flush()`
			`lockf(fd, LOCK_UN)`
			`os.close(fd)`

			`return edition_count`


			`app = Flask(__name__)`
			`trees = []`

			`## 1A. Open textfiles & turn textfiles in machine readable lists`

			`# Cortazar`
			`txt = 'eucalipto_cortazar.txt'`

			`all_plural = openfile('arboles_plural.txt')`
			`all_simple = openfile('arboles_simple.txt')`

			`## 1B. Turn list of trees into dictionary of single/plural words`
			`trees = dict(zip(all_simple, all_plural))`

			`## 2. HOMEPAGE`
			`## laod fragment Cortazar`
			`with open(txt, 'r') as source:`
			`fragment = source.read()`

			`# cover_distances = sort_distances(calculate_distances('eucalipto', all_simple))`

			`fragment_words = re.split(r'[\n\s]+', strip_but_spaces_and_words(fragment).lower())`
			`fragment_word_distances = sort_distances(calculate_distances('eucalipto', fragment_words))`

			`@app.route('/')`
			`def index():`
			`return render_template('index.html', trees=trees, BASEURL=BASEURL)`

			`@app.route('{}/generate'.format(BASEURL), methods=['POST'])`
			`def generate():`
			`edition_count = get_edition_count()`
			`new_main_tree = str(request.form['selected_tree'])`

			`if new_main_tree in all_simple:`

			`# Generate map for the cover`
			`# cover_map = StringIO()`
			`# print_map(cover_distances, show_distances=False, file_out=cover_map)`

			`fragment_cover_map = StringIO()`
			`print_map(fragment_word_distances, show_distances=False, file_out=fragment_cover_map)`

			`## 3. Count the similarity between the main tree and the other species in the forest`
			`similarities = calculate_distances(new_main_tree, all_simple)`

			`## 4. Sort the similarities between the trees and the new main tree from the lowest to the highest counts`
			`sorted_similarities = sort_distances(similarities)`

			`# Find the nearest species`
			`near_species, nearest_species = find_nearest_species(sorted_similarities, trees)`

			`new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)`

			`## 5. Compare the similarity between the main character tree and the main species in the forest,`
			`repetitive_poetry = StringIO()`
			`in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=repetitive_poetry)`

			`forest_map = StringIO()`
			`print_map(sorted_similarities, file_out=forest_map)`

			`table_of_intermediary_species = StringIO()`
			`print_table(new_main_tree, near_species, in_between_species, table_of_intermediary_species)`

			`now = datetime.datetime.now()`

			`context = {`
			`'edition_count': edition_count,`
			`# 'cover_map': cover_map.getvalue(),`
			`'date': now.strftime('%d-%m-%Y'),`
			`'time': now.strftime('%H:%M:%S'),`
			`'fragment_cover_map': fragment_cover_map.getvalue(),`
			`'forest_map': forest_map.getvalue(),`
			`'new_fragment': wrap(new_fragment, 85),`
			`'repetitive_poetry': wrap(repetitive_poetry.getvalue(), 55),`
			`'table_of_intermediary_species': table_of_intermediary_species.getvalue(),`
			`'sources': read_sources('la_distancia_de_levenshtein_lee_a_cortazar.py'),`
			`'BASEDIR': dirname(__file__)`
			`}`

			`raw_html = render_template('print.html', **context)`
			`pdf = HTML(string=raw_html).write_pdf()`

			`repetitive_poetry.close()`
			`forest_map.close()`
			`table_of_intermediary_species.close()`

			`# return send_file(pdf, attachment_filename='La distancia de Levenshtein {}.pdf'.format(edition_count), as_attachment=True)`

			`r = Response(pdf, mimetype='application/pdf')`

			`r.headers.extend({`
			`'Content-Disposition': 'attachment; filename="La distancia de Levenshtein {}.pdf"'.format(edition_count)`
			`})`

			`return r`

			`return '500'`