Functional copy of the folder in the Algolit repository 'Algoliterary Publishing', where this project is still saved.
parent 14e27bec96
commit 7d6da2e2bd
@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<comment version="3.0">
<caption/>
<note>User comments</note>
<place/>
<categories/>
</comment>
@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<comment version="3.0">
<caption/>
<note>charset="Ascii" User comments</note>
<place/>
<categories/>
</comment>
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,161 @@
#!/usr/bin/env python

# This is the web interface for 'la_distancia_de_levenshtein_lee_a_cortazar'
# and generates the pdf using weasyprint.

# Copyright (C) 2021, Anais Berck
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.

import textwrap
from io import StringIO
from os.path import dirname
import re
import datetime

import os
from fcntl import lockf, LOCK_EX, LOCK_UN

from flask import Flask, Response, render_template, request, send_file
from weasyprint import HTML

from la_distancia_de_levenshtein_lee_a_cortazar import (
    calculate_distances, find_nearest_species, generate_in_between_species,
    generate_new_fragment, openfile, print_map, print_table, sort_distances)

BASEURL = '/levenshtein'
COUNTER_PATH = 'edition_counter.txt'


def strip_but_spaces_and_words(text):
    return re.sub(r'[^\w\s\d]', '', text)


def wrap(text, width):
    return '\n'.join(['\n'.join(textwrap.wrap(line, width=width)) for line in text.splitlines()])


def read_sources(*paths):
    return [(p, wrap(open(p, 'r').read(), 120)) for p in paths]


def get_edition_count():
    fd = os.open(COUNTER_PATH, os.O_RDWR|os.O_CREAT)
    lockf(fd, LOCK_EX)
    fo = os.fdopen(fd, 'r+', encoding='utf-8')
    content = fo.read()
    if not content:
        edition_count = 0
    else:
        edition_count = int(content.strip())
    edition_count += 1
    fo.seek(0)
    fo.truncate()
    fo.write(str(edition_count))
    fo.flush()
    lockf(fd, LOCK_UN)
    os.close(fd)

    return edition_count
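

# Illustrative sketch (the helper name below is hypothetical): every call bumps
# the number stored in COUNTER_PATH under an exclusive lock, so concurrent
# requests receive consecutive edition numbers.
def _edition_count_example():
    first = get_edition_count()
    second = get_edition_count()
    # With no other process writing to the counter file, it simply increments.
    assert second == first + 1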


app = Flask(__name__)
trees = []

## 1A. Open textfiles & turn textfiles into machine-readable lists

# Cortazar
txt = 'eucalipto_cortazar.txt'

all_plural = openfile('arboles_plural.txt')
all_simple = openfile('arboles_simple.txt')

## 1B. Turn list of trees into dictionary of singular/plural words
trees = dict(zip(all_simple, all_plural))

## 2. HOMEPAGE
## load fragment Cortazar
with open(txt, 'r') as source:
    fragment = source.read()

# cover_distances = sort_distances(calculate_distances('eucalipto', all_simple))

fragment_words = re.split(r'[\n\s]+', strip_but_spaces_and_words(fragment).lower())
fragment_word_distances = sort_distances(calculate_distances('eucalipto', fragment_words))


@app.route('/')
def index():
    return render_template('index.html', trees=trees, BASEURL=BASEURL)


@app.route('{}/generate'.format(BASEURL), methods=['POST'])
def generate():
    edition_count = get_edition_count()
    new_main_tree = str(request.form['selected_tree'])

    if new_main_tree in all_simple:

        # Generate map for the cover
        # cover_map = StringIO()
        # print_map(cover_distances, show_distances=False, file_out=cover_map)

        fragment_cover_map = StringIO()
        print_map(fragment_word_distances, show_distances=False, file_out=fragment_cover_map)

        ## 3. Count the similarity between the main tree and the other species in the forest
        similarities = calculate_distances(new_main_tree, all_simple)

        ## 4. Sort the similarities between the trees and the new main tree from the lowest to the highest counts
        sorted_similarities = sort_distances(similarities)

        # Find the nearest species
        near_species, nearest_species = find_nearest_species(sorted_similarities, trees)

        new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)

        ## 5. Compare the similarity between the main character tree and the main species in the forest
        repetitive_poetry = StringIO()
        in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=repetitive_poetry)

        forest_map = StringIO()
        print_map(sorted_similarities, file_out=forest_map)

        table_of_intermediary_species = StringIO()
        print_table(new_main_tree, near_species, in_between_species, table_of_intermediary_species)

        now = datetime.datetime.now()

        context = {
            'edition_count': edition_count,
            # 'cover_map': cover_map.getvalue(),
            'date': now.strftime('%d-%m-%Y'),
            'time': now.strftime('%H:%M:%S'),
            'fragment_cover_map': fragment_cover_map.getvalue(),
            'forest_map': forest_map.getvalue(),
            'new_fragment': wrap(new_fragment, 85),
            'repetitive_poetry': wrap(repetitive_poetry.getvalue(), 55),
            'table_of_intermediary_species': table_of_intermediary_species.getvalue(),
            'sources': read_sources('la_distancia_de_levenshtein_lee_a_cortazar.py'),
            'BASEDIR': dirname(__file__)
        }

        raw_html = render_template('print.html', **context)
        pdf = HTML(string=raw_html).write_pdf()

        repetitive_poetry.close()
        forest_map.close()
        table_of_intermediary_species.close()

        # return send_file(pdf, attachment_filename='La distancia de Levenshtein {}.pdf'.format(edition_count), as_attachment=True)

        r = Response(pdf, mimetype='application/pdf')

        r.headers.extend({
            'Content-Disposition': 'attachment; filename="La distancia de Levenshtein {}.pdf"'.format(edition_count)
        })

        return r

    return '500'
@ -0,0 +1,90 @@
abedules
abetos
acebos
acebuches
aguacates
ailantos
aladiernos
álamos
albaricoqueros
alcanforeros
alcornoques
algarrobos
alisos
almendros
almeces
arces
arraclánes
avellanos
baobabs
bojs
boneteros
brezos
carballos
carpes
castaños
cedros
cerezos
chopos
cinamomos
cipréses
ciruelos
cocoteros
cornejos
ébanos
enebros
espantaloboses
espinos
eucaliptos
evónimos
fresnos
galaperos
granados
guayacanes
guindos
labiérnagos
laureles
lentiscos
lichis
limoneros
loros
madroños
maguillos
majuelos
mangos
manzanos
melocotoneros
melojos
mostellares
naranjos
negundos
nogales
olivos
olmos
ombúes
palmitos
perales
pereteros
pimenteros
pinabetes
pinos
pinsapos
piruétanos
plátanos
quejigos
rebollos
robles
sanguinos
sargatillos
sauces
saúcos
serbales
sicomoros
tabacos
tamariscos
tarajes
tarayes
tejos
temblónes
terebintos
tilos
@ -0,0 +1,90 @@
abedul
abeto
acebo
acebuche
aguacate
ailanto
aladierno
álamo
albaricoquero
alcanforero
alcornoque
algarrobo
aliso
almendro
almez
arce
arraclán
avellano
baobab
boj
bonetero
brezo
carballo
carpe
castaño
cedro
cerezo
chopo
cinamomo
ciprés
ciruelo
cocotero
cornejo
ébano
enebro
espantalobos
espino
eucalipto
evónimo
fresno
galapero
granado
guayacán
guindo
labiérnago
laurel
lentisco
lichi
limonero
loro
madroño
maguillo
majuelo
mango
manzano
melocotonero
melojo
mostellar
naranjo
negundo
nogal
olivo
olmo
ombú
palmito
peral
peretero
pimentero
pinabete
pino
pinsapo
piruétano
plátano
quejigo
rebollo
roble
sanguino
sargatillo
sauce
saúco
serbal
sicomoro
tabaco
tamarisco
taraje
taray
tejo
temblón
terebinto
tilo
@ -0,0 +1,23 @@
acacia
bardaguera
caoba
carrasquilla
cornicabra
coscoja
encina
higuera
mimbre
mimbrera
mimosa
morera
palma
palmera
robinia
sabina
salciña
sarga
secuoya
sófora
teca
velintonia
haya (m)
@ -0,0 +1,69 @@
import math
import random

words = [
    (8, ' AAA '),
    (6, ' BBB '),
    (8, ' CCC '),
    (8, ' DDD '),
    (3, ' EEE '),
    (8, ' FFF '),
    (2, ' GGG '),
]

height = 30
width = 40

middle = (20, 15)

grid = [[] for x in range(height + 1)]

grid[middle[1]].append((middle[0] * 2, (middle[0] * 2 + 1), 'X'))


def space_available(grid, start, end, line):
    other_words = grid[line]

    for other_start, other_end, _ in other_words:
        if start < other_end and end > other_start:
            return False

    return True
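

# Illustrative sketch (hypothetical values): a word occupying columns 10-15 of a
# row blocks any overlapping placement on that row, but not one starting at 15.
def _space_available_example():
    test_grid = [[] for _ in range(3)]
    test_grid[1].append((10, 15, ' AAA '))
    assert space_available(test_grid, 15, 20, 1)
    assert not space_available(test_grid, 12, 18, 1)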


for distance, word in words:
    placed = False

    angle = random.random() * math.pi * 2
    step = 0
    steps = 20
    while (not placed) and step < steps:

        x = int(round(math.cos(angle) * distance) + middle[0]) * 2
        y = int(round(math.sin(angle) * distance) + middle[1])

        start = x - max(0, int(math.floor(len(word) / 2)))
        end = start + len(word)

        if space_available(grid, start, end, y):
            grid[y].append((start, end, word))
            placed = True

        angle += (math.pi * 2 / steps)
        # count the attempt so the loop cannot run forever when a word does not fit
        step += 1

        # print(angle, x, y)

print(grid)

for row in grid:
    # Sort by first key of the tuples, start of the word
    row = sorted(row, key=lambda r: r[0])

    if not len(row):
        print(width * ' ')
    else:
        line = ''
        for start, _, word in row:
            line += (start - len(line)) * ' ' + word

        line += (width - len(line)) * ' '

        print(line)
@ -0,0 +1,4 @@
from weasyprint import HTML

HTML(filename='content.html')\
    .write_pdf('weasyprint-library.pdf')
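
# The web interface renders its PDF from an HTML string rather than a file; a
# minimal sketch of that variant (the string and output filename are illustrative):
HTML(string='<h1>La distancia de Levenshtein</h1>').write_pdf('weasyprint-string.pdf')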
@ -0,0 +1,315 @@
#!/usr/bin/env python

# This script unfolds the Levenshtein Distance algorithm,
# often used in spellcheckers and to detect similarity between texts.
# The algorithm reads a fragment about a eucalyptus tree from
# Julio Cortazar's 'Historias de Cronopios y Famas', Alfaguara, 2012.

# Copyright (C) 2021, Anais Berck
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.

# Python libraries
import random
import math
import textwrap
import Levenshtein
import sys

# Functions

# Open & prepare textfile for machine reading
def openfile(txt):
    all_text = []
    with open(txt, 'r') as source:
        # Read the text
        text = source.read()
        # Remove punctuation
        characters = ['\n', '-', ':', '.', ',']
        clean_text = text
        for c in characters:
            if c in clean_text:
                clean_text = clean_text.replace(c, ' ')
        # Transform the fragment into a list of words
        words = clean_text.split()
        # Collect all words of the fragment in a list of words
        for word in words:
            word = word.lower()
            word = word.strip()
            all_text.append(word)
    return all_text
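

# Illustrative sketch (hypothetical file name and contents): openfile returns the
# text of a file as a flat list of lowercase words.
def _openfile_example():
    with open('ejemplo_openfile.txt', 'w') as handle:
        handle.write('Pinos robles\n')
    assert openfile('ejemplo_openfile.txt') == ['pinos', 'robles']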


# Levenshtein Distance
def levenshtein(a, b):
    if not a: return len(b)
    if not b: return len(a)
    return min(levenshtein(a[1:], b[1:])+(a[0] != b[0]),
               levenshtein(a[1:], b)+1,
               levenshtein(a, b[1:])+1)
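

# Illustrative sketch: the recursion above counts single-character insertions,
# deletions and substitutions, for example between two of the tree names.
def _levenshtein_example():
    # 'pino' -> 'tilo': substitute 'p' with 't' and 'n' with 'l', so distance 2.
    assert levenshtein('pino', 'tilo') == 2
    # 'olmo' -> 'olivo': substitute 'm' with 'i' and insert 'v', so distance 2.
    assert levenshtein('olmo', 'olivo') == 2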


# Create map
def space_available(grid, start, end, line):
    other_words = grid[line]

    for other_start, other_end, _ in other_words:
        if start < other_end and end > other_start:
            return False
    return True


# Create frame
def formatTable(words, cellWidth, cellHeight, padding=1, cells=6):
    def makeRow(height):
        return [''] * cellHeight

    def makeLine(width, text = '', fillChar=' ', padding=1):
        text = (padding * fillChar) + text
        return text.center(width - padding, ' ') + (padding * fillChar)

    out = ""

    row = makeRow(cellHeight)

    cell = 0

    for word in words:
        wrapped = textwrap.wrap(word, width=cellWidth - (2 * padding))
        lines = padding * [makeLine(cellWidth, padding=padding)]

        for line in wrapped:
            lines.append(makeLine(cellWidth, text=line))

        for _ in range(cellHeight - len(lines)):
            lines.append(makeLine(cellWidth, padding=padding))

        cell += 1

        for (i, line) in enumerate(lines):
            row[i] += line

        if cell == cells:
            out += '\n'.join(row)
            row = makeRow(cellHeight)
            cell = 0

    if cell > 0:
        out += '\n'.join(row)
        row = makeRow(cellHeight)

    return out


# print statements for interaction with reader
def pick_new_main_tree_terminal(fragment, all_simple, file_out=sys.stdout):
    print(fragment, file=file_out)
    ## ask input reader
    print("\nPara que La Distancia de Levenshtein pueda leer el texto y producir un libro único, necesitas cambiar una palabra.", file=file_out)
    print("\nPuedes elegir otra especie de árbol para el eucalipto, el árbol principal del fragmento. La Distancia de Levenshtein calculará entonces qué especie se encuentra en su cercanía y se reemplazará en el fragmento la palabra más genérica 'árboles' por esta nueva especie.", file=file_out)
    print("\nPor favor, elige el árbol principal de la lista siguiente:", file=file_out)
    print(', '.join(all_simple), file=file_out)
    print("\nQuiero reemplazar el eucalipto por:", file=file_out)
    new_main_tree = input()
    while new_main_tree not in all_simple:
        print("Tal vez escribiste mal la especie, por favor intente de nuevo.", file=file_out)
        new_main_tree = input()
    return new_main_tree


# Levenshtein Distance between new main tree and other tree species
def calculate_distances(center_tree, other_trees):
    # Declare dictionary object
    distances = {}
    # For every species in the list, compare the main tree to the selected species and count the similarity between both
    for tree in other_trees:
        # using the Levenshtein Distance algorithm
        # count = levenshtein(new_main_tree,tree)
        count = Levenshtein.distance(center_tree, tree)
        # save each compared species and its similarity count in the dictionary
        distances[tree] = count

    return distances


def sort_distances(distances):
    return sorted(distances.items(), key = lambda kv: kv[1])
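

# Illustrative sketch (small hypothetical species list): calculate_distances
# returns a dictionary of species -> distance to the centre tree, and
# sort_distances turns it into (species, distance) pairs from nearest to furthest.
def _sort_distances_example():
    distances = calculate_distances('pino', ['pino', 'tilo', 'olmo'])
    assert distances == {'pino': 0, 'tilo': 2, 'olmo': 3}
    assert sort_distances(distances) == [('pino', 0), ('tilo', 2), ('olmo', 3)]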


# Find the minimum distance between new main tree and near species
def find_nearest_species(sorted_distances, trees):
    # First entry in sorted_distances is the new_main_tree, distance 0
    # pick the next value
    minimum_distance = sorted_distances[1][1]
    possible_trees = []

    for tree in sorted_distances[1:]:
        if tree[1] == minimum_distance:
            possible_trees.append(tree)
        else:
            break

    near_species = random.choice(possible_trees)[0]
    nearest_species = trees[near_species]

    return (near_species, nearest_species)
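

# Illustrative sketch (hypothetical, deterministic input): with a single species
# at the minimum distance, its singular and plural forms are returned.
def _find_nearest_species_example():
    sorted_distances = [('pino', 0), ('tilo', 2), ('olmo', 3)]
    tree_dict = dict(zip(['pino', 'tilo', 'olmo'], ['pinos', 'tilos', 'olmos']))
    assert find_nearest_species(sorted_distances, tree_dict) == ('tilo', 'tilos')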


# rewrite fragment Cortazar
def generate_new_fragment(fragment, new_main_tree, nearest_species):
    new_fragment = fragment.replace("eucalipto", new_main_tree)
    new_fragment = new_fragment.replace("árboles", nearest_species)
    new_fragment = new_fragment.replace("Un fama anda", "Un fama ignorante anda")

    return new_fragment
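

# Illustrative sketch (hypothetical fragment): every 'eucalipto' becomes the chosen
# tree, 'árboles' becomes the plural of its nearest species, and the fama is
# introduced as ignorant.
def _generate_new_fragment_example():
    fragment = 'Un fama anda por el bosque de árboles y encuentra un eucalipto.'
    new = generate_new_fragment(fragment, 'pino', 'tilos')
    assert new == 'Un fama ignorante anda por el bosque de tilos y encuentra un pino.'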


# generate in between species and show the process of the Levenshtein Distance algorithm
def generate_in_between_species(new_main_tree, near_species, file_out=sys.stdout):
    # Define length of main character tree and major species
    length_main_character_tree = len(new_main_tree)
    length_near_species = len(near_species)

    # Declare a list of in between words showing the process
    in_between_species = []
    # Loop over every number until reaching the total length of the original word
    for cell in range(length_main_character_tree):
        #print('row number: ', cell)
        # Loop over every number until reaching the total length of the replacement word
        for number in range(length_near_species):
            #print('column number: ', number)
            # select the number of letters +1 of the original word
            part1 = new_main_tree[:cell+1]
            print('La Distancia de Levenshtein observa una parte del', new_main_tree, ':', part1, file=file_out)
            # select the number of letters +1 of the replacement word
            part2 = near_species[:number+1]
            print('Después observa una parte del', near_species, ':', part2, file=file_out)
            # selected letters of the original word replace the selected letters of the replacement word
            new_species = part1 + near_species[number+1:]
            print('En su intento de comparación reemplaza la parte del', near_species, 'por la parte del', new_main_tree, 'y crea así una nueva especie intermediaria: el', new_species, file=file_out)
            # add in between words to the list
            in_between_species.append(new_species)
            # calculate the similarity between in between words
            print('Calcula las acciones necesarias para reemplazar', new_main_tree[:cell+1], 'por', near_species[:number+1], ': ', Levenshtein.distance(new_main_tree[:cell+1], near_species[:number+1]), '\n', file=file_out)
            # print('\n', file=file_out)

    ## Print all in between words
    #print('in between species: ', in_between_species, file=file_out)

    return in_between_species
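

# Illustrative sketch (hypothetical pair of species): for every prefix of the main
# tree and every prefix of the near species, an intermediary name is built by
# grafting the main tree's prefix onto the tail of the near species.
def _generate_in_between_species_example():
    from io import StringIO  # local import keeps the sketch self-contained
    buffer = StringIO()      # swallow the narration printed by the function
    in_between = generate_in_between_species('boj', 'olmo', file_out=buffer)
    assert len(in_between) == len('boj') * len('olmo')
    assert in_between[0] == 'blmo'  # 'b' + 'olmo'[1:]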


# Draw a map of all in between species and their distance to main tree
def print_map(sorted_distances, show_distances=True, file_out=sys.stdout):
    # As characters are less wide than high, make them a bit wider
    xscale = 2
    # Height of the map
    height = 70
    # Width of the map
    width = int(70 * xscale)
    # Centerpoint of the map
    middle = (int(30 * xscale), 35)

    grid = [[] for x in range(height + 1)]

    centerpoint = sorted_distances[0][0]
    start = middle[0] - max(0, int(len(centerpoint) / 2))

    grid[middle[1]].append((start, start + len(centerpoint), centerpoint))

    for treename, distance in sorted_distances[1:]:
        placed = False

        treename = ' {}({}) '.format(treename, distance) if show_distances else ' {} '.format(treename)
        angle = random.random() * math.pi * 2
        step = 0
        steps = 180
        while (not placed) and step < steps:

            x = int(math.floor(math.cos(angle) * (distance * 2.8)) * xscale + middle[0])
            y = int(math.floor(math.sin(angle) * (distance * 2.8)) + middle[1])

            start = x - max(0, int(len(treename) / 2))
            end = start + len(treename)

            if space_available(grid, start, end, y):
                grid[y].append((start, end, treename))
                placed = True

            angle += (math.pi * 2 / steps)
            step += 1

        if not placed:
            print('Could not place {}'.format(treename), file=file_out)
            # print(angle, x, y)

    for row in grid:
        # Sort by first key of the tuples, start of the word
        row = sorted(row, key=lambda r: r[0])

        if len(row):
            line = ''
            for start, _, treename in row:
                line += (start - len(line)) * ' ' + treename

            print(line, file=file_out)


# draw table with all new intermediary species
def print_table(new_main_tree, near_species, in_between_species, file_out=sys.stdout):
    ## 8. Print table
    print('{} → {}'.format(new_main_tree, near_species), file=file_out)
    print('', file=file_out)
    print(formatTable(in_between_species, 20, 5, cells=len(near_species)), file=file_out)


# Execute functions
if __name__ == '__main__':
    ## 1A. Open textfiles & turn textfiles into machine-readable lists

    # Cortazar
    txt = 'eucalipto_cortazar.txt'
    # Open textfile
    all_text = openfile(txt)

    # List of trees
    txt1 = 'arboles_simple.txt'
    txt2 = 'arboles_plural.txt'
    all_simple = openfile(txt1)
    all_plural = openfile(txt2)

    ## 1B. Turn list of trees into dictionary of singular/plural words
    trees = dict(zip(all_simple, all_plural))

    ## 2. HOMEPAGE print statements
    ## print fragment Cortazar
    with open(txt, 'r') as source:
        fragment = source.read()

    ## 2b. Ask user to pick a new main tree
    new_main_tree = pick_new_main_tree_terminal(fragment, all_simple, sys.stdout)

    ## 3. Count the similarity between the main tree and the other species in the forest
    distances = calculate_distances(new_main_tree, all_simple)

    ## 4. Sort the distances between the trees and the new main tree from the lowest to the highest counts
    sorted_distances = sort_distances(distances)

    # Find the nearest species
    near_species, nearest_species = find_nearest_species(sorted_distances, trees)

    # Print rewritten fragment
    print("\n")
    new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)
    print(new_fragment, file=sys.stdout)

    ## 6. Compare the similarity between the main character tree and the main species in the forest
    in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=sys.stdout)

    # Show the sorted distances dictionary
    # print('Sorted distances: ', sorted_distances, file=sys.stdout)
    # print('\n', file=sys.stdout)

    # generate_new_text(fragment, new_main_tree, all_simple, all_plural, trees, sys.stdout)
    ## 7. Generate map of the woods
    print_map(sorted_distances, file_out=sys.stdout)

    ## 8. Generate intermediary species table
    print_table(new_main_tree, near_species, in_between_species, sys.stdout)
Binary file not shown.
@ -0,0 +1,3 @@
WeasyPrint
python-Levenshtein
flask
@ -0,0 +1,48 @@
#!/usr/bin/env python
# encoding=utf8

from bs4 import BeautifulSoup
import requests

species = []
name_species = ''

url = "https://www.arbolapp.es/especies-nombre-cientifico/"

# Getting the webpage, creating a Response object.
response = requests.get(url)

if response:
    # Extracting the source code of the page.
    data = response.text
    soup = BeautifulSoup(data, 'lxml')
    # find all h4 headings inside a div element of class contenido
    selector = 'div.contenido > h4'
    # find elements that contain the data we want
    found = soup.select(selector)
    for element in found:
        heading_data = element.text
        print(heading_data.lower())
    # print('soup:', soup)
    # for link in soup.find_all("a"):
    #     url = link.get("href", "")
    #     print('url:', url)
    #     if "/wiki/" in url:
    #         name_species = url.replace("/wiki/", "")
    #         species.append(name_species)

    # destination = "List_of_tree_genera.txt"
    # with open(destination, 'w') as source:
    #     for specie in species:
    #         source.write(specie)
    #         source.write('\n')
else:
    pass

# complete_links =["https://en.wikipedia.org/wiki/", "https://es.wikipedia.org/wiki/", "https://fr.wikipedia.org/wiki/", "https://nl.wikipedia.org/wiki/"]

'''
comments:
Trees of Africa refer to all countries listed here: https://en.wikipedia.org/wiki/Ecoregions_of_Africa
'''
@ -0,0 +1,93 @@
Copyright 2020 The XanhMono Project Authors (https://github.com/yellow-type-foundry/xanhmono).

This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
http://scripts.sil.org/OFL


-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------

PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.

The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.

DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.

"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).

"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).

"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.

"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.

PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:

1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.

2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.

3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.

4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.

5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.

TERMINATION
This license becomes null and void if any of the above conditions are
not met.

DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.