The code has been rewritten to fit the layout proposal

master
ana mertens 3 years ago
parent 61315b7355
commit b1d2ec08ba

@ -0,0 +1,13 @@
# Layout proposal for the itinerary data structure: a list holding one
# (sentence, tree, traces) tuple per step of the walk.
itinerary = [
(
sentence,  # text generated for this step
tree,  # tree record linked to this step
[
(
word,  # word that was chosen
dice,  # index that was drawn to choose it
options  # candidate words the draw was made from
),
] # sentence part
),
]

@ -12,6 +12,7 @@ Created on Fri May 28 17:00:51 2021
import spacy import spacy
import random import random
# Cargar tokenizer en español # Cargar tokenizer en español
nlp = spacy.load("es_core_news_sm") nlp = spacy.load("es_core_news_sm")
@ -57,9 +58,15 @@ def crear_base_datos(nombre_texto):
def paso(palabra, palabras_arboles, palabras_camino):
    """Generate the next word on the path according to the Markov chain.

    ``palabras_arboles`` is consulted first and ``palabras_camino`` second.
    Each maps a word to an indexable collection of words that may follow it.

    Returns:
        A tuple ``(posibilidades, dado, palabra_siguiente)``: the candidate
        words, the randomly drawn index into them, and the word found at
        that index. Returns ``None`` when ``palabra`` is a key of neither
        mapping.
    """
    # Same lookup for both tables; the tree table keeps priority by order.
    for sucesores in (palabras_arboles, palabras_camino):
        if palabra in sucesores:
            posibilidades = sucesores[palabra]
            # Draw an index rather than a word so the "dice roll" itself
            # can be reported back to the caller.
            dado = random.choice(range(len(posibilidades)))
            return (posibilidades, dado, posibilidades[dado])
    # Unknown word: there is nothing to continue the chain with.
    return None
@ -68,13 +75,13 @@ def camino(palabra, palabras_arboles, palabras_camino):
itinerario = palabra.capitalize() + ' ' itinerario = palabra.capitalize() + ' '
while palabra != '.' and i < 100: while palabra != '.' and i < 100:
delimitador = ' ' delimitador = ' '
palabra = paso(palabra, palabras_arboles, palabras_camino) _, __, palabra_siguiente = paso(palabra, palabras_arboles, palabras_camino)
if palabra in '.,:;!?\)': if palabra_siguiente in '.,:;!?\)':
itinerario = itinerario[:-1] itinerario = itinerario[:-1]
aliento = random.random() aliento = random.random()
if aliento < 0.1: if aliento < 0.1:
delimitador = '\n' delimitador = '\n'
itinerario += (palabra + delimitador) itinerario += (palabra_siguiente + delimitador)
i += 1 i += 1
return itinerario return itinerario

@ -2,17 +2,20 @@ from parse_trees import load_trees_from_json
from medialab import crear_base_datos, paso from medialab import crear_base_datos, paso
from random import shuffle, random from random import shuffle, random
# creating Markov Chain in text & trees
def path(word, words_tree, words_path, trees): def path(word, words_tree, words_path, trees):
# Collects a list of trees to visit # Collects a list of trees to visit
tree_index = {} tree_index = {}
itinerary = [] itinerary = []
current_step = word.capitalize() + ' ' current_step = word.capitalize() + ' '
markov_decision_traces = [ ( word, -1, []) ]
word = paso(word, words_tree, words_path) posibilities, dice, next_word = paso(word, words_tree, words_path)
while len(itinerary) < 100 and word not in '.!?': while len(itinerary) < 100 and next_word not in '.!?':
if word in ',:;\)': if next_word in ',:;\)':
current_step = current_step[:-1] current_step = current_step[:-1]
current_step += ' '
breath = random() breath = random()
if breath < 0.1: if breath < 0.1:
@ -20,45 +23,51 @@ def path(word, words_tree, words_path, trees):
else: else:
separator = ' ' separator = ' '
current_step += (word + separator) current_step += (next_word + separator)
markov_decision_traces.append(( next_word, dice, posibilities ))
if word in words_tree: if next_word in words_tree:
# Current word is a tree word, this step in the itinerary is 'complete' # Current word is a tree word, this step in the itinerary is 'complete'
# Word is not yet in the index, add a tree for this word # Word is not yet in the index, add a tree for this word
if word not in tree_index: if next_word not in tree_index:
# Add tree to index and remove from list of available trees # Add tree to index and remove from list of available trees
tree_index[word] = trees.pop(0) tree_index[next_word] = trees.pop(0)
# Retrieve tree linked to this word from the index # Retrieve tree linked to this word from the index
tree = tree_index[word] tree = tree_index[next_word]
# Get a next word from the database # Get a next word from the database
word = paso(word, words_tree, words_path) word = next_word
posibilities, dice, next_word = paso(word, words_tree, words_path)
# Try to look ahead to the next word, if the next word # Try to look ahead to the next word, if the next word
# is interpunction, add it to the current step # is interpunction, add it to the current step
# but first remove trailing space # but first remove trailing space
if word in '.,:;!?\)': if next_word in '.,:;!?\)':
current_step = current_step[:-1] + word current_step = current_step[:-1] + next_word
# Request a new next word to continue generation # Request a new next word to continue generation
markov_decision_traces.append(( next_word, dice, posibilities ))
# Test whether the next word marks the end of a sentence, # Test whether the next word marks the end of a sentence,
# thus the end of the itinerary. Then don't touch it so the # thus the end of the itinerary. Then don't touch it so the
# while will break. # while will break.
if word not in '.!?': if next_word not in '.!?':
word = paso(word, words_tree, words_path) word = next_word
posibilities, dice, next_word = paso(word, words_tree, words_path)
# Add the current step, and the tree to the itinerary # Add the current step, and the tree to the itinerary
itinerary.append(( itinerary.append((
current_step, current_step,
tree tree,
markov_decision_traces
)) ))
# Clear the current step # Clear the current step
current_step = '' current_step = ''
markov_decision_traces = []
else: else:
word = paso(word, words_tree, words_path) word = next_word
posibilities, dice, next_word = paso(word, words_tree, words_path)
return itinerary return itinerary
@ -77,12 +86,45 @@ def crear_camino(nombre_archivo, palabra_inicial):
import os.path import os.path
basepath = os.path.dirname(__file__) basepath = os.path.dirname(__file__)
textfile = os.path.join(basepath, '../data/emilia_prueba.txt')
#EJECUCIÓN__________________________________________________________________ #EJECUCIÓN__________________________________________________________________
path = crear_camino(textfile, 'un') print('Puedes elegir una novela para crear tu Paseo por árboles de Madrid.')
print('Opción 1: La novela "La madre naturaleza" de la escritora feminista Emilia Pardo Bazán \
for step in path: fue publicada en 1887. Usa en esta obra una prosa poética y descriptiva, y en sus páginas se \
print(step[0]) siente el amor que profesa al paisaje gallego, con un conocimiento de la botánica y de \
print(step[1]['properties']['NOMBRE_COMUN'], ' en ', step[1]['properties']['MINTDIRECCIONAUX']) las costumbres rurales muy superior al de sus contemporáneos.')
print('Opción 2: La novela "Miau" del escritor Benito Pérez Galdós fue publicada en 1888. \
Enmarcada en el género realista, satiriza el Madrid burocrático de finales del siglo XIX \
a partir de las vicisitudes vitales de su protagonista, Ramón Villaamil, \
un competente exempleado del Ministerio de Hacienda, al que una serie de intrigas \
han dejado cesante.')
# Ask which novel to use; any answer other than '1' selects option 2.
novel = input('Por favor, marca 1 o 2: ')
# Seed word handed to crear_camino together with the chosen text file.
first_word = 'un'
if novel == '1':
    novel = os.path.join(basepath, '../data/emilia_prueba.txt')
    author = 'Emilia Pardo Bazán'
    title = 'La Madre Naturaleza'
else:
    novel = os.path.join(basepath, '../data/prueba.txt')
    # Spelling matches the option text printed above ("Galdós").
    author = 'Benito Pérez Galdós'
    title = 'Miau'
# Create title/subtitle
print('\nPaseo por los árboles de Madrid con', author, 'y', title, '\n')
print('-------------------------------------------------------------------------------------------\n')
# Create chapters
path = crear_camino(novel, first_word)
sentences = []
for sentence, tree, traces in path:
for word, dice, options in traces:
print('Dice rolled - {} -'.format(dice))
print('New word - {} - chosen from {}'.format(word, options))
print('')
sentences.append(sentence)
print('Itinerary:\n{} \n'.format(''.join(sentences)))
print('Tree linked to last word :', tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'], '\n')
print('\n')

Loading…
Cancel
Save