The code has been rewritten to fit the lay-out proposal

4 years ago · b1d2ec08ba
parent 61315b7355
commit b1d2ec08ba
3 changed files with 190 additions and 128 deletions
--- a/scripts/format_lay_out.txt
+++ b/scripts/format_lay_out.txt
@@ -0,0 +1,13 @@
+itinerary = [
+    (
+        sentence,
+        tree,
+        [ 
+            (
+                word,
+                dice,
+                options
+            ),
+        ] # sentence part
+    ),
+]
--- a/scripts/medialab.py
+++ b/scripts/medialab.py
@@ -12,78 +12,85 @@ Created on Fri May 28 17:00:51 2021

 import spacy
 import random
+
 # Cargar tokenizer en español
 nlp = spacy.load("es_core_news_sm")

 #FUNCIONES_________________________________________________________________

 def limpiar_texto(fragmento):
-    fragmento_limpio = fragmento.split('--')
-    fragmento_limpio = ', '.join(fragmento_limpio)
-    fragmento_limpio = fragmento.split('\n')
-    fragmento_limpio = ' '.join(fragmento_limpio)
-    return fragmento_limpio
+	fragmento_limpio = fragmento.split('--')
+	fragmento_limpio = ', '.join(fragmento_limpio)
+	fragmento_limpio = fragmento.split('\n')
+	fragmento_limpio = ' '.join(fragmento_limpio)
+	return fragmento_limpio

 def crear_base_datos(nombre_texto):
-    # Abrir el archivo de texto para crear la base de datos
-    archivo = open(nombre_texto, 'r')
-    fragmento = archivo.read()
-    archivo.close()
-    fragmento_limpio = limpiar_texto(fragmento)
-    
-    # Tokenización del fragmento de texto
-    doc = nlp(fragmento_limpio)
-    doc_len = len(doc)
+	# Abrir el archivo de texto para crear la base de datos
+	archivo = open(nombre_texto, 'r')
+	fragmento = archivo.read()
+	archivo.close()
+	fragmento_limpio = limpiar_texto(fragmento)
+	
+	# Tokenización del fragmento de texto
+	doc = nlp(fragmento_limpio)
+	doc_len = len(doc)

-    palabras_arboles = {} #Verbos, sustantivos, adverbios y adjetivos
-    palabras_camino = {} #El resto de palabras
-    for i in range(0, doc_len-1):
-        tok = doc[i]
-        palabra = tok.text.lower()
-        sig_palabra = doc[i+1].text.lower()
-        if (tok.pos_ == 'VERB' or tok.pos_ == 'NOUN'\
-            or tok.pos_ == 'ADJ' or tok.pos_ == 'ADV'):
-            if palabra in palabras_arboles:
-                palabras_arboles[palabra].append(sig_palabra)
-            else:
-                palabras_arboles[palabra] = [sig_palabra]
-        else:
-            if palabra in palabras_camino:
-                palabras_camino[palabra].append(sig_palabra)
-            else:
-                palabras_camino[palabra] = [sig_palabra]
-    return (palabras_arboles, palabras_camino)
+	palabras_arboles = {} #Verbos, sustantivos, adverbios y adjetivos
+	palabras_camino = {} #El resto de palabras
+	for i in range(0, doc_len-1):
+		tok = doc[i]
+		palabra = tok.text.lower()
+		sig_palabra = doc[i+1].text.lower()
+		if (tok.pos_ == 'VERB' or tok.pos_ == 'NOUN'\
+			or tok.pos_ == 'ADJ' or tok.pos_ == 'ADV'):
+			if palabra in palabras_arboles:
+				palabras_arboles[palabra].append(sig_palabra)
+			else:
+				palabras_arboles[palabra] = [sig_palabra]
+		else:
+			if palabra in palabras_camino:
+				palabras_camino[palabra].append(sig_palabra)
+			else:
+				palabras_camino[palabra] = [sig_palabra]
+	return (palabras_arboles, palabras_camino)

 #Genera la siguiente palabra en el camino según la cadena de Markov
 def paso(palabra, palabras_arboles, palabras_camino):
-    if palabra in palabras_arboles:
-        return random.choice(palabras_arboles[palabra])
-    elif palabra in palabras_camino :
-        return random.choice(palabras_camino[palabra])
-    else:
-        pass
+	if palabra in palabras_arboles:
+		posibilidades = palabras_arboles[palabra]
+		dado = random.choice(range(0, len(palabras_arboles[palabra])))
+		palabra_siguiente = palabras_arboles[palabra][dado]
+		return (posibilidades, dado, palabra_siguiente)
+	elif palabra in palabras_camino :
+		posibilidades = palabras_camino[palabra]
+		dado = random.choice(range(0, len(palabras_camino[palabra])))
+		palabra_siguiente = palabras_camino[palabra][dado]
+		return (posibilidades, dado, palabra_siguiente)
+	else:
+		pass

 def camino(palabra, palabras_arboles, palabras_camino):
-    i = 0
-    itinerario = palabra.capitalize() + ' '
-    while palabra != '.' and i < 100:
-        delimitador = ' '
-        palabra = paso(palabra, palabras_arboles, palabras_camino)
-        if palabra in '.,:;!?\)':
-            itinerario = itinerario[:-1]
-        aliento = random.random()
-        if aliento < 0.1:
-            delimitador = '\n'
-        itinerario += (palabra + delimitador)
-        i += 1
-    return itinerario
+	i = 0
+	itinerario = palabra.capitalize() + ' '
+	while palabra != '.' and i < 100:
+		delimitador = ' '
+		_, __, palabra_siguiente = paso(palabra, palabras_arboles, palabras_camino)
+		if palabra_siguiente in '.,:;!?\)':
+			itinerario = itinerario[:-1]
+		aliento = random.random()
+		if aliento < 0.1:
+			delimitador = '\n'
+		itinerario += (palabra_siguiente + delimitador)
+		i += 1
+	return itinerario

 #Genera un camino a partir de un texto y una palabra del texto
 def crear_camino(nombre_archivo, palabra_inicial):
-    (palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo)
-    
-    return camino(palabra_inicial, palabras_arboles, palabras_camino)
+	(palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo)
+	
+	return camino(palabra_inicial, palabras_arboles, palabras_camino)

 if __name__ == '__main__':
-    #EJECUCIÓN__________________________________________________________________
-    print(crear_camino('../data/emilia_prueba.txt', 'un'))
+	#EJECUCIÓN__________________________________________________________________
+	print(crear_camino('../data/emilia_prueba.txt', 'un'))
--- a/scripts/paseo.py
+++ b/scripts/paseo.py
@@ -2,87 +2,129 @@ from parse_trees import load_trees_from_json
 from medialab import crear_base_datos, paso
 from random import shuffle, random

+# creating Markov Chain in text & trees
 def path(word, words_tree, words_path, trees):
-    # Collects a list of trees to visit
-    tree_index = {}
-    itinerary = []
-    current_step = word.capitalize() + ' '
-
-    word = paso(word, words_tree, words_path)
-
-    while len(itinerary) < 100 and word not in '.!?':
-        if word in ',:;\)':
-            current_step = current_step[:-1]
-
-        breath = random()
-        if breath < 0.1:
-            separator = '\n'
-        else:
-            separator = ' '
-        
-        current_step += (word + separator)
-
-        if word in words_tree:
-            # Current word is a tree word, this step in the itinerary is 'complete'
-            # Word is not yet in the index, add a tree for this word
-            if word not in tree_index:
-                # Add tree to index and remove from list of available trees
-                tree_index[word] = trees.pop(0)
-
-            # Retreive tree linked to this word from the index
-            tree = tree_index[word]
-            
-
-            # Get a next word from the database
-            word = paso(word, words_tree, words_path)
-
-            # Try to look ahead to the next word, if the next word 
-            # is interpunction, add it to the current step
-            # but first remove trailing space
-            if word in '.,:;!?\)':
-                current_step = current_step[:-1] + word
-                # Request a new next word to continue generation  
-
-                # Test whether the next word marks the end of a sentence,
-                # thus the end of the itinerary. Then don't touch it so the
-                # while will break.
-                if word not in '.!?':
-                    word = paso(word, words_tree, words_path)
-
-            # Add the current step, and the tree to the itinerary
-            itinerary.append((
-                current_step,
-                tree
-            ))
-
-            # Clear the current step
-            current_step = ''
-        else:
-            word = paso(word, words_tree, words_path)
-
-    return itinerary
+	# Collects a list of trees to visit
+	tree_index = {}
+	itinerary = []
+	current_step = word.capitalize() + ' '
+	markov_decision_traces = [ ( word, -1, []) ]
+
+	posibilities, dice, next_word = paso(word, words_tree, words_path)
+	
+	while len(itinerary) < 100 and next_word not in '.!?':
+		if next_word in ',:;\)':
+			current_step = current_step[:-1]
+			current_step += ' '
+
+		breath = random()
+		if breath < 0.1:
+			separator = '\n'
+		else:
+			separator = ' '
+		
+		current_step += (next_word + separator)
+		markov_decision_traces.append(( next_word, dice, posibilities ))
+
+		if next_word in words_tree:
+			# Current word is a tree word, this step in the itinerary is 'complete'
+			# Word is not yet in the index, add a tree for this word
+			if next_word not in tree_index:
+				# Add tree to index and remove from list of available trees
+				tree_index[next_word] = trees.pop(0)
+
+			# Retreive tree linked to this word from the index
+			tree = tree_index[next_word]
+
+			# Get a next word from the database
+			word = next_word
+			posibilities, dice, next_word = paso(word, words_tree, words_path)
+
+			# Try to look ahead to the next word, if the next word 
+			# is interpunction, add it to the current step
+			# but first remove trailing space
+			if next_word in '.,:;!?\)':
+				current_step = current_step[:-1] + next_word
+				# Request a new next word to continue generation  
+				markov_decision_traces.append(( next_word, dice, posibilities ))
+
+				# Test whether the next word marks the end of a sentence,
+				# thus the end of the itinerary. Then don't touch it so the
+				# while will break.
+				if next_word not in '.!?':
+					word = next_word
+					posibilities, dice, next_word = paso(word, words_tree, words_path)
+
+			# Add the current step, and the tree to the itinerary
+			itinerary.append((
+				current_step,
+				tree,
+				markov_decision_traces
+			))
+
+			# Clear the current step
+			current_step = ''
+			markov_decision_traces = []
+		else:
+			word = next_word
+			posibilities, dice, next_word = paso(word, words_tree, words_path)
+
+	return itinerary

 # Genera un camino a partir de un texto y una palabra del texto
 def crear_camino(nombre_archivo, palabra_inicial):
-    trees = load_trees_from_json()
-    shuffle(trees)
-    
-    #print("Starting to read text")
-    (palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo)
-    
-    #print("Amount of tree words: ", len(palabras_arboles))
+	trees = load_trees_from_json()
+	shuffle(trees)
+	
+	#print("Starting to read text")
+	(palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo)
+	
+	#print("Amount of tree words: ", len(palabras_arboles))

-    return path(palabra_inicial, palabras_arboles, palabras_camino, trees)
+	return path(palabra_inicial, palabras_arboles, palabras_camino, trees)

 import os.path

 basepath = os.path.dirname(__file__)
-textfile = os.path.join(basepath, '../data/emilia_prueba.txt')

 #EJECUCIÓN__________________________________________________________________
-path = crear_camino(textfile, 'un')
-
-for step in path:
-    print(step[0])
-    print(step[1]['properties']['NOMBRE_COMUN'], ' en ', step[1]['properties']['MINTDIRECCIONAUX'])
+print('Puedes elegir una novela para crear tu Paseo por árboles de Madrid.')
+print('Opción 1: La novela "La madre naturaleza" de la escritora feminista Emilia Pardo Bazán \
+fue publicada en 1887. Usa en esta obra una prosa poética y descriptiva, y en sus páginas se \
+siente el amor que profesa al paisaje gallego, con un conocimiento de la botánica y de \
+las costumbres rurales muy superior al de sus contemporáneos.')
+print('Opción 2: La novela "Miau" del escritor Benito Pérez Galdós fue publicada en 1888. \
+Enmarcada en el género realista, satiriza el Madrid burocrático de finales del siglo XIX \
+a partir de las vicisitudes vitales de su protagonista, Ramón Villaamil, \
+un competente exempleado del Ministerio de Hacienda, al que una serie de intrigas \
+han dejado cesante.')
+novel = input('Por favor, marca 1 o 2: ')
+
+first_word = 'un'
+if novel == '1':
+	novel = os.path.join(basepath, '../data/emilia_prueba.txt')
+	author = 'Emilia Pardo Bazán'
+	title = 'La Madre Naturaleza'
+else:
+	novel = os.path.join(basepath, '../data/prueba.txt')
+	author = 'Benito Pérez Gáldos'
+	title = 'Miau'
+
+# Create title/subtitle
+print('\nPaseo por los árboles de Madrid con', author, 'y', title, '\n')
+print('-------------------------------------------------------------------------------------------\n')
+
+# Create chapters
+path = crear_camino(novel, first_word)
+
+sentences = []
+for sentence, tree, traces in path:
+	for word, dice, options in traces:
+		print('Dice rolled - {} -'.format(dice))
+		print('New word - {} - chosen from {}'.format(word, options))
+		print('')
+	sentences.append(sentence)
+	print('Itinerary:\n{} \n'.format(''.join(sentences)))
+	print('Tree linked to last word :', tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'], '\n')
+	print('\n')