You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
91 lines
3.0 KiB
Python
91 lines
3.0 KiB
Python
4 years ago
|
from parse_trees import load_trees_from_json
|
||
|
from medialab import crear_base_datos, paso
|
||
|
from random import shuffle, random
|
||
|
|
||
|
def path(word, words_tree, words_path, trees):
    """Collect an itinerary of (text, tree) steps, starting from *word*.

    Words are requested one at a time from ``paso`` and appended to the text
    of the current step. Each time a word that occurs in *words_tree* is
    reached, the step is closed and stored together with the tree linked to
    that word. The walk stops when a sentence-ending word is drawn, when the
    itinerary holds 100 steps, or when a new tree is needed but *trees* is
    empty.

    Parameters:
        word: starting word; it is capitalized and opens the first step.
        words_tree: container of "tree words"; passed through to ``paso``
            and used for membership tests.
        words_path: passed through to ``paso`` unchanged.
        trees: list of available trees. It is consumed in place from the
            front: every newly met tree word removes one tree from it.

    Returns:
        A list of ``(step_text, tree)`` tuples. Text gathered after the
        last tree word is not part of the result.
    """
    # NOTE: these are substring tests (``word in '<chars>'``), exactly as
    # before, so an empty word also counts as a match. The backslash was
    # already part of the original literals (written as the invalid escape
    # ``\)``); it is kept, now spelled as a valid escape, so that membership
    # results do not change.
    sentence_end = '.!?'
    closing_marks = ',:;\\)'
    lookahead_marks = '.,:;!?\\)'

    tree_index = {}
    itinerary = []
    current_step = word.capitalize() + ' '

    word = paso(word, words_tree, words_path)

    while len(itinerary) < 100 and word not in sentence_end:
        if word in closing_marks:
            # Punctuation attaches to the previous word: drop the separator.
            current_step = current_step[:-1]

        # Roughly one word in ten is followed by a line break ("breath").
        separator = '\n' if random() < 0.1 else ' '
        current_step += word + separator

        if word not in words_tree:
            # Ordinary word: keep extending the current step.
            word = paso(word, words_tree, words_path)
            continue

        # Current word is a tree word, so this step is 'complete'.
        if word not in tree_index:
            if not trees:
                # No tree left to link to a new tree word: end the walk
                # here instead of failing with IndexError on pop().
                break
            # Add tree to index and remove it from the available trees.
            tree_index[word] = trees.pop(0)

        # Retrieve the tree linked to this word from the index.
        tree = tree_index[word]

        # Get a next word from the database.
        word = paso(word, words_tree, words_path)

        # Look ahead: if the next word is punctuation, attach it to the
        # current step in place of the trailing separator.
        if word in lookahead_marks:
            current_step = current_step[:-1] + word

            # A sentence-ending word is left untouched so that the loop
            # condition ends the walk; otherwise request a new word to
            # continue generation.
            if word not in sentence_end:
                word = paso(word, words_tree, words_path)

        # Add the current step and its tree to the itinerary, then start
        # a fresh step.
        itinerary.append((current_step, tree))
        current_step = ''

    return itinerary
|
||
|
|
||
|
def crear_camino(nombre_archivo, palabra_inicial):
    """Generate a path from a text and a word of that text.

    Loads the trees and shuffles them in place, builds the word tables from
    the file *nombre_archivo*, reports how many tree words were found, and
    returns the result of ``path`` started at *palabra_inicial*.
    """
    available_trees = load_trees_from_json()
    shuffle(available_trees)

    print("Starting to read text")
    word_tables = crear_base_datos(nombre_archivo)
    tree_words, path_words = word_tables

    print("Amount of tree words: ", len(tree_words))

    itinerary = path(palabra_inicial, tree_words, path_words, available_trees)
    return itinerary
|
||
|
|
||
|
import os.path
|
||
|
|
||
|
# Location of the sample text, resolved relative to this file.
basepath = os.path.dirname(__file__)
textfile = os.path.join(basepath, '../data/emilia_prueba.txt')

# EXECUTION _________________________________________________________________
# The result is bound to `itinerary` rather than `path`: rebinding `path`
# here would replace the path() function that crear_camino() calls, so any
# later call to crear_camino() would fail.
itinerary = crear_camino(textfile, 'un')

# Print each step's text followed by the common name and address of its tree.
for step_text, tree in itinerary:
    print(step_text)
    properties = tree['properties']
    print(properties['NOMBRE_COMUN'], ' en ', properties['MINTDIRECCIONAUX'])

# Disabled debug output; note that no module-level `trees` exists here.
# for tree in trees[:10]:
#   print(tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'])
|