From 232f02ea3fe48e708c7b9cfd093e5c4f78b55744 Mon Sep 17 00:00:00 2001
From: Gijs
Date: Wed, 2 Jun 2021 13:24:15 +0200
Subject: [PATCH] Extended the markov chain script developed during the
 workshop 'Algoliterary Authors' to link the significant words in the
 sentence to the trees living in the Barrio de las Letras. And to generate an
 itinerary based on street addresses of these trees.

---
Note: the contents of the added files were revised after this patch was
generated, so the blob ids on the "index" lines below no longer match them.

 scripts/parse_trees.py |  70 +++++++++++++++++++++
 scripts/paseo.py       | 113 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 183 insertions(+)
 create mode 100644 scripts/parse_trees.py
 create mode 100644 scripts/paseo.py

diff --git a/scripts/parse_trees.py b/scripts/parse_trees.py
new file mode 100644
index 0000000..e6a23b0
--- /dev/null
+++ b/scripts/parse_trees.py
@@ -0,0 +1,70 @@
+import json
+import os.path
+
+# Medialab Prado
+medialab_prado = [-3.69380, 40.41041]
+# X 441133.28
+# Y 4473541.39
+
+
+# Plaza del Sol
+# Top left corner of the search field
+plaza_del_sol = [-3.7035, 40.41684]
+# X 440315.95
+# Y 4474261.62
+
+# Atocha Renfe
+# Bottom right corner of the search field
+atocha_renfe = [-3.6886, 40.40697]
+# X 441571.52
+# Y 4473156.10
+
+
+def within(bbox):
+    """Build a predicate that selects the trees inside a bounding box.
+
+    bbox is [[min_x, min_y], [max_x, max_y]]. The returned function takes a
+    tree (a GeoJSON feature) and returns True if the tree lives within the
+    box, bounds included, and has a street address (MINTDIRECCIONAUX) in
+    the database.
+    """
+    def eligible(tree):
+        coords = tree['geometry']['coordinates']
+        return (
+            bbox[0][0] <= coords[0] <= bbox[1][0]
+            and bbox[0][1] <= coords[1] <= bbox[1][1]
+            and tree['properties']['MINTDIRECCIONAUX'] is not None
+        )
+
+    return eligible
+
+
+def load_trees_from_json():
+    """Load the trees that live within the Barrio de las Letras.
+
+    Reads ../data/selected_trees_ordered.geojson, relative to this script,
+    and returns the list of its features that lie inside the barrio's
+    bounding box and have a street address recorded in the database.
+    """
+    basepath = os.path.dirname(__file__)
+    datafile = os.path.join(basepath, '../data/selected_trees_ordered.geojson')
+
+    barrio_de_las_letras = [
+        [-3.7025, 40.4134],
+        [-3.6945, 40.4169],
+    ]
+
+    # JSON is UTF-8 by specification; don't rely on the platform default.
+    with open(datafile, 'r', encoding='utf-8') as fp:
+        data = json.load(fp)
+
+    return list(filter(within(barrio_de_las_letras), data['features']))
+
+
+if __name__ == '__main__':
+    from random import shuffle
+
+    eligible_trees = load_trees_from_json()
+    print(len(eligible_trees))
+    shuffle(eligible_trees)
+    print(eligible_trees[:1])
diff --git a/scripts/paseo.py b/scripts/paseo.py
new file mode 100644
index 0000000..ade3268
--- /dev/null
+++ b/scripts/paseo.py
@@ -0,0 +1,113 @@
+import os.path
+
+from parse_trees import load_trees_from_json
+from medialab import crear_base_datos, paso
+from random import shuffle, random
+
+# Punctuation tokens. Words are tested with `in` against these strings,
+# exactly as the inline literals were. Those literals ended in '\)', an
+# invalid escape sequence that Python reads as a backslash followed by ')';
+# '\\)' spells the same two characters without the syntax warning.
+SENTENCE_END = '.!?'
+CLOSING_PUNCTUATION = ',:;\\)'
+ATTACHED_PUNCTUATION = '.,:;!?\\)'
+
+
+def path(word, words_tree, words_path, trees):
+    """Generate a sentence and turn it into an itinerary of trees.
+
+    Starting from word, words are drawn one at a time with paso() until a
+    sentence-ending token comes up or the itinerary has 100 steps. Each
+    time a word from words_tree is drawn, the text collected since the
+    previous stop is paired with a tree. Every distinct tree word is linked
+    to one tree, popped from the front of trees (the list is consumed).
+
+    Returns a list of (text, tree) tuples. Generation also stops when a new
+    tree word comes up and no trees are left.
+    """
+    # Maps each tree word to the tree assigned to it
+    tree_index = {}
+    itinerary = []
+    current_step = word.capitalize() + ' '
+
+    word = paso(word, words_tree, words_path)
+
+    while len(itinerary) < 100 and word not in SENTENCE_END:
+        if word in CLOSING_PUNCTUATION:
+            # Punctuation attaches to the previous word: drop the separator
+            current_step = current_step[:-1]
+
+        # Now and then take a breath: a line break instead of a space
+        separator = '\n' if random() < 0.1 else ' '
+        current_step += word + separator
+
+        if word in words_tree:
+            # Current word is a tree word, this step in the itinerary is
+            # 'complete'. If the word is not yet in the index, link a tree.
+            if word not in tree_index:
+                if not trees:
+                    # Every available tree is taken: end the walk here
+                    # instead of failing on an empty list.
+                    break
+                # Add tree to index and remove from list of available trees
+                tree_index[word] = trees.pop(0)
+
+            # Retrieve tree linked to this word from the index
+            tree = tree_index[word]
+
+            # Get a next word from the database
+            word = paso(word, words_tree, words_path)
+
+            # Look ahead: if the next word is punctuation, add it to the
+            # current step in place of the trailing separator.
+            if word in ATTACHED_PUNCTUATION:
+                current_step = current_step[:-1] + word
+
+                # Request a new next word to continue generation, unless
+                # the punctuation marks the end of the sentence, and thus
+                # of the itinerary. Then leave it so the while will break.
+                if word not in SENTENCE_END:
+                    word = paso(word, words_tree, words_path)
+
+            # Add the current step, and the tree to the itinerary
+            itinerary.append((current_step, tree))
+
+            # Clear the current step
+            current_step = ''
+        else:
+            word = paso(word, words_tree, words_path)
+
+    return itinerary
+
+
+def crear_camino(nombre_archivo, palabra_inicial):
+    """Generate a path from a text and a word from that text.
+
+    Loads and shuffles the trees, builds the word database from the text
+    file nombre_archivo and returns the itinerary that path() generates
+    starting at palabra_inicial.
+    """
+    trees = load_trees_from_json()
+    shuffle(trees)
+
+    print("Starting to read text")
+    (palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo)
+
+    print("Amount of tree words: ", len(palabras_arboles))
+
+    return path(palabra_inicial, palabras_arboles, palabras_camino, trees)
+
+
+basepath = os.path.dirname(__file__)
+textfile = os.path.join(basepath, '../data/emilia_prueba.txt')
+
+# EXECUTION_________________________________________________________________
+# The result is not named `path`: that would rebind the function above.
+camino = crear_camino(textfile, 'un')
+
+for step in camino:
+    print(step[0])
+    print(step[1]['properties']['NOMBRE_COMUN'], ' en ', step[1]['properties']['MINTDIRECCIONAUX'])
+
+# for tree in trees[:10]:
+#     print(tree['properties']['NOMBRE_COMUN'], ' en ', tree['properties']['MINTDIRECCIONAUX'])