Continued styling.

master
Gijs 3 years ago
parent 4fd0ac3c34
commit 0c060ca841

@ -16,6 +16,8 @@ from flask import Flask, render_template, request, Response, session
from pagedjs import make_pdf
from settings import DEBUG, BASEURL, DEFAULT_LANGUAGE, SECRET_KEY
import textwrap
import os
from fcntl import lockf, LOCK_EX, LOCK_UN
@ -76,6 +78,12 @@ def get_edition_count_en():
return edition_count
def wrap(text, width):
    """Hard-wrap every line of *text* to at most *width* columns.

    Each line of the input is wrapped on its own with ``textwrap.wrap`` and
    the resulting pieces are joined back together with newlines, so the line
    breaks between the original lines are kept.
    """
    wrapped_lines = []
    for line in text.splitlines():
        pieces = textwrap.wrap(line, width=width)
        wrapped_lines.append('\n'.join(pieces))
    return '\n'.join(wrapped_lines)
def read_sources(*paths):
    """Read the given files and return their content wrapped for printing.

    Returns a list of ``(path, text)`` tuples in the order the paths were
    passed, where ``text`` is the file content hard-wrapped to 105 columns
    by ``wrap``.
    """
    sources = []
    for p in paths:
        # A context manager closes the handle immediately instead of leaving
        # it open until the garbage collector gets to it.
        with open(p, 'r') as source_file:
            sources.append((p, wrap(source_file.read(), 105)))
    return sources
def get_language():
if 'LANGUAGE' in session:
return session['LANGUAGE']
@ -87,10 +95,38 @@ def set_language(language):
session.modified = True
def index_es():
    """Render the Spanish landing page with BASEURL available to the template."""
    # The key has to be the literal string 'BASEURL'. Using the variable as
    # the key would name the template variable after the *value* of BASEURL,
    # leaving ``{{ BASEURL }}`` undefined inside the template.
    context = {
        'BASEURL': BASEURL,
    }
    return render_template('index.html', **context)
def index_en():
    """Render the English landing page with BASEURL available to the template."""
    # The key has to be the literal string 'BASEURL'. Using the variable as
    # the key would name the template variable after the *value* of BASEURL,
    # leaving ``{{ BASEURL }}`` undefined inside the template.
    context = {
        'BASEURL': BASEURL,
    }
    return render_template('index_en.html', **context)
def add_last_word_previous_step(itinerary):
    """Add the last word of the previous step to every step of the itinerary.

    Each step is read by index: items 0-2 are copied unchanged and item 3 is
    the sequence of traces, each of which starts with a word (the templates
    unpack a step as ``sentence, previous_steps, tree, traces``).

    Returns a new list of 5-tuples; the input is not modified. The fifth
    item is the word of the last trace of the preceding step. It is ``None``
    for the first step and for any step that follows a step without traces.
    """
    new_itinerary = []
    last_word_previous_step = None
    for step in itinerary:
        traces = step[3]
        new_itinerary.append(
            (step[0], step[1], step[2], traces, last_word_previous_step)
        )
        # A step without traces has no last word; indexing [-1] on it would
        # raise IndexError, so fall back to None instead.
        last_word_previous_step = traces[-1][0] if traces else None
    return new_itinerary
@app.route('{}/en'.format(BASEURL))
def en():
@ -125,7 +161,8 @@ def book_es ():
author = 'Benito Pérez Gáldos' # Non breaking spaces
title = 'Miau'
path = crear_camino(novel, first_word, 'es')
path = add_last_word_previous_step(crear_camino(novel, first_word, 'es'))
complete_sentence = path[-1][1] + path[-1][0]
context = {
'title': title,
@ -134,6 +171,8 @@ def book_es ():
'STATIC_DIR': '/static' if DEBUG else PAGEDJS_STATIC_DIR,
'DEBUG': DEBUG,
'edition_count': edition_count,
'sources': read_sources('paseo.py', 'medialab.py'),
'complete_sentence': complete_sentence,
}
html = render_template('book.html', **context)
@ -167,7 +206,8 @@ def book_en ():
author = 'Benito Pérez Gáldos' # Non breaking spaces
title = 'Marianela'
path = crear_camino(novel, first_word, 'en')
path = add_last_word_previous_step(crear_camino(novel, first_word, 'en'))
complete_sentence = path[-1][1] + path[-1][0]
context = {
'title': title,
@ -176,6 +216,8 @@ def book_en ():
'STATIC_DIR': '/static' if DEBUG else PAGEDJS_STATIC_DIR,
'DEBUG': DEBUG,
'edition_count': edition_count,
'sources': read_sources('paseo.py', 'medialab.py'),
'complete_sentence': complete_sentence,
}
html = render_template('book_en.html', **context)

@ -27,7 +27,7 @@ def limpiar_texto(fragmento):
fragmento_limpio = ' '.join(fragmento_limpio)
return fragmento_limpio
def crear_base_datos(nombre_texto, lenguaje='es'):
def crear_base_datos(nombre_texto, idioma='es'):
# Abrir el archivo de texto para crear la base de datos
archivo = open(nombre_texto, 'r')
fragmento = archivo.read()
@ -35,11 +35,11 @@ def crear_base_datos(nombre_texto, lenguaje='es'):
fragmento_limpio = limpiar_texto(fragmento)
# Tokenización del fragmento de texto
if lenguaje == 'es':
doc = nlp(fragmento_limpio)
if idioma == 'en':
doc = nlp_en(fragmento_limpio)
doc_len = len(doc)
else:
doc = nlp_en(fragmento_limpio)
doc = nlp(fragmento_limpio)
doc_len = len(doc)
palabras_arboles = {} #Verbos, sustantivos, adverbios y adjetivos

@ -77,12 +77,12 @@ def path(word, words_tree, words_path, trees):
return itinerary
# Genera un camino a partir de un texto y una palabra del texto
def crear_camino(nombre_archivo, palabra_inicial, lenguaje='es'):
def crear_camino(nombre_archivo, palabra_inicial, idioma='es'):
trees = load_trees_from_json()
shuffle(trees)
#print("Starting to read text")
(palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo, lenguaje)
(palabras_arboles, palabras_camino) = crear_base_datos(nombre_archivo, idioma)
#print("Amount of tree words: ", len(palabras_arboles))

@ -0,0 +1,93 @@
Copyright (c) 2012-2013, The Mozilla Corporation and Telefonica S.A.
This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
http://scripts.sil.org/OFL
-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------
PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.
The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.
DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.
"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).
"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).
"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.
"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.
PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:
1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.
2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.
3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.
5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.
TERMINATION
This license becomes null and void if any of the above conditions are
not met.
DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.

Binary file not shown.

After

Width:  |  Height:  |  Size: 150 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

@ -28,14 +28,39 @@
font-style: italic;
}
@font-face {
font-family: "Fira mono";
src: url({{ STATIC_DIR }}/fira-mono/FiraMono-Regular.ttf) format("truetype");
font-weight: 400;
font-style: normal;
}
@font-face {
font-family: "Fira mono";
src: url({{ STATIC_DIR }}/fira-mono/FiraMono-Medium.ttf) format("truetype");
font-weight: 500;
font-style: normal;
}
/* Bold face of Fira Mono. "bold" is a font-weight keyword, not a valid
   font-style value, so the style is declared as normal and the weight as
   700 (the numeric value of bold). */
@font-face {
  font-family: "Fira mono";
  src: url({{ STATIC_DIR }}/fira-mono/FiraMono-Bold.ttf) format("truetype");
  font-weight: 700;
  font-style: normal;
}
@page {
size: 210mm 297mm;
margin: 10mm 20mm 15mm 20mm;
}
@page chain {
margin: 0mm 10mm 15mm 10mm;
}
@page title {
margin: 10mm 10mm 15mm 10mm;
background: green;
margin: 10mm 10mm 15mm 10mm;
}
@page:left {
@ -72,6 +97,9 @@
}
}
#chapter-chain {
page: chain;
}
:root {
--font-size: 10pt;
@ -85,11 +113,6 @@
line-height: var(--line-height);
}
ul {
margin: 0;
padding: 0;
}
h1 {
page: title;
color: white;
@ -105,6 +128,24 @@
font-style: normal;
}
h2 {
font-family: Serreria;
font-size: 18pt;
line-height: 24pt;
}
ul {
margin: 0 0 0 1.1em;
padding: 0;
list-style-type: none;
}
ul li:before {
content: ' ';
position: absolute;
margin-left: -1.1em;
}
section.step {
page-break-before: always;
text-align: center;
@ -114,6 +155,10 @@
padding: 0;
}
section.step.first {
page-break-before: right;
}
section.step_content {
overflow: hidden;
position: absolute;
@ -150,6 +195,11 @@
padding: 0;
}
.traces li:before,
.options li:before {
content: '';
}
.sentence {
z-index: 1;
position: absolute;
@ -162,7 +212,6 @@
/* background: white; */
}
.tree {
z-index: 1;
position: absolute;
@ -182,6 +231,10 @@
/* font-style: italic; */
}
.last-word-previous-step [data-picked] {
text-decoration: none;
}
[data-picked]::after {
content: ' → ';
text-decoration: none;
@ -195,6 +248,90 @@
.traces> :last-child [data-picked]::after {
display: none;
}
.chapter {
page-break-before: right;
}
.chapter#chapter-introduction,
.chapter#chapter-description,
.chapter#chapter-technical-description,
.chapter#chapter-credits {
width: 65%;
}
.pagedjs_right_page .chapter#chapter-introduction,
.pagedjs_right_page .chapter#chapter-description,
.pagedjs_right_page .chapter#chapter-technical-description,
.pagedjs_right_page .chapter#chapter-credits {
margin-left: 30%;
}
.pagedjs_right_page .sources {
float: left;
margin-left: calc(-1 * (40% + 3em));
width: 40%;
}
.pagedjs_left_page .sources {
float: left;
margin-right: calc(-1 * (100% + 3em));
width: 40%;
}
.sources a {
word-break: break-all;
}
a {
text-decoration: underline dotted;
color: currentColor;
}
pre, code, table {
font-family: "Fira mono";
font-size: .8rem;
}
th {
font-weight: normal;
}
h4 {
font-size: 1rem;
}
h4:first-child {
margin-top: 0;
}
.pagedjs_right_page p.images {
margin-left: -25%;
}
.pagedjs_left_page p.images {
margin-right: -25%;
}
img {
display: inline-block;
max-width: 45%;
vertical-align: top;
}
.complete_sentence {
height: 282mm;
text-align: center;
display: flex;
flex-direction: column;
justify-content: center;
page-break-before: right;
page-break-after: left;
padding-left: 15mm;
padding-right: 15mm;
}
</style>
{% if DEBUG %}
<link href="{{ STATIC_DIR }}/pagedjs-interface.css" rel="stylesheet" type="text/css">
@ -205,7 +342,47 @@
<body>
<h1>Paseo por los árboles de Madrid con&nbsp;<em>{{ author }}</em> y&nbsp;{{ title }}</h1>
<section class="chapter">
<section class="chapter" id="chapter-chain">
<h2 id="poema-paseo">Paseo por los árboles de Madrid con&nbsp;<em>{{ author }}</em> y&nbsp;{{ title }}</h2>
{% for sentence, previous_steps, tree, traces, last_word_previous_step in path %}
<section class="step{% if loop.first %} first{% endif %}">
<section class="sentence">
{{ previous_steps }}
</section>
<section class="step_content">
<ul class="traces">
{% if last_word_previous_step %}
<li class="last-word-previous-step">
<ul class="options">
<li data-picked>{{ last_word_previous_step}}</li>
</ul>
</li>
{% endif %}
{% for word, dice, options in traces %}
<li style="margin-top: calc(-{{ dice }} * var(--line-height))">
<ul class="options">
{% for option in options %}
<li {% if loop.index0==dice %}data-picked{% endif %}>
{{ option }}
</li>
{% endfor %}
</ul>
<!-- Rolled: {{ dice }} -->
</li>
{% endfor %}
</ul>
</section>
<section class="tree">
{{ tree.properties.NOMBRE_COMUN }} en {{ tree.properties.MINTDIRECCIONAUX }}
</section>
</section>
{% endfor %}
<section class="complete_sentence">
{{ complete_sentence }}
</section>
</section>
<section class="chapter" id="chapter-introduction">
<h2 id="introducci-n">Introducción</h2>
<p>Paseo por los árboles de Madrid es un libro en la <em>Editorial Algoliteraria: crear alianzas con los
árboles</em>.<br>El autor de este libro es el algoritmo de las cadenas de Markov. Genera simultáneamente
@ -244,45 +421,26 @@
</ul>
</section>
{% for sentence, previous_steps, tree, traces in path %}
<section class="step">
<h2 id="poema-paseo">Poema &amp; Paseo</h2>
<section class="sentence">
{{ previous_steps }}
</section>
<section class="step_content">
<ul class="traces">
{% for word, dice, options in traces %}
<li style="margin-top: calc(-{{ dice }} * var(--line-height))">
<ul class="options">
{% for option in options %}
<li {% if loop.index0==dice %}data-picked{% endif %}>
{{ option }}
</li>
{% endfor %}
</ul>
<!-- Rolled: {{ dice }} -->
</li>
{% endfor %}
</ul>
</section>
<section class="tree">
{{ tree.properties.NOMBRE_COMUN }} en {{ tree.properties.MINTDIRECCIONAUX }}
</section>
</section>
{% endfor %}
<section class="chapter">
<section class="chapter" id="chapter-description">
<h2 id="descripci-n-general-de-las-cadenas-de-markov">Descripción general de las cadenas de Markov</h2>
<section class="sources">
<h4 id="fuentes">Fuentes</h4>
<p><a
<ul>
<li>
<a
href="https://spectrum.ieee.org/andrey-markov-and-claude-shannon-built-the-first-language-generation-models">https://spectrum.ieee.org/andrey-markov-and-claude-shannon-built-the-first-language-generation-models</a>
<a href="http://langvillea.people.cofc.edu/MCapps7.pdf">http://langvillea.people.cofc.edu/MCapps7.pdf</a>
</li>
<li><a href="http://langvillea.people.cofc.edu/MCapps7.pdf">http://langvillea.people.cofc.edu/MCapps7.pdf</a></li>
<li>
<a
href="https://www.irishtimes.com/news/science/that-s-maths-andrey-markov-s-brilliant-ideas-are-still-bearing-fruit-1.3220929">https://www.irishtimes.com/news/science/that-s-maths-andrey-markov-s-brilliant-ideas-are-still-bearing-fruit-1.3220929</a>
</li>
<li>
<a
href="http://www.alpha60.de/research/markov/DavidLink_TracesOfTheMouth_2006.pdf">http://www.alpha60.de/research/markov/DavidLink_TracesOfTheMouth_2006.pdf</a>
</p>
</li>
</ul>
</section>
<h3 id="historias">Historias</h3>
<p>Andrey Andreyevich Markov fue un matemático ruso que vivió entre 1856 y 1922. Sus estudios más famosos fueron
con las cadenas de Markov, un algoritmo que permite predecir los cambios futuros una vez que se conoce el
@ -328,9 +486,10 @@
Aunque Markov hubiera tenido más tiempo y mejor vista para llevar a cabo sus experimentos, las extensiones
habrían sido muy difíciles de completar, dada la época preinformática en la que vivió, en la que los
esfuerzos computacionales debían pagarse en años-hombre. </p>
<p><img src="markov_1.jpeg" alt="">
<img src="markov_2.jpeg" alt=""><br>These images show Markovs original notes in computing the probabilities
needed for his Pushkin chain.
<p class="images"><img src="{{ STATIC_DIR }}/images/markov_1.jpeg" alt="">
<img src="{{ STATIC_DIR }}/images/markov_2.jpeg" alt=""><br>
Estas imágenes muestran las notas originales de Markov al calcular las probabilidades necesarias para su
cadena Pushkin.
</p>
<h3 id="influencia">Influencia</h3>
<p>Algunos de los conceptos centrales de Markov en torno a la probabilidad y el lenguaje se extendieron por el
@ -402,16 +561,25 @@
También aquí las cadenas de Markov han asumido gran parte del trabajo. </p>
</section>
<section class="chapter">
<section class="chapter" id="chapter-technical-description">
<h2 id="descripci-n-t-cnica-de-las-cadenas-de-markov">Descripción técnica de las cadenas de Markov</h2>
<h4 id="sources-">Fuentes:</h4>
<p><a
<section class="sources">
<h4>Fuentes:</h4>
<ul>
<li>
<a
href="https://en.wikipedia.org/wiki/Examples_of_Markov_chains">https://en.wikipedia.org/wiki/Examples_of_Markov_chains</a>
</li>
<li>
<a
href="https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/">https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/</a>
</li>
<li>
<a
href="https://towardsdatascience.com/predicting-the-weather-with-markov-chains-a34735f0c4df">https://towardsdatascience.com/predicting-the-weather-with-markov-chains-a34735f0c4df</a>
</p>
</li>
</ul>
</section>
<p>En un proceso de Markov podemos predecir los cambios futuros una vez que conocemos el estado actual.
Wikipedia describe muy bien la diferencia entre las cadenas de Markov y otros sistemas: &quot;Un juego de
serpientes y escaleras o cualquier otro juego cuyas jugadas se determinan enteramente por los dados es una
@ -751,12 +919,15 @@
</blockquote>
</section>
<section class="chapter">
<section class="chapter" id="chapter-code">
<h2 id="c-digo">Código</h2>
<!-- VOEG CODE TOE-->
{% for path, source in sources %}
<h3>{{ path }}</h3>
<pre>{{ source }}</pre>
{% endfor %}
</section>
<section class="chapter">
<section class="chapter" id="chapter-credits">
<h2 id="cr-ditos">Créditos</h2>
<p>Este libro es una creación de Anaïs Berck para Medialab como parte del programa &quot;Residencia Cultura
Digital&quot; iniciado por el Gobierno Flamenco.<br>En esta obra Anaïs Berck está representadx por:</p>

@ -9,33 +9,58 @@
<style>
@font-face {
font-family: Serreria;
src: url({{ STATIC_DIR }}/MFI-Serreria/MFI-Serreria-Extravagante.otf) format('opentype');
src: url("{{ STATIC_DIR }}/MFI-Serreria/MFI-Serreria-Extravagante.otf") format('opentype');
font-weight: normal;
font-style: normal;
}
/* Regular face of PT Serif. The URL is quoted so paths with special
   characters stay valid; nothing may follow the closing quote inside url(). */
@font-face {
  font-family: PTSerif;
  src: url("{{ STATIC_DIR }}/PT_Serif/PTSerif-Regular.ttf") format('truetype');
  font-weight: normal;
  font-style: normal;
}
@font-face {
font-family: PTSerif;
src: url({{ STATIC_DIR }}/PT_Serif/PTSerif-Italic.ttf) format('truetype');
src: url("{{ STATIC_DIR }}/PT_Serif/PTSerif-Italic.ttf") format('truetype');
font-weight: normal;
font-style: italic;
}
@font-face {
font-family: "Fira mono";
src: url("{{ STATIC_DIR }}/fira-mono/FiraMono-Regular.ttf") format("truetype");
font-weight: 400;
font-style: normal;
}
@font-face {
font-family: "Fira mono";
src: url("{{ STATIC_DIR }}/fira-mono/FiraMono-Medium.ttf") format("truetype");
font-weight: 500;
font-style: normal;
}
/* Bold face of Fira Mono. "bold" is a font-weight keyword, not a valid
   font-style value, so the style is declared as normal and the weight as
   700 (the numeric value of bold). */
@font-face {
  font-family: "Fira mono";
  src: url("{{ STATIC_DIR }}/fira-mono/FiraMono-Bold.ttf") format("truetype");
  font-weight: 700;
  font-style: normal;
}
@page {
size: 210mm 297mm;
margin: 10mm 20mm 15mm 20mm;
}
@page chain {
margin: 0mm 10mm 15mm 10mm;
}
@page title {
margin: 10mm 10mm 15mm 10mm;
background: green;
margin: 10mm 10mm 15mm 10mm;
}
@page:left {
@ -72,6 +97,9 @@
}
}
#chapter-chain {
page: chain;
}
:root {
--font-size: 10pt;
@ -85,11 +113,6 @@
line-height: var(--line-height);
}
ul {
margin: 0;
padding: 0;
}
h1 {
page: title;
color: white;
@ -105,6 +128,24 @@
font-style: normal;
}
h2 {
font-family: Serreria;
font-size: 18pt;
line-height: 24pt;
}
ul {
margin: 0 0 0 1.1em;
padding: 0;
list-style-type: none;
}
ul li:before {
content: ' ';
position: absolute;
margin-left: -1.1em;
}
section.step {
page-break-before: always;
text-align: center;
@ -114,6 +155,10 @@
padding: 0;
}
section.step.first {
page-break-before: right;
}
section.step_content {
overflow: hidden;
position: absolute;
@ -150,6 +195,11 @@
padding: 0;
}
.traces li:before,
.options li:before {
content: '';
}
.sentence {
z-index: 1;
position: absolute;
@ -162,7 +212,6 @@
/* background: white; */
}
.tree {
z-index: 1;
position: absolute;
@ -182,6 +231,10 @@
/* font-style: italic; */
}
.last-word-previous-step [data-picked] {
text-decoration: none;
}
[data-picked]::after {
content: ' → ';
text-decoration: none;
@ -195,6 +248,90 @@
.traces> :last-child [data-picked]::after {
display: none;
}
.chapter {
page-break-before: right;
}
.chapter#chapter-introduction,
.chapter#chapter-description,
.chapter#chapter-technical-description,
.chapter#chapter-credits {
width: 65%;
}
.pagedjs_right_page .chapter#chapter-introduction,
.pagedjs_right_page .chapter#chapter-description,
.pagedjs_right_page .chapter#chapter-technical-description,
.pagedjs_right_page .chapter#chapter-credits {
margin-left: 30%;
}
.pagedjs_right_page .sources {
float: left;
margin-left: calc(-1 * (40% + 3em));
width: 40%;
}
.pagedjs_left_page .sources {
float: left;
margin-right: calc(-1 * (100% + 3em));
width: 40%;
}
.sources a {
word-break: break-all;
}
a {
text-decoration: underline dotted;
color: currentColor;
}
pre, code, table {
font-family: "Fira mono";
font-size: .8rem;
}
th {
font-weight: normal;
}
h4 {
font-size: 1rem;
}
h4:first-child {
margin-top: 0;
}
.pagedjs_right_page p.images {
margin-left: -25%;
}
.pagedjs_left_page p.images {
margin-right: -25%;
}
img {
display: inline-block;
max-width: 45%;
vertical-align: top;
}
.complete_sentence {
height: 282mm;
text-align: center;
display: flex;
flex-direction: column;
justify-content: center;
page-break-before: right;
page-break-after: left;
padding-left: 15mm;
padding-right: 15mm;
}
</style>
{% if DEBUG %}
<link href="{{ STATIC_DIR }}/pagedjs-interface.css" rel="stylesheet" type="text/css">
@ -203,32 +340,25 @@
</head>
<body>
<h1>Walk along the trees of Madrid con&nbsp;<em>{{ author }}</em> y&nbsp;{{ title }}</h1>
<h1>Walk along the trees of Madrid with&nbsp;<em>{{ author }}</em> and&nbsp;{{ title }}</h1>
<section class="chapter">
<h2 id="introducci-n">Introduction</h2>
<p>Walk along the trees of Madrid is a book in the <em>An Algoliterary Publishing House: making kin with trees</em>.<br>The author of this book is the Markov chains algorithm. It simultaneously generates a poem and a walk along the trees of the neighbourhood Las Letras in the centre of Madrid.<br>The poem is created from a novel chosen by the reader. The reader has the choice between two novels by great Spanish writers of the 19th century:</p>
<ul>
<li><em>The Swan of Vila Morta</em> by the feminist writer Emilia Pardo Bazán published in 1891. </li>
<li><em>Marianela</em> by the writer Benito Pérez Galdós, published in 1878. </li>
</ul>
<p>The walk is generated from the database with trees in Madrid, <a href="http://www-2.munimadrid.es/DGPVE_WUAUA/welcome.do">Un Alcorque, un Árbol</a>. Each significant word - noun, adjective, verb or adverb - is related to a tree in Madrid&#39;s neighbourhood las Letras. The other words create the path between the different trees. Thus one can walk through the neighbourhood reciting parts of the poem to each tree along the promenade.<br>This book is by definition infinite and unique.<br>It is created by Anaïs Berck. It is a pseudonym that represents a collaboration between humans, algorithms and trees. Anaïs Berck explores the specificities of human intelligence in the company of artificial and plant intelligences.<br>An Algoliterary Publishing is a collection of publications in which algorithms are the authors of unusual books. This book was created as part of a residency at the center for contemporary arts Medialab Prado in Madrid. The residency was granted by the programme &quot;Residency Digital Culture&quot; initiated by the Flemish Government. </p>
<p>In this work Anaïs Berck is represented by:</p>
<ul>
<li>the Markov chains algorithm, of which a description is given in this book,</li>
<li>the trees of Madrid, which are geolocated between Medialab Prado, Plaza del Sol and Atocha Renfe, and present in the database <a href="http://www-2.munimadrid.es/DGPVE_WUAUA/welcome.do">Un Alcorque, un Árbol</a>,</li>
<li>the human beings Emilia Pardo Bazán, Benito Pérez Gáldos, Jaime Munárriz, Luis Morell, An Mertens, Eva Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.</li>
</ul>
</section>
{% for sentence, previous_steps, tree, traces in path %}
<section class="step">
<h2 id="poema-paseo">Poem &amp; Walk</h2>
<section class="chapter" id="chapter-chain">
<h2 id="poema-paseo">Walk along the trees of Madrid with&nbsp;<em>{{ author }}</em> and&nbsp;{{ title }}</h2>
{% for sentence, previous_steps, tree, traces, last_word_previous_step in path %}
<section class="step{% if loop.first %} first{% endif %}">
<section class="sentence">
{{ previous_steps }}
</section>
<section class="step_content">
<ul class="traces">
{% if last_word_previous_step %}
<li class="last-word-previous-step">
<ul class="options">
<li data-picked>{{ last_word_previous_step}}</li>
</ul>
</li>
{% endif %}
{% for word, dice, options in traces %}
<li style="margin-top: calc(-{{ dice }} * var(--line-height))">
<ul class="options">
@ -244,68 +374,227 @@
</ul>
</section>
<section class="tree">
{{ tree.properties.NOMBRE_COMUN }} en {{ tree.properties.MINTDIRECCIONAUX }}
{{ tree.properties.NOMBRE_COMUN }} on {{ tree.properties.MINTDIRECCIONAUX }}
</section>
</section>
{% endfor %}
<section class="complete_sentence">
{{ complete_sentence }}
</section>
</section>
<section class="chapter" id="chapter-introduction">
<h2 id="introducci-n">Introduction</h2>
<p>Walk along the trees of Madrid is a book in the <em>An Algoliterary Publishing House: making kin with
trees</em>.
<br>The author of this book is the Markov chains algorithm. It simultaneously generates a poem and a
walk along the trees of the neighbourhood Las Letras in the centre of Madrid.<br>The poem is created from a novel
chosen by the reader. The reader has the choice between two novels by great Spanish writers of the 19th century:
</p>
<ul>
<li><em>The Swan of Vila Morta</em> by the feminist writer Emilia Pardo Bazán published in 1891. </li>
<li><em>Marianela</em> by the writer Benito Pérez Galdós, published in 1878. </li>
</ul>
<p>The walk is generated from the database with trees in Madrid, <a
href="http://www-2.munimadrid.es/DGPVE_WUAUA/welcome.do">Un Alcorque, un Árbol</a>. Each significant word -
noun, adjective, verb or adverb - is related to a tree in Madrid&#39;s neighbourhood las Letras. The other words
create the path between the different trees. Thus one can walk through the neighbourhood reciting parts of the
poem to each tree along the promenade.<br>This book is by definition infinite and unique.<br>It is created by
Anaïs Berck. It is a pseudonym that represents a collaboration between humans, algorithms and trees. Anaïs Berck
explores the specificities of human intelligence in the company of artificial and plant intelligences.<br>An
Algoliterary Publishing House is a collection of publications in which algorithms are the authors of unusual books. This
book was created as part of a residency at the center for contemporary arts Medialab Prado in Madrid. The
residency was granted by the programme &quot;Residency Digital Culture&quot; initiated by the Flemish Government.
</p>
<p>In this work Anaïs Berck is represented by:</p>
<ul>
<li>the Markov chains algorithm, of which a description is given in this book,</li>
<li>the trees of Madrid, which are geolocated between Medialab Prado, Plaza del Sol and Atocha Renfe, and present
in the database <a href="http://www-2.munimadrid.es/DGPVE_WUAUA/welcome.do">Un Alcorque, un Árbol</a>,</li>
<li>the human beings Emilia Pardo Bazán, Benito Pérez Galdós, Jaime Munárriz, Luis Morell, An Mertens, Eva Marina
Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.</li>
</ul>
</section>
<section class="chapter">
<section class="chapter" id="chapter-description">
<h2 id="general-description-of-the-markov-chains">General description of the Markov Chains</h2>
<section class="sources">
<h4 id="sources">Sources</h4>
<p><a
<ul>
<li>
<a
href="https://spectrum.ieee.org/andrey-markov-and-claude-shannon-built-the-first-language-generation-models">https://spectrum.ieee.org/andrey-markov-and-claude-shannon-built-the-first-language-generation-models</a>
<a href="http://langvillea.people.cofc.edu/MCapps7.pdf">http://langvillea.people.cofc.edu/MCapps7.pdf</a>
</li>
<li><a href="http://langvillea.people.cofc.edu/MCapps7.pdf">http://langvillea.people.cofc.edu/MCapps7.pdf</a></li>
<li>
<a
href="https://www.irishtimes.com/news/science/that-s-maths-andrey-markov-s-brilliant-ideas-are-still-bearing-fruit-1.3220929">https://www.irishtimes.com/news/science/that-s-maths-andrey-markov-s-brilliant-ideas-are-still-bearing-fruit-1.3220929</a>
</li>
<li>
<a
href="http://www.alpha60.de/research/markov/DavidLink_TracesOfTheMouth_2006.pdf">http://www.alpha60.de/research/markov/DavidLink_TracesOfTheMouth_2006.pdf</a>
</p>
</li>
</ul>
</section>
<h3 id="historias">Histories</h3>
<p>Andrey Andreyevich Markov was a Russian mathematician who lived between 1856 and 1922. His most famous studies were with Markov chains, an algorithm that allows to predict future changes once one knows the current state . The first paper on the subject was published in 1906. He was also interested in literature. He tried establishing a linguistic mathematical model using Markov Chains by manually counting letters of Alexander Pusjkins verse novel Eugene Onegin. Next, he applied the method to the novel Childhood Years of Bagrov&#39;s Grandson by S.T. Aksakov. This links the Markov Chains directly to the field of literature, text and language. And the link will live firmly throughout the history of this algorithm.<br>The following text is based on Oscar Schwartz&#39; article for IEEE Spectrum, <a href="https://spectrum.ieee.org/andrey-markov-and-claude-shannon-built-the-first-language-generation-models">Andrey Markov &amp; Claude Shannon Counted Letters to Build the First Language-Generation Models</a>.<br>In 1913, Andrey Markov sat down in his study in St. Petersburg with a copy of Alexander Pushkins 19th century verse novel, <a href="https://en.wikipedia.org/wiki/Eugene_Onegin">Eugene Onegin</a>, a literary classic at the time. This work comprises almost 400 stanzas of iambic tetrameter. Markov, however, did not start reading Pushkins famous text. Rather, he took a pen and piece of drafting paper, and wrote out the first 20,000 letters of the book in one long string of letters, eliminating all punctuation and spaces. Then he arranged these letters in 200 grids (10-by-10 characters each) and began counting the vowels in every row and column, tallying the results.<br>In separating the vowels from the consonants, Markov was testing a theory of probability that he had developed in 1906 and that we now call a Markov Process or Markov Chain. 
Up until that point, the field of probability had been mostly limited to analyzing phenomena like roulette or coin flipping, where the outcome of previous events does not change the probability of current events. But Markov felt that most things happen in chains of causality and are dependent on prior outcomes. He wanted a way of modeling these occurrences through probabilistic analysis.<br>Language, Markov believed, was an example of a system where past occurrences partly determine present outcomes. To demonstrate this, he wanted to show that in a text like Pushkins novel, the chance of a certain letter appearing at some point in the text is dependent, to some extent, on the letter that came before it. </p>
<p>To do so, Markov began counting vowels in Eugene Onegin, and found that 43 percent of letters were vowels and 57 percent were consonants. Then Markov separated the 20,000 letters into pairs of vowels and consonant combinations. He found that there were 1,104 vowel-vowel pairs, 3,827 consonant-consonant pairs, and 15,069 vowel-consonant and consonant-vowel pairs. What this demonstrated, statistically speaking, was that for any given letter in Pushkins text, if it was a vowel, odds were that the next letter would be a consonant, and vice versa. </p>
<p>Markov used this analysis to demonstrate that Pushkins Eugene Onegin wasnt just a random distribution of letters but had some underlying statistical qualities that could be modeled. The enigmatic research paper that came out of this study, entitled <a href="http://cs.petrsu.ru/~olbgvl/greatapp/my_project/example_eng.html">An Example of Statistical Investigation of the Text Eugene Onegin Concerning the Connection of Samples in Chains</a> was not widely cited in Markovs lifetime, and not translated to English until 2006. Markov was forced to stop his letter-counting experiments, when he had nearly completely lost his sight due to glaucoma. Even if Markov had had more time and better eyesight to carry his experiments further, extensions would have been very difficult to complete, given the precomputer era he lived in, when computational efforts had to be paid in man-years. </p>
<p><img src="markov_1.jpeg" alt="">
<img src="markov_2.jpeg" alt=""><br>These images show Markovs original notes in computing the probabilities needed for his Pushkin chain. </p>
<p>Andrey Andreyevich Markov was a Russian mathematician who lived between 1856 and 1922. His most famous studies
were with Markov chains, an algorithm that allows one to predict future changes once the current state is known. The
first paper on the subject was published in 1906. He was also interested in literature. He tried establishing a
linguistic mathematical model using Markov Chains by manually counting letters of Alexander Pushkin&#39;s verse novel
Eugene Onegin. Next, he applied the method to the novel Childhood Years of Bagrov&#39;s Grandson by S.T.
Aksakov. This links the Markov Chains directly to the field of literature, text and language. And the link will
live firmly throughout the history of this algorithm.<br>The following text is based on Oscar Schwartz&#39;
article for IEEE Spectrum, <a
href="https://spectrum.ieee.org/andrey-markov-and-claude-shannon-built-the-first-language-generation-models">Andrey
Markov &amp; Claude Shannon Counted Letters to Build the First Language-Generation Models</a>.<br>In 1913,
Andrey Markov sat down in his study in St. Petersburg with a copy of Alexander Pushkin&#39;s 19th-century verse novel,
<a href="https://en.wikipedia.org/wiki/Eugene_Onegin">Eugene Onegin</a>, a literary classic at the time. This work
comprises almost 400 stanzas of iambic tetrameter. Markov, however, did not start reading Pushkin&#39;s famous text.
Rather, he took a pen and piece of drafting paper, and wrote out the first 20,000 letters of the book in one long
string of letters, eliminating all punctuation and spaces. Then he arranged these letters in 200 grids (10-by-10
characters each) and began counting the vowels in every row and column, tallying the results.<br>In separating the
vowels from the consonants, Markov was testing a theory of probability that he had developed in 1906 and that we
now call a Markov Process or Markov Chain. Up until that point, the field of probability had been mostly limited
to analyzing phenomena like roulette or coin flipping, where the outcome of previous events does not change the
probability of current events. But Markov felt that most things happen in chains of causality and are dependent on
prior outcomes. He wanted a way of modeling these occurrences through probabilistic analysis.<br>Language, Markov
believed, was an example of a system where past occurrences partly determine present outcomes. To demonstrate
this, he wanted to show that in a text like Pushkin&#39;s novel, the chance of a certain letter appearing at some
point in the text is dependent, to some extent, on the letter that came before it. </p>
<p>To do so, Markov began counting vowels in Eugene Onegin, and found that 43 percent of letters were vowels and 57
percent were consonants. Then Markov separated the 20,000 letters into pairs of vowels and consonant combinations.
He found that there were 1,104 vowel-vowel pairs, 3,827 consonant-consonant pairs, and 15,069 vowel-consonant and
consonant-vowel pairs. What this demonstrated, statistically speaking, was that for any given letter in Pushkin&#39;s
text, if it was a vowel, odds were that the next letter would be a consonant, and vice versa. </p>
<p>Markov used this analysis to demonstrate that Pushkin&#39;s Eugene Onegin wasn&#39;t just a random distribution of
letters but had some underlying statistical qualities that could be modeled. The enigmatic research paper that
came out of this study, entitled <a href="http://cs.petrsu.ru/~olbgvl/greatapp/my_project/example_eng.html">An
Example of Statistical Investigation of the Text Eugene Onegin Concerning the Connection of Samples in
Chains</a> was not widely cited in Markov&#39;s lifetime, and not translated to English until 2006. Markov was
forced to stop his letter-counting experiments, when he had nearly completely lost his sight due to glaucoma. Even
if Markov had had more time and better eyesight to carry his experiments further, extensions would have been very
difficult to complete, given the precomputer era he lived in, when computational efforts had to be paid in
man-years. </p>
<p class="images"><img src="{{ STATIC_DIR }}/images/markov_1.jpeg" alt="">
<img src="{{ STATIC_DIR }}/images/markov_2.jpeg" alt=""><br>These images show Markov&#39;s original notes in computing the probabilities
needed for his Pushkin chain.
</p>
<h3 id="influence">Influence</h3>
<p>Some of Markov&#39;s central concepts around probability and language spread across the globe, eventually finding re-articulation in Claude Shannons hugely influential paper, <a href="https://people.math.harvard.edu/~ctm/home/text/others/shannon/entropy/entropy.pdf">A Mathematical Theory of Communication</a> which came out in 1948.<br>Shannons paper outlined a way to precisely measure the quantity of information in a message, and in doing so, set the foundations for a theory of information that would come to define the digital age. Shannon was fascinated by Markovs idea that in a given text, the likelihood of some letter or word appearing could be approximated. Like Markov, Shannon demonstrated this by performing some textual experiments that involved making a statistical model of language, then took a step further by trying to use the model to generate text according to those statistical rules.<br>In an initial control experiment, he started by generating a sentence by picking letters randomly from a 27-symbol alphabet (26 letters, plus a space), and got the following output: </p>
<p>Some of Markov&#39;s central concepts around probability and language spread across the globe, eventually finding
re-articulation in Claude Shannon&#39;s hugely influential paper, <a
href="https://people.math.harvard.edu/~ctm/home/text/others/shannon/entropy/entropy.pdf">A Mathematical Theory
of Communication</a> which came out in 1948.<br>Shannon&#39;s paper outlined a way to precisely measure the quantity
of information in a message, and in doing so, set the foundations for a theory of information that would come to
define the digital age. Shannon was fascinated by Markov&#39;s idea that in a given text, the likelihood of some
letter or word appearing could be approximated. Like Markov, Shannon demonstrated this by performing some textual
experiments that involved making a statistical model of language, then took a step further by trying to use the
model to generate text according to those statistical rules.<br>In an initial control experiment, he started by
generating a sentence by picking letters randomly from a 27-symbol alphabet (26 letters, plus a space), and got
the following output: </p>
<p>XFOML RXKHRJFFJUJ ZLPWCFWKCYJ FFJEYVKCQSGHYD QPAAMKBZAACIBZLHJQD </p>
<p>The sentence was meaningless noise, Shannon said, because when we communicate we dont choose letters with equal probability. As Markov had shown, consonants are more likely than vowels. But at a greater level of granularity, Es are more common than Ss which are more common than Qs. To account for this, Shannon amended his original alphabet so that it modeled the probability of English more closely—he was 11 percent more likely to draw an E from the alphabet than a Q. When he again drew letters at random from this recalibrated corpus he got a sentence that came a bit closer to English. </p>
<p>The sentence was meaningless noise, Shannon said, because when we communicate we don&#39;t choose letters with equal
probability. As Markov had shown, consonants are more likely than vowels. But at a greater level of granularity,
E&#39;s are more common than S&#39;s, which are more common than Q&#39;s. To account for this, Shannon amended his original
alphabet so that it modeled the probability of English more closely—he was 11 percent more likely to draw an E
from the alphabet than a Q. When he again drew letters at random from this recalibrated corpus he got a sentence
that came a bit closer to English. </p>
<p>OCRO HLI RGWR NMIELWIS EU LL NBNESEBYA TH EEI ALHENHTTPA OOBTTVA NAH BRL. </p>
<p>In a series of subsequent experiments, Shannon demonstrated that as you make the statistical model even more complex, you get increasingly more comprehensible results. Shannon, via Markov, revealed a statistical framework for the English language, and showed that by modeling this framework—by analyzing the dependent probabilities of letters and words appearing in combination with each other—he could actually generate language. </p>
<p>The more complex the statistical model of a given text, the more accurate the language generation becomes—or as Shannon put it, the greater “resemblance to ordinary English text.” In the final experiment, Shannon drew from a corpus of words instead of letters and achieved the following: </p>
<p>THE HEAD AND IN FRONTAL ATTACK ON AN ENGLISH WRITER THAT THE CHARACTER OF THIS POINT IS THEREFORE ANOTHER METHOD FOR THE LETTERS THAT THE TIME OF WHO EVER TOLD THE PROBLEM FOR AN UNEXPECTED. </p>
<p>For both Shannon and Markov, the insight that languages statistical properties could be modeled offered a way to re-think broader problems that they were working on. For Markov, it extended the study of stochasticity beyond mutually independent events, paving the way for a new era in probability theory. For Shannon, it helped him formulate a precise way of measuring and encoding units of information in a message, which revolutionized telecommunications and, eventually, digital communication. But their statistical approach to language modeling and generation also ushered in a new era for natural language processing, which has ramified through the digital age to this day. As David Link notes in his article, Traces of the Mouth, Markov&#39;s efforts in retrospect “represent an early and momentous attempt to understand the phenomenon of language in mathematical terms.” It&#39;s not an exaggeration to say that Markov&#39;s analysis of text is in principle similar to what Google and other firms now routinely carry out on a massive scale: analyzing words in books and internet documents, the order in which the words occur, analyzing search phrases, detecting spam and so on. </p>
<p>In a series of subsequent experiments, Shannon demonstrated that as you make the statistical model even more
complex, you get increasingly more comprehensible results. Shannon, via Markov, revealed a statistical framework
for the English language, and showed that by modeling this framework—by analyzing the dependent probabilities of
letters and words appearing in combination with each other—he could actually generate language. </p>
<p>The more complex the statistical model of a given text, the more accurate the language generation becomes—or as
Shannon put it, the greater “resemblance to ordinary English text.” In the final experiment, Shannon drew from a
corpus of words instead of letters and achieved the following: </p>
<p>THE HEAD AND IN FRONTAL ATTACK ON AN ENGLISH WRITER THAT THE CHARACTER OF THIS POINT IS THEREFORE ANOTHER METHOD
FOR THE LETTERS THAT THE TIME OF WHO EVER TOLD THE PROBLEM FOR AN UNEXPECTED. </p>
<p>For both Shannon and Markov, the insight that language&#39;s statistical properties could be modeled offered a way to
re-think broader problems that they were working on. For Markov, it extended the study of stochasticity beyond
mutually independent events, paving the way for a new era in probability theory. For Shannon, it helped him
formulate a precise way of measuring and encoding units of information in a message, which revolutionized
telecommunications and, eventually, digital communication. But their statistical approach to language modeling and
generation also ushered in a new era for natural language processing, which has ramified through the digital age
to this day. As David Link notes in his article, Traces of the Mouth, Markov&#39;s efforts in retrospect
“represent an early and momentous attempt to understand the phenomenon of language in mathematical terms.”
It&#39;s not an exaggeration to say that Markov&#39;s analysis of text is in principle similar to what Google and
other firms now routinely carry out on a massive scale: analyzing words in books and internet documents, the order
in which the words occur, analyzing search phrases, detecting spam and so on. </p>
<h3 id="applications">Applications</h3>
<p>Since Markov chains can be designed to model many real-world processes, they are used in a wide variety of situations. They appear in physics and chemistry when probabilities are used for unknown quantities. In information processing, they have a role in pattern recognition, automatic speech analysis and synthesis and data compression. They are used by meteorologists, ecologists and biologists. Other applications include the control of driverless cars, machine translation, queuing patterns, and prediction of population growth, asset prices, currency exchange rates and market upheavals. Also artists have used Markov chains, such as musician Iannis Xenakis who developed “Free Stochastic Music” based on Markov chains. </p>
<p>In 2006 the 100th anniversary of Markov&#39;s paper Philipp Von Hilgers and Amy Langville summarized the <a href="http://langvillea.people.cofc.edu/MCapps7.pdf">five greatest applications of Markov chains</a>. This includes the one that is used by most of us on a daily basis, Google&#39;s Page Rank. Every time we search on the internet, the ranking of webpages is based on the solution to massive Markov chain. You can say that all the web pages are states, and the links between them are transitions possessing specific probabilities. In other words, we can say that no matter what youre searching on Google, theres a finite probability of you ending up on a particular web page. If you use Gmail, you mustve noticed their Auto-fill feature. This feature automatically predicts your sentences to help you write emails quickly.<br>And last but not least, have you ever wondered why spam has all those hilarious nonsensical strings of words in it? Theyre pretty odd constructions, not as random as if you picked words randomly out of a hat, almost grammatical much of the time, but still clearly gibberish. Also here the Markov chains have taken on a lot of the work. </p>
<p>Since Markov chains can be designed to model many real-world processes, they are used in a wide variety of
situations. They appear in physics and chemistry when probabilities are used for unknown quantities. In
information processing, they have a role in pattern recognition, automatic speech analysis and synthesis and data
compression. They are used by meteorologists, ecologists and biologists. Other applications include the control of
driverless cars, machine translation, queuing patterns, and prediction of population growth, asset prices,
currency exchange rates and market upheavals. Also artists have used Markov chains, such as musician Iannis
Xenakis who developed “Free Stochastic Music” based on Markov chains. </p>
<p>In 2006, the 100th anniversary of Markov&#39;s paper, Philipp Von Hilgers and Amy Langville summarized the <a
href="http://langvillea.people.cofc.edu/MCapps7.pdf">five greatest applications of Markov chains</a>. This
includes the one that is used by most of us on a daily basis, Google&#39;s Page Rank. Every time we search on the
internet, the ranking of webpages is based on the solution to a massive Markov chain. You can say that all the web
pages are states, and the links between them are transitions possessing specific probabilities. In other words, we
can say that no matter what you&#39;re searching on Google, there&#39;s a finite probability of you ending up on a
particular web page. If you use Gmail, you must&#39;ve noticed their Auto-fill feature. This feature automatically
predicts your sentences to help you write emails quickly.<br>And last but not least, have you ever wondered why
spam has all those hilarious nonsensical strings of words in it? They&#39;re pretty odd constructions, not as random
as if you picked words randomly out of a hat, almost grammatical much of the time, but still clearly gibberish.
Also here the Markov chains have taken on a lot of the work. </p>
</section>
<section class="chapter">
<section class="chapter" id="chapter-technical-description">
<h2 id="technical-description-of-the-markov-chains">Technical description of the Markov Chain</h2>
<h4 id="sources-">Sources</h4>
<p><a
<aside class="sources">
<h4>Sources</h4>
<ul>
<li>
<a
href="https://en.wikipedia.org/wiki/Examples_of_Markov_chains">https://en.wikipedia.org/wiki/Examples_of_Markov_chains</a>
</li>
<li>
<a
href="https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/">https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/</a>
</li>
<li>
<a
href="https://towardsdatascience.com/predicting-the-weather-with-markov-chains-a34735f0c4df">https://towardsdatascience.com/predicting-the-weather-with-markov-chains-a34735f0c4df</a>
</p>
<p>In a Markov process we can predict future changes once we know the current state. Wikipedia gives a very good description of the difference between Markov chains and other systems: &#39;A game of snakes and ladders or any other game whose moves are determined entirely by dice is a Markov chain, indeed, an absorbing Markov chain. This is in contrast to card games such as blackjack, where the cards represent a &#39;memory&#39; of the past moves. To see the difference, consider the probability for a certain event in the game. In the above-mentioned dice games, the only thing that matters is the current state of the board. The next state of the board depends on the current state, and the next roll of the dice. It doesn&#39;t depend on how things got to their current state. In a game such as blackjack, a player can gain an advantage by remembering which cards have already been shown (and hence which cards are no longer in the deck), so the next state (or hand) of the game is not independent of the past states.&#39;<br>So, for a Markov process, only the current state determines the next state; the history of the system has no impact. For that reason we describe a Markov process as memoryless. What happens next is determined completely by the current state and the transition probabilities. </p>
<p>In what follows, we describe a classic working of the Markov chains, next to a simplified version we used to develop a Markov game and the code for this book. </p>
</li>
</ul>
</aside>
<p>In a Markov process we can predict future changes once we know the current state. Wikipedia gives a very good
description of the difference between Markov chains and other systems: &#39;A game of snakes and ladders or any
other game whose moves are determined entirely by dice is a Markov chain, indeed, an absorbing Markov chain. This
is in contrast to card games such as blackjack, where the cards represent a &#39;memory&#39; of the past moves. To
see the difference, consider the probability for a certain event in the game. In the above-mentioned dice games,
the only thing that matters is the current state of the board. The next state of the board depends on the current
state, and the next roll of the dice. It doesn&#39;t depend on how things got to their current state. In a game
such as blackjack, a player can gain an advantage by remembering which cards have already been shown (and hence
which cards are no longer in the deck), so the next state (or hand) of the game is not independent of the past
states.&#39;<br>So, for a Markov process, only the current state determines the next state; the history of the
system has no impact. For that reason we describe a Markov process as memoryless. What happens next is determined
completely by the current state and the transition probabilities. </p>
<p>In what follows, we describe a classic working of the Markov chains, next to a simplified version we used to
develop a Markov game and the code for this book. </p>
<h3 id="classic-version">Classic version</h3>
<p>This example is taken from the following source: <a href="https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/">https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/</a></p>
<p>You take a piece of “training” text.<br>You make a list of all the words in it.<br>For each word, make a list of all the other words that come after it, with the number of times each word appears. So with the sentence: “the quick brown fox jumped over the lazy dog”, you would end up with the list: </p>
<p>This example is taken from the following source: <a
href="https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/">https://higherkindedtripe.wordpress.com/2012/02/26/markov-chains-or-daddy-where-does-spam-come-from/</a>
</p>
<p>You take a piece of “training” text.<br>You make a list of all the words in it.<br>For each word, make a list of
all the other words that come after it, with the number of times each word appears. So with the sentence: “the
quick brown fox jumped over the lazy dog”, you would end up with the list: </p>
<ol>
<li>the -&gt; (1, quick), (1, lazy) </li>
<li>quick -&gt; (1, brown)</li>
<li>brown -&gt; (1, fox)</li>
<li>fox -&gt; (1, jumped)</li>
<li>jumped -&gt; (1, over)</li>
<li>over -&gt; (1, the)</li>
<li>lazy -&gt; (1, dog)</li>
<li>dog -&gt;</li>
<li>the → (1, quick), (1, lazy) </li>
<li>quick → (1, brown)</li>
<li>brown → (1, fox)</li>
<li>fox → (1, jumped)</li>
<li>jumped → (1, over)</li>
<li>over → (1, the)</li>
<li>lazy → (1, dog)</li>
<li>dog →</li>
</ol>
<p>Turn the list into a matrix, where the rows represent the “leading” words and the columns represent “following” words, and each number in the matrix says how many times the following word appeared after the leading word. You will get: </p>
<p>Turn the list into a matrix, where the rows represent the “leading” words and the columns represent “following”
words, and each number in the matrix says how many times the following word appeared after the leading word. You
will get: </p>
<table>
<thead>
<tr>
@ -411,7 +700,8 @@
</tr>
</tbody>
</table>
<p>Divide every number in the matrix by the total of its row, and youll notice that each row becomes a sort of probability distribution. </p>
<p>Divide every number in the matrix by the total of its row, and you&#39;ll notice that each row becomes a sort of
probability distribution. </p>
<table>
<thead>
<tr>
@ -517,61 +807,111 @@
</tr>
</tbody>
</table>
<p>You can interpret this as saying “if the first word is a the theres a 50% chance the next word is quick, and a 50% chance the next word is lazy. For all the other words, there is only one possible word following it.”<br>Almost every word has only one possible following word because the text is so short. But, if you train it with a larger text, and interpret the rows as a probability distribution, you can start to see for every word what sort of word tends to follow it. This gives a very interesting insight into the nature of written text.<br>If you take that big “transition matrix” youve trained from a large text, you can use it to actually generate new text in the following way: </p>
<p>You can interpret this as saying “if the first word is a &#39;the&#39; there&#39;s a 50% chance the next word is &#39;quick&#39;, and
a 50% chance the next word is &#39;lazy&#39;. For all the other words, there is only one possible word following
it.”<br>Almost every word has only one possible following word because the text is so short. But, if you train it
with a larger text, and interpret the rows as a probability distribution, you can start to see for every word what
sort of word tends to follow it. This gives a very interesting insight into the nature of written text.<br>If you
take that big “transition matrix” you&#39;ve trained from a large text, you can use it to actually generate new text
in the following way: </p>
<ol>
<li><p>Pick a “seed” word from the text at random. For best results use one with many possible following words.</p>
<li>
<p>Pick a “seed” word from the text at random. For best results use one with many possible following words.</p>
</li>
<li><p>Find the row in the matrix corresponding to that word. Choose the next word at random, weighted according to the probabilities in the row. That is, if the column corresponding to the word “blue” has the number .05 in it, you have a 5% chance of picking “blue” as the next word, and so on (when we divided each number by the total of its row we made sure that these probabilities would add up to 1).</p>
<li>
<p>Find the row in the matrix corresponding to that word. Choose the next word at random, weighted according to
the probabilities in the row. That is, if the column corresponding to the word “blue” has the number .05 in
it, you have a 5% chance of picking “blue” as the next word, and so on (when we divided each number by the
total of its row we made sure that these probabilities would add up to 1).</p>
</li>
<li><p>Go back to step 2 using this second word as the new “seed” word. Continue this process to generate as long a string of words as you want. If you end up with a word for which no other words follow it (uncommon when you train on a large test, but possible imagine if the last word of a novel was the only occurrence of the word “xylophone”, or whatever), just pick a random word.</p>
<li>
<p>Go back to step 2 using this second word as the new “seed” word. Continue this process to generate as long a
string of words as you want. If you end up with a word for which no other words follow it (uncommon when you
train on a large text, but possible – imagine if the last word of a novel was the only occurrence of the word
“xylophone”, or whatever), just pick a random word.</p>
</li>
</ol>
<p>You can see how strings of words generated with this method will follow the “trends” of the training data, meaning that if you were to generate a new transition matrix from the generated words it would, on average, look the same as the original transition matrix since you picked the words according to those weights. This completely mechanical process can generate data which looks, statistically, like meaningful English. Of course, it is not necessarily grammatical, and is certainly devoid of higher meaning since it was generated through this simplistic process. </p>
<p>Those “chains” of words constructed by the above process are an example of Markov chains. And they are also the answer to the question “where does spam come from?”. Those uncannily-almost-grammatical ramblings below the “Viagra” ads, generated through the above process, are the spam-creators way of fooling your spam filter. They include these chains to give their advertisements statistical similarity to meaningful human correspondence. This works because the spam filters are (at least in part) using probabilistic models that depend on word-transitions and word frequencies to classify incoming email as spam. The spammers and the filter-writers are engaged in an eternal game of randomly-generated cat-and-mouse. </p>
<p>You can see how strings of words generated with this method will follow the “trends” of the training data,
meaning that if you were to generate a new transition matrix from the generated words it would, on average, look
the same as the original transition matrix since you picked the words according to those weights. This completely
mechanical process can generate data which looks, statistically, like meaningful English. Of course, it is not
necessarily grammatical, and is certainly devoid of higher meaning since it was generated through this simplistic
process. </p>
<p>Those “chains” of words constructed by the above process are an example of Markov chains. And they are also the
answer to the question “where does spam come from?”. Those uncannily-almost-grammatical ramblings below the
“Viagra” ads, generated through the above process, are the spam-creators&#39; way of fooling your spam filter. They
include these chains to give their advertisements statistical similarity to meaningful human correspondence. This
works because the spam filters are (at least in part) using probabilistic models that depend on word-transitions
and word frequencies to classify incoming email as spam. The spammers and the filter-writers are engaged in an
eternal game of randomly-generated cat-and-mouse. </p>
<h3 id="simplified-version">Simplified version</h3>
<p>With <a href="https://algolit.net">Algolit</a>, an artistic research group on libre code and text based in Brussels, we developed a Markov Chain game with sentences and cards. This happened as part of the festival Désert Numérique, in La Drôme in France in 2014. The game was developed by Brendan Howell, Catherine Lenoble and An Mertens. You can listen back to the radio show: <a href="http://desert.numerique.free.fr//archives/?id=1011&amp;ln=fr">http://desert.numerique.free.fr//archives/?id=1011&amp;ln=fr</a>.<br>Next, the game was presented at Transmediale in Berlin in 2015, respecting the following rules. </p>
<p>With <a href="https://algolit.net">Algolit</a>, an artistic research group on libre code and text based in
Brussels, we developed a Markov Chain game with sentences and cards. This happened as part of the festival Désert
Numérique, in La Drôme in France in 2014. The game was developed by Brendan Howell, Catherine Lenoble and An
Mertens. You can listen back to the radio show: <a
href="http://desert.numerique.free.fr//archives/?id=1011&amp;ln=fr">http://desert.numerique.free.fr//archives/?id=1011&amp;ln=fr</a>.<br>Next,
the game was presented at Transmediale in Berlin in 2015, respecting the following rules. </p>
<ol>
<li><p>We take a text, for example:</p>
<li>
<p>We take a text, for example:</p>
<blockquote>
<p>Cqrrelations read as poetry to statisticians. Can statisticians read poetry with machines?Cqrrelations is a practise for artists, for datatravellers, statisticians and other lovers of machines to explore a world of blurry categorisations and crummylations. Machines correlate to dissidents, dissidents correlate to statisticians.</p>
<p>Cqrrelations read as poetry to statisticians. Can statisticians read poetry with machines? Cqrrelations is a
practise for artists, for datatravellers, statisticians and other lovers of machines to explore a world of
blurry categorisations and crummylations. Machines correlate to dissidents, dissidents correlate to
statisticians.</p>
</blockquote>
</li>
<li><p>We create a database for this text; each word is an entry and takes the following word as a possible value. The entry for Cqrrelations will have two values:</p>
<li>
<p>We create a database for this text; each word is an entry and takes the following word as a possible value.
The entry for Cqrrelations will have two values:</p>
<ol>
<li>read</li>
<li>is </li>
</ol>
</li>
<li><p>Once the database is created, we choose a starting word for a new text, for ex. Cqrrelations.</p>
<li>
<p>Once the database is created, we choose a starting word for a new text, for ex. Cqrrelations.</p>
</li>
<li>We roll the dice, odd numbers will give read as the 2nd word of our text; even numbers will give is as the 2nd word.</li>
<li>We roll the dice again, and choose a word amongst the values of the chosen word. This gives the next word of our sentence.</li>
<li>We roll the dice: odd numbers will give &#39;read&#39; as the 2nd word of our text; even numbers will give &#39;is&#39; as the
2nd word.</li>
<li>We roll the dice again, and choose a word amongst the values of the chosen word. This gives the next word of
our sentence.</li>
<li>We repeat step 5 until we arrive at a word with a period (.)</li>
<li>We can repeat steps 3 to 6 until we are satisfied with the number of generated sentences</li>
</ol>
<p>Based on the input text the output at Transmediale was: </p>
<blockquote>
<p>A world of blurry categorisations and other lovers of blurry categorisations and other lovers of blurry categorisations and other lovers of machines. Cqrrelations read poetry to dissidents correlate to machines. Lovers of machines to statisticians.</p>
<p>A world of blurry categorisations and other lovers of blurry categorisations and other lovers of blurry
categorisations and other lovers of machines. Cqrrelations read poetry to dissidents correlate to machines.
Lovers of machines to statisticians.</p>
</blockquote>
</section>
<section class="chapter">
<section class="chapter" id="chapter-code">
<h2 id="code">Code of the book</h2>
<!-- VOEG CODE TOE-->
{% for path, source in sources %}
<h3>{{ path }}</h3>
<pre>{{ source }}</pre>
{% endfor %}
</section>
<section class="chapter">
<section class="chapter" id="chapter-credits">
<h2 id="credits">Credits</h2>
<p>This book is a creation of Anaïs Berck for Medialab as part of the programme &quot;Residency Digital Cultur&quot; initiated by the Flemish Government.
<p>This book is a creation of Anaïs Berck for Medialab as part of the programme &quot;Residency Digital Culture&quot;
initiated by the Flemish Government.
In this work Anaïs Berck is represented by:</p>
<ul>
<li>the Markov chains algorithm, of which a description is given in this book,</li>
<li>the trees of Madrid, which are geolocated between Medialab Prado, Plaza del Sol and Atocha Renfe, and present in the database <a href="http://www-2.munimadrid.es/DGPVE_WUAUA/welcome.do">Un Alcorque, un Árbol</a>,</li>
<li>the human beings Emilia Pardo Bazán, Benito Pérez Gáldos, Jaime Munárriz, Luis Morell, An Mertens, Eva Marina Gracia, Gijs de Heij, Ana Isabel Garrido Mártinez, Alfredo Calosci, Daniel Arribas Hedo.</li>
<li>the trees of Madrid, which are geolocated between Medialab Prado, Plaza del Sol and Atocha Renfe, and present
in the database <a href="http://www-2.munimadrid.es/DGPVE_WUAUA/welcome.do">Un Alcorque, un Árbol</a>,</li>
<li>the human beings Emilia Pardo Bazán, Benito Pérez Galdós, Jaime Munárriz, Luis Morell, An Mertens, Eva Marina
Gracia, Gijs de Heij, Ana Isabel Garrido Martínez, Alfredo Calosci, Daniel Arribas Hedo.</li>
</ul>
<p>The copy of this book is unique and the print run is by definition infinite.<br>
This copy is the {{ edition_count }} number of copies downloaded. </p>
<p>Collective terms of (re)use (CC4r), 2021<br>Copyleft with a difference: You are invited to copy, distribute, and modify this work under the terms of the work under the terms of the <a href="https://gitlab.constantvzw.org/unbound/cc4r">CC4r</a>.</p>
<p>Collective terms of (re)use (CC4r), 2021<br>Copyleft with a difference: You are invited to copy, distribute, and
modify this work under the terms of the <a
href="https://gitlab.constantvzw.org/unbound/cc4r">CC4r</a>.</p>
</section>
</body>

@ -68,6 +68,15 @@
font-size: 34pt;
line-height: 45pt;
margin-top: 0;
margin-bottom: 0;
}
a {
color: currentColor;
}
a:hover {
text-decoration: none;
}
p {
@ -157,7 +166,10 @@
</style>
</head>
<body>
<section id="title">
<h1>Paseo por los árboles de Madrid</h1>
<a href="{{ BASEURL }}/en">en</a>
</section>
<section id="introduction">
<p>En este libro, el algoritmo de las cadenas de Markov genera simultáneamente un poema y un paseo por los árboles del barrio de Las Letras, en el centro de Madrid. A pesar de la impresión de que hay pocos árboles en el barrio, el algoritmo cuenta con 460 de ellos. </p>
<p>La cadena de Markov fue diseñada en 1906 por Andrey Markov, un matemático ruso fallecido en 1922. Este algoritmo está en la base de muchos programas informáticos que generan spam. Se utiliza para sistemas que describen una serie de eventos que son interdependientes. Lo que ocurre depende únicamente del paso anterior.</p>

@ -68,6 +68,15 @@
font-size: 34pt;
line-height: 45pt;
margin-top: 0;
margin-bottom: 0;
}
a {
color: currentColor;
}
a:hover {
text-decoration: none;
}
p {
@ -157,7 +166,10 @@
</style>
</head>
<body>
<h1 id="title">Walk along the trees of Madrid</h1>
<div id="title">
<h1>Walk along the trees of Madrid</h1>
<a href="{{ BASEURL }}/es">es</a>
</div>
<section id="introduction">
<p>In this book, the Markov chain algorithm simultaneously generates a poem and a walk along the trees of the neighbourhood Las Letras in the centre of Madrid. Despite the impression that there are few trees in the neighbourhood, the algorithm counts 460 of them.</p>
<p>The Markov chain was designed in 1906 by Andrey Markov, a Russian mathematician who died in 1922. This algorithm is at the basis of many computer programs that generate spam. It is used for systems that describe a series of events that are interdependent. What happens depends only on the previous step.</p>

Loading…
Cancel
Save