new algolit session to see how summarization reacts to different translations
parent
e69058624e
commit
4a4ed042ac
@ -0,0 +1,93 @@
|
|||||||
|
from jinja2 import Template
|
||||||
|
from markdown import markdown
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# appending a path
|
||||||
|
sys.path.append('../')
|
||||||
|
|
||||||
|
# importing customised module
|
||||||
|
import summa.edits
|
||||||
|
from summa.edits import scored_sentences
|
||||||
|
|
||||||
|
import wikipage
|
||||||
|
from wikipage.page import get_wikipage, is_header
|
||||||
|
|
||||||
|
# variables
|
||||||
|
# ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
languages = ['en','fr']
|
||||||
|
page_requests = ['river', 'rivière']
|
||||||
|
|
||||||
|
TEMPLATE_PATH = 'template.html'
|
||||||
|
HTML_PATH = 'www/index.html'
|
||||||
|
|
||||||
|
|
||||||
|
# utilities
|
||||||
|
# ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def page_request(request, lang):
    """Fetch a Wikipedia page, stopping the whole script if nothing comes back.

    Prints a ``--- WIKI ---`` banner and delegates the lookup to
    ``get_wikipage``.

    Args:
        request: page name passed through to ``get_wikipage``.
        lang: language code passed through to ``get_wikipage``.

    Returns:
        Whatever ``get_wikipage`` returned, provided it is truthy.

    Raises:
        SystemExit: with the message ``--- STOP ---`` when ``get_wikipage``
            returns a falsy value.
    """
    print('--- WIKI ---')
    fetched = get_wikipage(request, lang)
    if fetched:
        return fetched

    # nothing usable came back from wikipedia: abort the run
    sys.exit("--- STOP ---")
|
||||||
|
|
||||||
|
|
||||||
|
# main
|
||||||
|
# ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Pages that have been fetched and ranked, in request order.
    processed_pages = []

    # `languages` and `page_requests` are parallel lists: entry i of one
    # belongs with entry i of the other. zip() pairs them up and stops at
    # the shorter list.
    for lang, request in zip(languages, page_requests):

        # --- WIKI REQUEST ---
        page = page_request(request, lang)
        print("got " + page.title)

        # add the lang so the template can display it next to the title
        page.lang = lang

        # --- APPLY TEXTRANK ---
        # score every sentence, then keep them ordered best score first
        sentences = scored_sentences(page.content)
        page.sentences = sorted(sentences, key=lambda s: s.score, reverse=True)

        # TODO: remove header from page.content (left unfinished upstream)

        processed_pages.append(page)

    # --- TEMPLATING ---

    # getting the template
    # Read explicitly as UTF-8 instead of relying on the platform default
    # encoding, which is not UTF-8 everywhere.
    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as file:
        template = Template(file.read())

    # render template
    html = template.render(pages=processed_pages)

    # The template declares <meta charset="UTF-8"> and the rendered pages
    # contain accented text, so the bytes on disk must really be UTF-8.
    with open(HTML_PATH, 'w', encoding='utf-8') as file:
        file.write(html)
|
@ -0,0 +1,42 @@
|
|||||||
|
<!DOCTYPE html>
{# Jinja2 template: renders one <article> per entry of `pages`, each showing
   the page title, its language code and its five first sentences with their
   scores. #}
<html lang="fr">

  <head>

    <meta charset="UTF-8" />

    <title>TextRank Opacity</title>
    {# NOTE(review): this description looks carried over from another project;
       confirm or replace it. #}
    <meta name="description" content="a call for 2 desks in studio 5 of the Meyboom artist-run spaces">

    <link rel="stylesheet" type="text/css" href="css/main.css" />
    <link rel="stylesheet" type="text/css" href="css/typography.css" />

    <meta name="viewport" content="width=device-width, initial-scale=1">

  </head>

  <body>

    <main>

      {% for page in pages %}

        {# The document mixes languages, so each article carries its own
           lang attribute. Text fetched from Wikipedia is escaped with |e so
           that a stray "<" or "&" cannot break or inject markup. #}
        <article lang="{{ page.lang|e }}">
          <h1>{{ page.title|e }} ({{ page.lang|e }})</h1>

          <ol>
            {% for s in page.sentences[:5] %}
              <li><code>[{{ s.score }}]</code> {{ s.text|e }}</li>
            {% endfor %}
          </ol>

        </article>

      {% endfor %}

    </main>

  </body>

</html>
|
@ -0,0 +1 @@
|
|||||||
|
A warehouse is a building for storing goods. Warehouses are used by manufacturers, importers, exporters, wholesalers, transport businesses, customs, etc. They are usually large plain buildings in industrial parks on the outskirts of cities, towns, or villages. They usually have loading docks to load and unload goods from trucks. Sometimes warehouses are designed for the loading and unloading of goods directly from railways, airports, or seaports. They often have cranes and forklifts for moving goods, which are usually placed on ISO standard pallets and then loaded into pallet racks. Stored goods can include any raw materials, packing materials, spare parts, components, or finished goods associated with agriculture, manufacturing, and production. In India and Hong Kong, a warehouse may be referred to as a "godown". There are also godowns in the Shanghai Bund.
|
@ -0,0 +1,38 @@
|
|||||||
|
|
||||||
|
/* Shared vertical unit: used below as the line height and as the basis
   for every margin. */
:root {
  --lh: 1.35rem;
}

body {
  margin: var(--lh);
  line-height: var(--lh);
}

/* Print: no page margin from the body, fixed 10pt type. */
@media print {
  body {
    margin: 0;
    font-size: 10pt;
  }
}

/* Centred column, at most 42rem wide. */
main {
  max-width: 42rem;
  margin: 0 auto;
}

/* h1,h2,h3,h4,h5,h6{
  line-height: var(--lh);
} */

h1 {
  text-align: center;
  margin: calc(2 * var(--lh)) 0;
}

h2, h3, h4, h5, h6 {
  margin: calc(3 * var(--lh)) 0 var(--lh);
}

/* A heading that directly follows another heading only gets one unit of
   space above it. */
:is(h1, h2, h3, h4, h5, h6) + :is(h1, h2, h3, h4, h5, h6) {
  margin-top: var(--lh);
}
|
@ -0,0 +1,71 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
|
||||||
|
<html lang="fr">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
|
||||||
|
<title>TextRank Opacity</title>
|
||||||
|
<meta name="description" content="a call for 2 desks in studio 5 of the Meyboom artist-run spaces">
|
||||||
|
|
||||||
|
<link rel="stylesheet" type="text/css" href="css/main.css" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="css/typography.css" />
|
||||||
|
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<main>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<article>
|
||||||
|
<h1>River (en)</h1>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
|
||||||
|
<li><code>[0.1145916799619322]</code> Levees and flood-banks can also increase flooding upstream because of the back-water pressure as the river flow is impeded by the narrow channel banks.</li>
|
||||||
|
|
||||||
|
<li><code>[0.11086427143320547]</code> Straightening rivers allows water to flow more rapidly downstream, increasing the risk of flooding places further downstream.</li>
|
||||||
|
|
||||||
|
<li><code>[0.10725363112211873]</code> The water in a river is usually confined to a channel, made up of a stream bed between banks.</li>
|
||||||
|
|
||||||
|
<li><code>[0.10657133955295318]</code> A river flowing in its channel is a source of energy that acts on the river channel to change its shape and form.</li>
|
||||||
|
|
||||||
|
<li><code>[0.10590942715308657]</code> Most but not all rivers flow on the surface.</li>
|
||||||
|
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
</article>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<article>
|
||||||
|
<h1>Rivière (fr)</h1>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
|
||||||
|
<li><code>[0.22940504189612096]</code> En effet, il n'est pas rare de rencontrer des fleuves qui sont plus « petits » que certaines rivières (à titre d'exemples : la Saône à Lyon, qui présente un lit et un débit importants, reste une rivière.</li>
|
||||||
|
|
||||||
|
<li><code>[0.21916558158176408]</code> l'importance du méandrage est également liée à la quantité de matières solides transportées par la rivière : plus elles sont de grosse taille et nombreuses, plus elles ralentissent le débit de l'eau, même en pente forte supérieure à 3 % ; elles ont alors pour conséquence un méandrage moindre.</li>
|
||||||
|
|
||||||
|
<li><code>[0.21060618367643322]</code> Pour l'écologie du paysage, les rivières (et leurs berges et milieux associés) jouent un rôle majeur de corridor biologique, que la loi (Lois Grenelle) demande de ne pas artificiellement fragmenter sans mesure compensatoire efficaces permettant aux espèces de circuler le plus normalement dans tout le cours d'eau.</li>
|
||||||
|
|
||||||
|
<li><code>[0.20784897960944404]</code> si le lit est non forcé dans la roche, la rivière (alors classifiée « libre ») adoptera son trajet selon un critère capital : la pente du lit et de l'eau par rapport à la valeur critique 3 %[réf.</li>
|
||||||
|
|
||||||
|
<li><code>[0.2021124917687332]</code> En hydrographie, une rivière est un cours d'eau au débit moyen à modéré (supérieur à 2 m3/s), recevant des affluents et qui se jette dans une autre rivière ou dans un fleuve.</li>
|
||||||
|
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
</article>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</main>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
Loading…
Reference in New Issue