From 4a4ed042ac5c852d00b9323c7af05921ff081cd1 Mon Sep 17 00:00:00 2001
From: Dorian
Date: Fri, 24 Feb 2023 15:23:51 +0100
Subject: [PATCH] new algolit session to see how summarization reacts to different translations

---
 exp.translations/make.py             | 93 ++++++++++++++++++++++++++++
 exp.translations/template.html       | 42 +++++++++++++
 exp.translations/texts/warehouse.txt |  1 +
 exp.translations/www/css/main.css    | 38 ++++++++++++
 exp.translations/www/index.html      | 71 +++++++++++++++++++++
 5 files changed, 245 insertions(+)
 create mode 100644 exp.translations/make.py
 create mode 100644 exp.translations/template.html
 create mode 100644 exp.translations/texts/warehouse.txt
 create mode 100644 exp.translations/www/css/main.css
 create mode 100644 exp.translations/www/index.html

diff --git a/exp.translations/make.py b/exp.translations/make.py
new file mode 100644
index 0000000..e2b0f7c
--- /dev/null
+++ b/exp.translations/make.py
@@ -0,0 +1,93 @@
+"""Compare TextRank sentence scores for one topic across Wikipedia languages.
+
+For each (language, request) pair the matching Wikipedia page is fetched, its
+sentences are scored with the customised ``summa`` module, and all pages are
+rendered through ``template.html`` into ``www/index.html``.
+"""
+
+from jinja2 import Template
+from markdown import markdown
+import sys
+
+# make the parent directory importable (resolved against the working directory)
+sys.path.append('../')
+
+# importing customised module
+import summa.edits
+from summa.edits import scored_sentences
+
+import wikipage
+from wikipage.page import get_wikipage, is_header
+
+# variables
+# ------------------------------------------------------------------------
+
+# languages[i] is the Wikipedia language used for page_requests[i]
+languages = ['en', 'fr']
+page_requests = ['river', 'rivière']
+
+TEMPLATE_PATH = 'template.html'
+HTML_PATH = 'www/index.html'
+
+
+# utilities
+# ------------------------------------------------------------------------
+
+
+def page_request(request, lang):
+    """Fetch the Wikipedia page for `request` in language `lang`.
+
+    Exits the program with "--- STOP ---" when get_wikipage returns a falsy
+    value, so the caller only ever receives a truthy page object.
+    """
+    print('--- WIKI ---')
+    page = get_wikipage(request, lang)
+    if not page:
+        sys.exit("--- STOP ---")
+    return page
+
+
+def process_page(request, lang):
+    """Return the page for `request`, tagged with `lang` and ranked sentences."""
+    page = page_request(request, lang)
+    print("got " + page.title)
+
+    # the template prints the language next to the title
+    page.lang = lang
+
+    # highest score first; the template slices off the top sentences
+    page.sentences = sorted(
+        scored_sentences(page.content),
+        key=lambda s: s.score,
+        reverse=True,
+    )
+    return page
+
+
+def main():
+    """Fetch and rank every configured page, then write the HTML report."""
+    if len(languages) != len(page_requests):
+        sys.exit("languages and page_requests must have the same length")
+
+    processed_pages = [
+        process_page(request, lang)
+        for lang, request in zip(languages, page_requests)
+    ]
+
+    # explicit UTF-8: the pages contain non-ASCII text (e.g. French) and the
+    # platform default encoding is not guaranteed to be able to encode it
+    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as file:
+        template = Template(file.read())
+
+    # render template
+    html = template.render(pages=processed_pages)
+
+    with open(HTML_PATH, 'w', encoding='utf-8') as file:
+        file.write(html)
+
+
+# main
+# ------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
diff --git a/exp.translations/template.html b/exp.translations/template.html
new file mode 100644
index 0000000..0e72aa5
--- /dev/null
+++ b/exp.translations/template.html
@@ -0,0 +1,42 @@
+ + + + + + + + + TextRank Opacity + + + + + + + + + + + +
+ + {% for page in pages %} + +
+

{{ page.title|safe }} ({{ page.lang|safe }})

+ +
    + {% for s in page.sentences[:5] %} +
  1. [{{ s.score|safe }}] {{ s.text|safe }}
  2. + {% endfor %} +
+ +
+ + {% endfor %} + +
+ + + + diff --git a/exp.translations/texts/warehouse.txt b/exp.translations/texts/warehouse.txt new file mode 100644 index 0000000..b16a42e --- /dev/null +++ b/exp.translations/texts/warehouse.txt @@ -0,0 +1 @@ +A warehouse is a building for storing goods. Warehouses are used by manufacturers, importers, exporters, wholesalers, transport businesses, customs, etc. They are usually large plain buildings in industrial parks on the outskirts of cities, towns, or villages. They usually have loading docks to load and unload goods from trucks. Sometimes warehouses are designed for the loading and unloading of goods directly from railways, airports, or seaports. They often have cranes and forklifts for moving goods, which are usually placed on ISO standard pallets and then loaded into pallet racks. Stored goods can include any raw materials, packing materials, spare parts, components, or finished goods associated with agriculture, manufacturing, and production. In India and Hong Kong, a warehouse may be referred to as a "godown". There are also godowns in the Shanghai Bund. 
\ No newline at end of file
diff --git a/exp.translations/www/css/main.css b/exp.translations/www/css/main.css
new file mode 100644
index 0000000..46097e8
--- /dev/null
+++ b/exp.translations/www/css/main.css
@@ -0,0 +1,38 @@
+
+:root{
+    --lh: 1.35rem; /* shared line-height unit; the margins below are multiples of it */
+}
+
+body{
+    margin: var(--lh);
+    line-height: var(--lh);
+}
+
+@media print{ /* when printing: no body margin and a fixed 10pt font size */
+    body{
+        margin: 0;
+        font-size: 10pt;
+    }
+}
+
+main{
+    max-width: 42rem;
+    margin: 0 auto; /* centre the column horizontally */
+}
+
+/* h1,h2,h3,h4,h5,h6{
+    line-height: var(--lh);
+} */
+
+h1{
+    text-align: center;
+    margin: calc(2 * var(--lh)) 0;
+}
+
+h2,h3,h4,h5,h6{
+    margin: calc(3 * var(--lh)) 0 var(--lh); /* three units above, one unit below */
+}
+
+:is(h1,h2,h3,h4,h5,h6) + :is(h1,h2,h3,h4,h5,h6){ /* a heading directly after another heading */
+    margin-top: var(--lh);
+}
\ No newline at end of file
diff --git a/exp.translations/www/index.html b/exp.translations/www/index.html
new file mode 100644
index 0000000..d9e9249
--- /dev/null
+++ b/exp.translations/www/index.html
@@ -0,0 +1,71 @@
+ + + + + + + + + TextRank Opacity + + + + + + + + + + + +
+ + + +
+

River (en)

+ +
    + +
  1. [0.1145916799619322] Levees and flood-banks can also increase flooding upstream because of the back-water pressure as the river flow is impeded by the narrow channel banks.
  2. + +
  3. [0.11086427143320547] Straightening rivers allows water to flow more rapidly downstream, increasing the risk of flooding places further downstream.
  4. + +
  5. [0.10725363112211873] The water in a river is usually confined to a channel, made up of a stream bed between banks.
  6. + +
  7. [0.10657133955295318] A river flowing in its channel is a source of energy that acts on the river channel to change its shape and form.
  8. + +
  9. [0.10590942715308657] Most but not all rivers flow on the surface.
  10. + +
+ +
+ + + +
+

Rivière (fr)

+ +
    + +
  1. [0.22940504189612096] En effet, il n'est pas rare de rencontrer des fleuves qui sont plus « petits » que certaines rivières (à titre d'exemples : la Saône à Lyon, qui présente un lit et un débit importants, reste une rivière.
  2. + +
  3. [0.21916558158176408] l'importance du méandrage est également liée à la quantité de matières solides transportées par la rivière : plus elles sont de grosse taille et nombreuses, plus elles ralentissent le débit de l'eau, même en pente forte supérieure à 3 % ; elles ont alors pour conséquence un méandrage moindre.
  4. + +
  5. [0.21060618367643322] Pour l'écologie du paysage, les rivières (et leurs berges et milieux associés) jouent un rôle majeur de corridor biologique, que la loi (Lois Grenelle) demande de ne pas artificiellement fragmenter sans mesure compensatoire efficaces permettant aux espèces de circuler le plus normalement dans tout le cours d'eau.
  6. + +
  7. [0.20784897960944404] si le lit est non forcé dans la roche, la rivière (alors classifiée « libre ») adoptera son trajet selon un critère capital : la pente du lit et de l'eau par rapport à la valeur critique 3 %[réf.
  8. + +
  9. [0.2021124917687332] En hydrographie, une rivière est un cours d'eau au débit moyen à modéré (supérieur à 2 m3/s), recevant des affluents et qui se jette dans une autre rivière ou dans un fleuve.
  10. + +
+ +
+ + + +
+ + + + \ No newline at end of file