You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
2.0 KiB
Python
94 lines
2.0 KiB
Python
2 years ago
|
from jinja2 import Template
|
||
|
from markdown import markdown
|
||
|
import sys
|
||
|
|
||
|
# appending a path
|
||
|
sys.path.append('../')
|
||
|
|
||
|
# importing customised module
|
||
|
import summa.edits
|
||
|
from summa.edits import scored_sentences
|
||
|
|
||
|
import wikipage
|
||
|
from wikipage.page import get_wikipage, is_header
|
||
|
|
||
|
# variables
|
||
|
# ------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
# Wikipedia language editions to query and the article title to request in
# each one. The two lists are parallel: page_requests[i] is looked up in the
# languages[i] edition.
languages = [
    'en',
    'fr',
]
page_requests = [
    'river',
    'rivière',
]

# Template that is read for rendering, and the HTML file the result is
# written to.
TEMPLATE_PATH = 'template.html'
HTML_PATH = 'www/index.html'
|
||
|
|
||
|
|
||
|
# utilities
|
||
|
# ------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
def page_request(request, lang):
    """Get the wiki page for ``request`` in language ``lang``.

    Prints a ``--- WIKI ---`` banner and delegates the lookup to
    ``get_wikipage(request, lang)``.

    Returns whatever ``get_wikipage`` returned when that value is truthy.
    Otherwise the program is terminated via ``sys.exit`` with the message
    ``--- STOP ---`` (raises ``SystemExit``).
    """
    print('--- WIKI ---')
    wiki_page = get_wikipage(request, lang)

    if wiki_page:
        return wiki_page

    # Nothing usable came back: abort the whole run.
    sys.exit("--- STOP ---")
|
||
|
|
||
|
|
||
|
# main
|
||
|
# ------------------------------------------------------------------------
|
||
|
|
||
|
if __name__ == '__main__':
    # Script entry point: fetch one page per configured language, rank its
    # sentences, then render every processed page into a single HTML file.

    processed_pages = []

    # languages and page_requests are parallel lists, paired by position.
    for lang, request in zip(languages, page_requests):

        # --- WIKI REQUEST ---
        # page_request() terminates the program if no page comes back.
        page = page_request(request, lang)

        print("got " + page.title)

        # add the lang
        page.lang = lang

        # --- APPLY TEXTRANK ---
        # Attach the scored sentences to the page, highest score first.
        page.sentences = sorted(
            scored_sentences(page.content),
            key=lambda s: s.score,
            reverse=True,
        )

        # TODO: remove the header from page.content (was never implemented).

        processed_pages.append(page)

    # --- TEMPLATING ---

    # getting the template
    # Explicit UTF-8: the pages contain non-ASCII text (e.g. French), so the
    # platform's default locale encoding must not be relied upon.
    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as file:
        template = Template(file.read())

    # render template
    html = template.render(pages=processed_pages)

    with open(HTML_PATH, 'w', encoding='utf-8') as file:
        file.write(html)
|