from jinja2 import Template
from markdown import markdown
import sys

# appending a path so the customised modules one directory up can be imported
sys.path.append('../')

# importing customised module
import summa.edits
from summa.edits import scored_sentences

import wikipage
from wikipage.page import get_wikipage, is_header


# variables
# ------------------------------------------------------------------------

# alternative configuration:
# languages = ['en','fr']
# page_requests = ['river', 'rivière']

# The three lists below are parallel: entry i of each one describes the
# same page (wikipedia language code, summa language name, page title).
languages = ['en', 'fr', 'es']
languages_full = ['english', 'french', 'spanish']
page_requests = ['woman', 'femme', 'mujer']

# possible languages for summa
# "danish", "dutch", "english", "finnish", "french", "german",
# "hungarian", "italian", "norwegian", "porter", "portuguese",
# "romanian", "russian", "spanish", "swedish"
# according to doc: https://summanlp.github.io/textrank/

TEMPLATE_PATH = 'template.html'
HTML_PATH = 'www/index.html'


# utilities
# ------------------------------------------------------------------------

def page_request(request, lang):
    """Fetch a wikipedia page or stop the program.

    Args:
        request: page title passed to ``get_wikipage``.
        lang: language code passed to ``get_wikipage``.

    Returns:
        Whatever ``get_wikipage`` returned, when that value is truthy.

    Raises:
        SystemExit: with the message ``--- STOP ---`` when ``get_wikipage``
            returns a falsy value.
    """
    # get text from wikipedia
    print('--- WIKI ---')
    page = get_wikipage(request, lang)
    if not page:
        sys.exit("--- STOP ---")
    return page


# main
# ------------------------------------------------------------------------

def main():
    """Fetch every configured page, rank its sentences and render the HTML.

    For each (language code, summa language name, page title) triple the
    page is requested, tagged with its language code, scored with
    ``scored_sentences`` and stored with its sentences sorted by descending
    score. The collected pages are then rendered through the template at
    ``TEMPLATE_PATH`` and the result is written to ``HTML_PATH``.

    Raises:
        ValueError: if the three configuration lists differ in length.
        SystemExit: propagated from ``page_request`` when a page is missing.
    """
    # Fail before any request is made instead of hitting an IndexError (or
    # silently ignoring entries) halfway through the run.
    if not (len(languages) == len(languages_full) == len(page_requests)):
        raise ValueError(
            "languages, languages_full and page_requests must have the "
            "same length"
        )

    processed_pages = []

    for lang, lang_full, request in zip(languages, languages_full,
                                        page_requests):
        # --- WIKI REQUEST ---
        page = page_request(request, lang)
        print(f"got {page.title}")

        # add the lang
        page.lang = lang

        # --- APPLY TEXTRANK ---
        sentences = scored_sentences(page.content, language=lang_full)
        # highest score first
        page.sentences = sorted(sentences, key=lambda s: s.score,
                                reverse=True)

        # debugging aid:
        # for s in page.sentences[:5]:
        #     print('[{score}] : {sentence}'.format(score=s.score, sentence=s.text))

        # TODO (original author's note, unimplemented): remove header
        # page.content =

        processed_pages.append(page)

    # --- TEMPLATING ---

    # getting the template; the encoding is explicit because the rendered
    # pages contain non-ASCII text and the platform default may not be UTF-8
    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as file:
        template = Template(file.read())

    # render template
    html = template.render(pages=processed_pages)

    with open(HTML_PATH, 'w', encoding='utf-8') as file:
        file.write(html)


if __name__ == '__main__':
    main()