wikipage getter in a separate folder so it's also usable from different experiments
parent
fcf80318be
commit
8ae06f04df
@ -0,0 +1,56 @@
|
||||
from jinja2 import Template
|
||||
from markdown import markdown
|
||||
import sys
|
||||
|
||||
# appending a path
|
||||
sys.path.append('../')
|
||||
|
||||
# importing customised module
|
||||
import summa.edits
|
||||
from summa.edits import scored_sentences, similarity_graph
|
||||
|
||||
import wikipage
|
||||
from wikipage.page import get_wikipage
|
||||
|
||||
wikipedia_page = "mushroom"


# main
# ------------------------------------------------------------------------

def main():
    """Fetch the configured Wikipedia page, print it and build its graph.

    Steps, in order:
      1. request ``wikipedia_page`` through ``get_wikipage``; exit with
         the message ``--- STOP ---`` when the result is falsy,
      2. print the page content to the terminal,
      3. pass the content to ``similarity_graph``.
    """

    # --- WIKI REQUEST ---

    # ask wikipedia for the page
    print('--- WIKI ---')
    page = get_wikipage(wikipedia_page)
    if not page:
        # nothing usable came back, stop the script here
        sys.exit("--- STOP ---")

    # NOTE(review): `title` is not used anywhere in the visible code
    title = '<h1>' + page.title + '</h1>'
    text = page.content

    # dump the raw page text in the terminal
    print('--- TXT ---')
    print(text)

    # --- APPLY TEXTRANK ---

    # build the sentence similarity graph from the page text
    graph = similarity_graph(text)

    # only the section header is printed for now, the loop below is disabled
    print('--- GRAPH ---')

    # Disabled draft for printing every sentence pair with its similarity.
    # NOTE(review): before enabling it, `len(...)` needs to be wrapped in
    # `range(...)` (an int is not iterable), the second print should use
    # `s2`, and `weight` presumably needs `str()` for the concatenation;
    # whether `edge_weight` takes indices or the nodes themselves should
    # be confirmed against `summa.edits`.
    #
    # for i in len(graph.nodes()):
    #     for j in len(graph.nodes()):
    #
    #         s1 = graph.nodes()[i]
    #         s2 = graph.nodes()[j]
    #         weight = graph.edge_weight((i, j))
    #
    #         print('---')
    #         print('1. ' + s1)
    #         print('2. ' + s1)
    #         print('similarity: ' + weight)


if __name__ == '__main__':
    main()
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,25 @@
|
||||
import wikipedia
|
||||
|
||||
wikipedia.set_lang("en")
|
||||
|
||||
# wikipedia
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
def get_wikipage(pagename):
    """Look up a Wikipedia page by name and return it.

    The name is first passed to ``wikipedia.search`` (one result, no
    suggestion); the top hit is the title that actually gets loaded.

    Parameters:
        pagename: name / search term of the wanted page.

    Returns:
        A ``wikipedia.WikipediaPage`` created with ``redirect=True`` and
        ``preload=True`` for the top search hit, or the empty string
        ``''`` when a ``DisambiguationError`` is raised (its options are
        printed to the terminal in that case).

    Raises:
        wikipedia.PageError: when the search returns no results.
    """
    # echo the requested name in the terminal
    print(pagename)

    try:
        hits = wikipedia.search(pagename, results=1, suggestion=False)
        try:
            best_match = hits[0]
        except IndexError:
            # no search results at all: the page doesn't exist
            raise wikipedia.PageError(pagename)
        return wikipedia.WikipediaPage(best_match, redirect=True, preload=True)
    except wikipedia.exceptions.DisambiguationError as ambiguous:
        # ambiguous title: show the candidate pages, then fall through
        print(ambiguous.options)

    # empty string is the falsy "no page" result
    return ''
|
||||
|
Loading…
Reference in New Issue