wikipage getter in a separate folder so it's also usable from different experiments
parent
fcf80318be
commit
8ae06f04df
@ -0,0 +1,56 @@
|
|||||||
|
from jinja2 import Template
|
||||||
|
from markdown import markdown
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# appending a path
|
||||||
|
sys.path.append('../')
|
||||||
|
|
||||||
|
# importing customised module
|
||||||
|
import summa.edits
|
||||||
|
from summa.edits import scored_sentences, similarity_graph
|
||||||
|
|
||||||
|
import wikipage
|
||||||
|
from wikipage.page import get_wikipage
|
||||||
|
|
||||||
|
wikipedia_page = "mushroom"
|
||||||
|
|
||||||
|
# main
|
||||||
|
# ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main():
    """Fetch the configured Wikipedia page, print its text and graph it.

    Looks up ``wikipedia_page`` through ``get_wikipage``, stops the run
    with ``--- STOP ---`` when nothing usable comes back, prints the page
    content and hands that content to ``similarity_graph``.
    """

    # --- WIKI REQUEST ---

    print('--- WIKI ---')
    page = get_wikipage(wikipedia_page)
    if not page:
        # A falsy result means there is no page to work with: abort here.
        sys.exit("--- STOP ---")

    # HTML heading built from the page title; nothing in the visible
    # script reads it yet.
    title = '<h1>' + page.title + '</h1>'
    text = page.content

    # Show the raw page text in the terminal.
    print('--- TXT ---')
    print(text)

    # --- APPLY TEXTRANK ---

    # Feed the page text to summa's similarity_graph.
    graph = similarity_graph(text)

    # Header for the (currently disabled) per-pair similarity listing.
    print('--- GRAPH ---')

    # Disabled draft of the pairwise listing, kept as written.
    # NOTE(review): before re-enabling, fix the draft: `for i in len(...)`
    # iterates over an int (needs range(...)), the second print repeats s1
    # instead of s2, and 'similarity: ' + weight presumably needs str()
    # around the weight - confirm against summa.edits.
    #
    # for i in len(graph.nodes()):
    # 	for j in len(graph.nodes()):
    #
    # 		s1 = graph.nodes()[i]
    # 		s2 = graph.nodes()[j]
    # 		weight = graph.edge_weight((i, j))
    #
    # 		print('---')
    # 		print('1. ' + s1)
    # 		print('2. ' + s1)
    # 		print('similarity: ' + weight)


if __name__ == '__main__':
    main()
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,25 @@
|
|||||||
|
import wikipedia
|
||||||
|
|
||||||
|
wikipedia.set_lang("en")
|
||||||
|
|
||||||
|
# wikipedia
|
||||||
|
# ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_wikipage(pagename):
    """Return the Wikipedia page that best matches *pagename*.

    The name is printed, then passed to ``wikipedia.search`` (one result,
    no suggestion). The first hit is loaded as a
    ``wikipedia.WikipediaPage`` with ``redirect=True`` and
    ``preload=True``.

    Returns:
        The loaded ``WikipediaPage``, or an empty string when a
        ``DisambiguationError`` is raised (its options are printed).

    Raises:
        wikipedia.PageError: when the search yields no results.
    """
    print(pagename)

    try:
        hits = wikipedia.search(pagename, results=1, suggestion=False)
        try:
            best_match = hits[0]
        except IndexError:
            # No search hit at all: treat the page as non-existent.
            raise wikipedia.PageError(pagename)
        return wikipedia.WikipediaPage(best_match, redirect=True, preload=True)
    except wikipedia.exceptions.DisambiguationError as ambiguous:
        # The name maps to several pages; show the candidates to the user.
        print(ambiguous.options)

    # Falsy placeholder so callers can test the result with `if not page`.
    return ''
|
||||||
|
|
Loading…
Reference in New Issue