You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
3.4 KiB
Python
125 lines
3.4 KiB
Python
from flask import Flask
|
|
from flask import request, render_template
|
|
from markdown import markdown
|
|
import sys
|
|
|
|
app = Flask("recommend")
|
|
|
|
# appending a path
|
|
sys.path.append('../')
|
|
|
|
# importing customised module
|
|
import summa.edits
|
|
from summa.edits import scored_sentences, similarity_graph
|
|
|
|
import wikipage
|
|
from wikipage.page import get_wikipage
|
|
|
|
# Shared mutable application state; read and written by the view functions.
context = {
    'page_name': "door",        # default Wikipedia article to load
    'page': None,               # wikipedia page object, set by new_page()
    'graph': None,              # sentence-similarity graph from TextRank
    'sentences': None,          # scored sentences from TextRank
    'current_sentence': None,   # sentence currently shown to the user
}
|
|
|
|
# TextRank
|
|
# ------------------------------------------------------------------------
|
|
|
|
|
|
def get_similar_sentences(s0, sentences, graph):
    """Return *sentences* ordered by descending similarity to *s0*.

    Similarity is looked up in *graph* as the weight of the edge between
    the two sentences' ``.token`` attributes.
    """
    def similarity_to_s0(sentence):
        return graph.edge_weight((s0.token, sentence.token))

    return sorted(sentences, key=similarity_to_s0, reverse=True)
|
|
|
|
|
|
# main
|
|
# ------------------------------------------------------------------------
|
|
|
|
def new_page(page_name):
    """Fetch a Wikipedia article and rebuild the TextRank state in ``context``.

    1. Download the page text for *page_name* and store it in ``context['page']``.
    2. Compute the scored sentences and the similarity graph from the text.

    Terminates the process if the page cannot be fetched.
    """
    # --- WIKI REQUEST ---

    # get text from wikipedia
    print('--- WIKI ---')
    context['page'] = get_wikipage(page_name)
    if not context['page']:
        # NOTE(review): sys.exit here kills the whole server process, not
        # just the request — confirm this is intended for a Flask app.
        sys.exit("--- STOP ---")
    text = context['page'].content

    # echo the raw article text to the terminal
    print('--- TXT ---')
    print(text)

    # --- APPLY TEXTRANK ---
    context['sentences'] = scored_sentences(text)
    context['graph'] = similarity_graph(text)
|
|
|
|
|
|
@app.route("/")
def index():
    """Render the main page.

    With a ``token`` query argument, the matching sentence becomes the
    current one; otherwise the default page is (re)loaded and the
    best-scored sentence is selected as the starting point.
    """
    if 'token' in request.args:
        token = request.args['token']
        matches = [s for s in context['sentences'] if s.token == token]
        context['current_sentence'] = matches[0]
    else:
        new_page(context['page_name'])
        ranked = sorted(context['sentences'], key=lambda s: s.score, reverse=True)
        best_sentence = ranked[0]

        print("--- START/BEST ---")
        print(best_sentence.text)

        context['current_sentence'] = best_sentence

    # sentences ordered by similarity to the current one
    similar_sentences = get_similar_sentences(
        context['current_sentence'], context['sentences'], context['graph'])

    current_sentence = context['current_sentence']
    return render_template(
        'index.html',
        current_sentence=current_sentence,
        similar_sentences=similar_sentences)
|
|
|
|
|
|
# # --- APPLY TEXTRANK ---
|
|
|
|
# # apply textrank
|
|
# sentences = scored_sentences(text)
|
|
# graph = similarity_graph(text)
|
|
|
|
# # print ranked sentences in terminal
|
|
# print('--- GRAPH ---')
|
|
|
|
# # notes:
|
|
# # * the matrix/graph does not contain the sentences but their .token
|
|
|
|
# sorted_sentences = sorted(sentences, key=lambda s: s.score, reverse=True)
|
|
# best_sentence = sorted_sentences[0]
|
|
|
|
# print("START/BEST: \n")
|
|
# print(best_sentence.text + "\n")
|
|
|
|
# # sentences ordered by similarity to the best one
|
|
# similar_sentences = similar_sentences(best_sentence, sentences, graph)
|
|
|
|
# for i in range(len(similar_sentences)):
|
|
# print(str(i) + ". " + similar_sentences[i].text)
|
|
|
|
|
|
# # for s1 in sentences:
|
|
# # for s2 in sentences:
|
|
|
|
# # if s1 != s2:
|
|
# # if graph.has_node(s1.token) and graph.has_node(s2.token):
|
|
|
|
# # weight = graph.edge_weight((s1.token, s2.token))
|
|
|
|
# # print('---')
|
|
# # print('1. ' + s1.text)
|
|
# # print('2. ' + s2.text)
|
|
# # print('similarity: ' + str(weight))
|