You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

125 lines
3.4 KiB
Python

from flask import Flask
from flask import request, render_template
from markdown import markdown
import sys
# Flask application object; the route handlers in this file are registered on it.
app = Flask("recommend")
# Extend the module search path with '../' so the customised modules imported
# below can be found. The entry is relative, so it is resolved against the
# current working directory (presumably the app is started from this file's
# own directory -- confirm).
sys.path.append('../')
# importing customised module
import summa.edits
from summa.edits import scored_sentences, similarity_graph
import wikipage
from wikipage.page import get_wikipage
# Module-wide mutable state shared by new_page() and index().
context = dict(
    page_name="door",       # page name handed to new_page() when no token is given
    page=None,              # result of get_wikipage(), stored by new_page()
    graph=None,             # result of similarity_graph(), stored by new_page()
    sentences=None,         # result of scored_sentences(), stored by new_page()
    current_sentence=None,  # sentence currently displayed, stored by index()
)
# TextRank
# ------------------------------------------------------------------------
def get_similar_sentences(s0, sentences, graph):
    """Rank ``sentences`` by similarity to ``s0``, most similar first.

    The similarity of a candidate ``s`` is whatever
    ``graph.edge_weight((s0.token, s.token))`` returns. Candidates with the
    same weight keep their relative input order.

    Returns a new list; the ``sentences`` argument is not reordered.
    """
    def weight_to_reference(candidate):
        # The graph is addressed by sentence tokens, not sentence objects.
        return graph.edge_weight((s0.token, candidate.token))

    ranked = list(sentences)
    ranked.sort(key=weight_to_reference, reverse=True)
    return ranked
# main
# ------------------------------------------------------------------------
def new_page(page_name):
    """Load the page called ``page_name`` and store its TextRank data.

    Fills the module-level ``context`` with:
      * ``page``      -- the object returned by ``get_wikipage(page_name)``
      * ``sentences`` -- ``scored_sentences`` applied to the page content
      * ``graph``     -- ``similarity_graph`` applied to the page content

    If ``get_wikipage`` returns a falsy value the process is stopped through
    ``sys.exit("--- STOP ---")``, which raises ``SystemExit``.
    """
    # --- WIKI REQUEST ---
    print('--- WIKI ---')
    page = get_wikipage(page_name)
    context['page'] = page
    if not page:
        sys.exit("--- STOP ---")

    # Echo the raw page text to the terminal.
    content = page.content
    print('--- TXT ---')
    print(content)

    # --- APPLY TEXTRANK ---
    context['sentences'] = scored_sentences(content)
    context['graph'] = similarity_graph(content)
@app.route("/")
def index():
    """Render the current sentence together with all sentences ranked by similarity.

    Query parameters:
        token: optional. When present, the sentence whose ``.token`` equals it
            becomes the current sentence. When absent, the page named by
            ``context['page_name']`` is (re)loaded through ``new_page`` and
            its highest-scoring sentence becomes the current one.

    Returns:
        The rendered ``index.html`` template, or a ``(message, 404)`` tuple
        when the token matches no sentence or the page produced no sentences.
    """
    token = request.args.get('token')

    # A token link can be the first request this process receives (restart,
    # bookmarked URL). context['sentences'] is still None then, so load the
    # page first instead of failing while iterating None.
    if token is None or context['sentences'] is None:
        new_page(context['page_name'])

    sentences = context['sentences']

    if token is None:
        # max() returns the first top-scoring sentence, the same element a
        # stable descending sort would put at index 0, without sorting.
        best_sentence = max(sentences, key=lambda s: s.score, default=None)
        if best_sentence is None:
            return "No sentences found for this page", 404
        print("--- START/BEST ---")
        print(best_sentence.text)
        context['current_sentence'] = best_sentence
    else:
        selected = next((s for s in sentences if s.token == token), None)
        if selected is None:
            # Unknown or stale token: answer 404 instead of raising IndexError.
            return "Unknown sentence token", 404
        context['current_sentence'] = selected

    current_sentence = context['current_sentence']
    # All sentences ordered by similarity to the current one.
    similar_sentences = get_similar_sentences(current_sentence, sentences, context['graph'])
    return render_template(
        'index.html',
        current_sentence=current_sentence,
        similar_sentences=similar_sentences,
    )
# # --- APPLY TEXTRANK ---
# # apply textrank
# sentences = scored_sentences(text)
# graph = similarity_graph(text)
# # print ranked sentences in terminal
# print('--- GRAPH ---')
# # note:
# # * the graph's nodes are the sentences' `.token` values, not the sentence objects themselves
# sorted_sentences = sorted(sentences, key=lambda s: s.score, reverse=True)
# best_sentence = sorted_sentences[0]
# print("START/BEST: \n")
# print(best_sentence.text + "\n")
# # sentences ordered by similarity to the best one
# similar_sentences = similar_sentences(best_sentence, sentences, graph)
# for i in range(len(similar_sentences)):
# print(str(i) + ". " + similar_sentences[i].text)
# # for s1 in sentences:
# # for s2 in sentences:
# # if s1 != s2:
# # if graph.has_node(s1.token) and graph.has_node(s2.token):
# # weight = graph.edge_weight((s1.token, s2.token))
# # print('---')
# # print('1. ' + s1.text)
# # print('2. ' + s2.text)
# # print('similarity: ' + str(weight))