second experiment as a flask interactive application to jump between recommended sentences using the similarity matrix
parent
8ae06f04df
commit
fb71713ade
Binary file not shown.
@ -0,0 +1,124 @@
|
||||
from flask import Flask
|
||||
from flask import request, render_template
|
||||
from markdown import markdown
|
||||
import sys
|
||||
|
||||
app = Flask("recommend")
|
||||
|
||||
# add the parent directory to the import path so the packages imported below can be found there
|
||||
sys.path.append('../')
|
||||
|
||||
# import the customised summa module (sentence scoring and similarity graph)
|
||||
import summa.edits
|
||||
from summa.edits import scored_sentences, similarity_graph
|
||||
|
||||
import wikipage
|
||||
from wikipage.page import get_wikipage
|
||||
|
||||
# Module-level state shared by new_page() and the index() view.
context = {
    # name of the wikipedia page that new_page() is asked to load
    "page_name": "door",
    # page object returned by get_wikipage(), set by new_page()
    "page": None,
    # result of similarity_graph(text), set by new_page()
    "graph": None,
    # result of scored_sentences(text), set by new_page()
    "sentences": None,
    # sentence currently displayed, set by index()
    "current_sentence": None,
}
|
||||
|
||||
# TextRank
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_similar_sentences(s0, sentences, graph):
    """Return all of ``sentences`` ranked by their similarity to ``s0``.

    The similarity of a candidate ``s`` is the value ``graph.edge_weight``
    returns for the edge ``(s0.token, s.token)``. The result is a new list,
    highest weight first; ``sentences`` itself is left untouched.
    """

    def similarity_to_reference(candidate):
        # the graph is addressed by sentence tokens, not by sentence objects
        return graph.edge_weight((s0.token, candidate.token))

    ranked = list(sentences)
    ranked.sort(key=similarity_to_reference, reverse=True)
    return ranked
|
||||
|
||||
|
||||
# main
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
def new_page(page_name):
    """Load a wikipedia page and prepare its TextRank data in ``context``.

    From ``page_name`` this:
      1. fetches the page and takes its text,
      2. stores the scored sentences and the similarity graph of that text.

    Writes ``context['page']``, ``context['sentences']`` and
    ``context['graph']``; returns nothing. Exits the process through
    ``sys.exit`` when ``get_wikipage`` returns a falsy value.
    """

    # --- WIKI REQUEST ---

    print('--- WIKI ---')
    page = get_wikipage(page_name)
    context['page'] = page

    # NOTE(review): this exits the whole process, also when called while
    # serving a request -- confirm that is intended.
    if not page:
        sys.exit("--- STOP ---")

    text = page.content

    # echo the raw text in the terminal
    print('--- TXT ---', text, sep='\n')

    # --- APPLY TEXTRANK ---

    context['sentences'] = scored_sentences(text)
    context['graph'] = similarity_graph(text)
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Render the current sentence next to all sentences ranked by similarity.

    Query arguments:
        token: optional; the ``token`` of the sentence to display.

    Without ``token`` the page named by ``context['page_name']`` is loaded
    through ``new_page`` and its best-scored sentence is displayed. With
    ``token`` the first sentence carrying that token is displayed.

    Two situations are handled instead of failing with a server error:
      * a ``token`` link is opened before any page was loaded (for example
        after a server restart): the page is loaded first;
      * ``token`` matches no sentence: the best-scored sentence is shown.

    Side effects: updates ``context['current_sentence']`` (and everything
    ``new_page`` writes when it is called).
    """
    token = request.args.get('token')

    # Load the page when starting over, and also when nothing is loaded yet.
    if token is None or context['sentences'] is None:
        new_page(context['page_name'])

    sentences = context['sentences']

    current_sentence = None
    if token is not None:
        current_sentence = next((s for s in sentences if s.token == token), None)

    if current_sentence is None:
        # max() yields the first sentence holding the highest score, which is
        # the sentence a descending sort by score would place first.
        current_sentence = max(sentences, key=lambda s: s.score)

        print("--- START/BEST ---")
        print(current_sentence.text)

    context['current_sentence'] = current_sentence

    # sentences ordered by similarity to the current one
    similar_sentences = get_similar_sentences(current_sentence, sentences, context['graph'])

    return render_template(
        'index.html',
        current_sentence=current_sentence,
        similar_sentences=similar_sentences,
    )
|
||||
|
||||
|
||||
# # --- APPLY TEXTRANK ---
|
||||
|
||||
# # apply textrank
|
||||
# sentences = scored_sentences(text)
|
||||
# graph = similarity_graph(text)
|
||||
|
||||
# # print ranked sentences in terminal
|
||||
# print('--- GRAPH ---')
|
||||
|
||||
# # notes:
|
||||
# # * the matrix/graph does not contain the sentences but their .token
|
||||
|
||||
# sorted_sentences = sorted(sentences, key=lambda s: s.score, reverse=True)
|
||||
# best_sentence = sorted_sentences[0]
|
||||
|
||||
# print("START/BEST: \n")
|
||||
# print(best_sentence.text + "\n")
|
||||
|
||||
# # sentences ordered by similarity to the best one
|
||||
# similar_sentences = similar_sentences(best_sentence, sentences, graph)
|
||||
|
||||
# for i in range(len(similar_sentences)):
|
||||
# print(str(i) + ". " + similar_sentences[i].text)
|
||||
|
||||
|
||||
# # for s1 in sentences:
|
||||
# # for s2 in sentences:
|
||||
|
||||
# # if s1 != s2:
|
||||
# # if graph.has_node(s1.token) and graph.has_node(s2.token):
|
||||
|
||||
# # weight = graph.edge_weight((s1.token, s2.token))
|
||||
|
||||
# # print('---')
|
||||
# # print('1. ' + s1.text)
|
||||
# # print('2. ' + s2.text)
|
||||
# # print('similarity: ' + str(weight))
|
@ -1,56 +0,0 @@
|
||||
from jinja2 import Template
|
||||
from markdown import markdown
|
||||
import sys
|
||||
|
||||
# appending a path
|
||||
sys.path.append('../')
|
||||
|
||||
# importing customised module
|
||||
import summa.edits
|
||||
from summa.edits import scored_sentences, similarity_graph
|
||||
|
||||
import wikipage
|
||||
from wikipage.page import get_wikipage
|
||||
|
||||
wikipedia_page = "mushroom"
|
||||
|
||||
# main
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
if __name__ == '__main__':

    # --- WIKI REQUEST ---

    # fetch the page; stop the script when nothing usable comes back
    print('--- WIKI ---')
    page = get_wikipage(wikipedia_page)
    if not page:
        sys.exit("--- STOP ---")

    # page title wrapped as an html heading, and the page text
    title, text = '<h1>' + page.title + '</h1>', page.content

    # echo the raw text in the terminal
    print('--- TXT ---', text, sep='\n')

    # --- APPLY TEXTRANK ---

    # similarity graph of the page text
    graph = similarity_graph(text)

    print('--- GRAPH ---')
|
||||
|
||||
|
||||
# for i in len(graph.nodes()):
|
||||
# for j in len(graph.nodes()):
|
||||
|
||||
# s1 = graph.nodes()[i]
|
||||
# s2 = graph.nodes()[j]
|
||||
# weight = graph.edge_weight((i, j))
|
||||
|
||||
# print('---')
|
||||
# print('1. ' + s1)
|
||||
# print('2. ' + s1)
|
||||
# print('similarity: ' + weight)
|
||||
|
@ -0,0 +1,4 @@
|
||||
|
||||
# Start the Flask development server for the `app` module.
export FLASK_APP=app

# FLASK_ENV was deprecated in Flask 2.2 and removed in 2.3, where it no longer
# turns debug mode on. It is kept for older installs; FLASK_DEBUG=1 enables
# debug mode on old and new versions alike.
export FLASK_ENV=development
export FLASK_DEBUG=1

flask run
|
@ -0,0 +1,47 @@
|
||||
|
||||
/* line-height shared through a custom property */
:root {
    --lh: 1.35;
}

/* two-column grid (2fr / 1fr) with a wide gutter */
body {
    display: grid;
    grid-template-columns: 2fr 1fr;
    gap: 6em;

    line-height: var(--lh);
}

/* first column: padded on every side except the right one */
main {
    padding: 6em 0 6em 6em;
}

/* the current sentence: a rounded box with its text centred */
main p {
    display: flex;
    justify-content: center;
    align-items: center;

    box-sizing: border-box;
    height: 100%;
    padding: 4em;

    border: thin solid lightgrey;
    border-radius: 1em;
}

/* second column: full viewport height, scrolls on its own */
aside {
    box-sizing: border-box;
    height: 100vh;
    overflow: auto;

    padding: 2em 6em 2em 0;
    font-size: 0.8em;
}

aside li {
    padding: 2em 0;
}

h2 {
    font-weight: bold;
}
|
@ -0,0 +1,48 @@
|
||||
/* http://meyerweb.com/eric/tools/css/reset/
   v2.0 | 20110126
   License: none (public domain)
*/

/* strip default spacing, borders and font styling from every element */
html, body, div, span, applet, object, iframe,
h1, h2, h3, h4, h5, h6, p, blockquote, pre,
a, abbr, acronym, address, big, cite, code,
del, dfn, em, img, ins, kbd, q, s, samp,
small, strike, strong, sub, sup, tt, var,
b, u, i, center,
dl, dt, dd, ol, ul, li,
fieldset, form, label, legend,
table, caption, tbody, tfoot, thead, tr, th, td,
article, aside, canvas, details, embed,
figure, figcaption, footer, header, hgroup,
menu, nav, output, ruby, section, summary,
time, mark, audio, video {
    margin: 0;
    padding: 0;
    border: 0;
    font-size: 100%;
    font: inherit;
    vertical-align: baseline;
}

/* HTML5 display-role reset for older browsers */
article, aside, details, figcaption, figure,
footer, header, hgroup, menu, nav, section {
    display: block;
}

body {
    line-height: 1;
}

/* lists without markers */
ol, ul {
    list-style: none;
}

/* no automatic quotation marks */
blockquote, q {
    quotes: none;
}

blockquote:before, blockquote:after,
q:before, q:after {
    content: '';
    content: none;
}

table {
    border-collapse: collapse;
    border-spacing: 0;
}
|
@ -0,0 +1,42 @@
|
||||
<!DOCTYPE html>
{# Shows `current_sentence` next to a list of `similar_sentences`; each entry
   links back to the index view with that sentence's token. #}
<html lang="en">

<head>

    <meta charset="UTF-8" />

    <title>Recommended Reading</title>

    <meta name="description" content="">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    {# url_for builds the static URLs, so the links do not depend on the path
       this page is served from #}
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/reset.css') }}" />
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/main.css') }}" />

</head>

<body>

    <main>
        <p>
            {{ current_sentence.text }}
        </p>
    </main>

    <aside>
        <nav>
            <h2>recommended:</h2>
            <ul>
                {% for s in similar_sentences %}
                <li>
                    {# url_for URL-encodes the token; pasted raw into the query
                       string, spaces or characters such as & and # would
                       corrupt the link #}
                    <a href="{{ url_for('index', token=s.token) }}">{{ s.text }}</a>
                </li>
                {% endfor %}
            </ul>
        </nav>
    </aside>

</body>

</html>
|
Binary file not shown.
Loading…
Reference in New Issue