master
Dorian 2 years ago
parent 4a0bfb820d
commit fcf80318be

@ -11,7 +11,7 @@ sys.path.append('../')
# importing required module
import summa.summarizer
from summa.summarizer import summarize
from summa.summarizer import scored_sentences
# TODO:
@ -167,7 +167,7 @@ def wiki_parse(sentences):
def txt2rankedsentences(txt):
    """Return the scored sentences of *txt*.

    Delegates to ``scored_sentences(txt, split=True)``. The previous
    ``summarize`` call is dropped: it sat directly in front of this return,
    which made the ``scored_sentences`` call unreachable.
    """
    return scored_sentences(txt, split=True)
# main

@ -1,233 +0,0 @@
from jinja2 import Template
import os
import wikipedia
from markdown import markdown
# importing module
import sys
# appending a path
# sys.path.append('textrank')
# importing required module
import summa.summarizer
from summa.summarizer import summarize
# TODO:
# * DONE: wiki header
# the 3 items below would require starting from the HTML itself and keeping an index...
# * wiki paragraph
# * wiki hyperlinks
# * list
# variables
# ------------------------------------------------------------------------
# Name of the Wikipedia page to look up; the main script passes it to
# wikipage(). The commented-out assignments are alternative page names.
# wikipedia_page = "forest"
# wikipedia_page = "warehouse"
# wikipedia_page = "river"
wikipedia_page = "elderflower"
# wikipedia_page = "mushroom"
# Path of the template file that the main script reads and renders.
TEMPLATE_PATH = 'template.html'
# Path of the file the rendered HTML is written to by the main script.
HTML_PATH = 'www/index.html'
# utilities
# ------------------------------------------------------------------------
def map_value(value, min, max, new_min, new_max):
    """Linearly rescale *value* from the range [min, max] to [new_min, new_max].

    The parameter names ``min`` and ``max`` shadow the builtins inside this
    function; they are kept unchanged so existing callers keep working.

    Returns ``new_min`` when ``min == max``: the source range is a single
    point, so there is nothing to interpolate and dividing by the span would
    raise ZeroDivisionError.
    """
    span = max - min
    if span == 0:
        return new_min
    return (((value - min) / span) * (new_max - new_min)) + new_min
def remap_score(s, min_score, max_score):
    """Normalise and invert the score of *s* in place, then return *s*.

    The score is mapped from [min_score, max_score] onto [0, 1] with
    ``map_value`` and subtracted from 1, so the highest input score
    becomes 0 and the lowest becomes 1.
    """
    normalised = map_value(s.score, min_score, max_score, 0, 1)
    s.score = 1 - normalised
    return s
def compress_score(s):
    """Cube the score of *s* in place, clamp it, and return *s*.

    Cubing pushes scores below 1 towards 0. Any cubed score above 0.8 is
    then snapped to exactly 1.
    """
    cubed = s.score ** 3
    if cubed > 0.8:
        s.score = 1
    else:
        s.score = cubed
    return s
# wikipedia
# ------------------------------------------------------------------------
def wikipage(pagename):
    """Fetch the English Wikipedia page best matching *pagename*.

    Prints the requested name, searches Wikipedia for a single result and
    loads that result as a ``wikipedia.WikipediaPage`` (following redirects,
    with preloading enabled).

    Returns the page object, or an empty string when the lookup raises a
    disambiguation error (the candidate options are printed in that case).

    Raises ``wikipedia.PageError`` when the search returns no result.
    """
    print(pagename)
    wikipedia.set_lang("en")
    try:
        matches = wikipedia.search(pagename, results=1, suggestion=False)
        try:
            pagename = matches[0]
        except IndexError:
            # no search result at all: the page doesn't exist
            raise wikipedia.PageError(pagename)
        return wikipedia.WikipediaPage(pagename, redirect=True, preload=True)
    except wikipedia.exceptions.DisambiguationError as ambiguous:
        print(ambiguous.options)
        return ''
# parsing and gluing html
# ------------------------------------------------------------------------
def is_header(s):
    """Detect a wikitext-style header such as ``== Title ==``.

    The header level is the number of ``=`` characters that match at both
    the start and the end of ``s.text``.

    Returns ``[header_text, header_level]`` when the level is at least 1,
    otherwise ``None``.
    """
    text = s.text
    length = len(text)
    level = 0
    # The bound keeps the leading and trailing positions from crossing, so
    # empty strings and strings made only of '=' no longer raise IndexError.
    while (level < length - 1 - level
           and text[level] == '='
           and text[length - 1 - level] == '='):
        level += 1
    if level > 0:
        # Strip exactly `level` markers from each side; the previous slice
        # removed one extra trailing character ("==Title==" lost its last letter).
        header_text = text[level:length - level].strip()
        return [header_text, level]
    return None
def wiki_parse(sentences):
    """Attach an HTML fragment to every sentence and return the kept ones.

    For each item of *sentences* (objects exposing ``text`` and ``score``):

    * a header (as detected by ``is_header``) gets an artificial score, the
      average score of the non-header sentences that follow it up to the
      next header of the same or a higher level, and is rendered as
      ``<hN style="opacity:...">``;
    * any other sentence is rendered as ``<span style="opacity:...">``.

    Processing stops, without keeping the header, at the first header named
    "References" or "See also". Progress is printed to stdout.

    Sentence and header text is HTML-escaped before being embedded, since it
    comes from an external page and is concatenated into markup.

    TODO: doesn't work with section nesting!!
    """
    # Local import so this function carries its own dependency.
    from html import escape

    new_sentences = []
    print('--- HEADERS ---')
    for i, s in enumerate(sentences):
        header = is_header(s)

        # not a header: plain sentence rendered as a span
        if not header:
            s.html = ('<span style="opacity:' + str(s.score) + ';">'
                      + escape(s.text, quote=False) + '</span>')
            new_sentences.append(s)
            continue

        title, level = header
        print(title)

        # collect the scores of the section that this header opens
        section_scores = []
        for s2 in sentences[i + 1:]:
            s2_header = is_header(s2)
            if s2_header:
                print(' ' + s2_header[0])
                if level >= s2_header[1]:
                    # header of the same or a higher level: section is over
                    print('X ' + s2_header[0])
                    break
            else:
                section_scores.append(s2.score)

        if section_scores:
            s.score = sum(section_scores) / len(section_scores)
        else:
            # Empty section: keep the original "NaN" marker.
            # NOTE(review): this produces `opacity:NaN` in the markup, which
            # is presumably ignored by browsers -- confirm that is intended.
            s.score = "NaN"

        tag = 'h' + str(level)
        s.html = ('<' + tag + ' style="opacity:' + str(s.score) + ';">'
                  + escape(title, quote=False) + '</' + tag + '>')

        # stops at the references part
        if title in ("References", "See also"):
            break
        new_sentences.append(s)
    return new_sentences
# textrank
# ------------------------------------------------------------------------
def txt2rankedsentences(txt):
    """Turn raw text into ranked sentences.

    Returns the result of ``summarize(txt, split=True)`` unchanged.
    NOTE(review): the main script reads ``.score`` and ``.text`` on each
    returned item -- presumably the imported ``summarize`` provides them;
    confirm against the summa version in use.
    """
    ranked = summarize(txt, split=True)
    return ranked
# main
# ------------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: fetch a Wikipedia page, score its sentences,
    # turn the scores into opacities and render them through the template.

    # --- WIKI REQUEST ---
    # get text from wikipedia
    print('--- WIKI ---')
    page = wikipage(wikipedia_page)
    if not page:
        sys.exit("--- STOP ---")
    title = '<h1>'+page.title+'</h1>'
    text = page.content
    # print text in terminal
    print('--- TXT ---')
    print(text)

    # --- APPLY TEXTRANK ---
    sentences = txt2rankedsentences(text)
    # nothing to rank: stop cleanly instead of failing on the min/max below
    if not sentences:
        sys.exit("--- NO SENTENCES ---")
    # print ranked sentences in terminal
    print('--- SENTENCES ---')
    for s in sentences:
        print('[{score}] : {sentence}'.format(score = s.score, sentence = s.text))

    # --- REMAP AND COMPRESS ---
    # score bounds, read before remap_score mutates the sentences
    scores = [s.score for s in sentences]
    max_score = max(scores)
    min_score = min(scores)
    # remap sentences from 0 to 1
    sentences = [remap_score(s, min_score, max_score) for s in sentences]
    # compress scores (make more stuff invisible)
    sentences = [compress_score(s) for s in sentences]

    # -- PARSE ---
    # parse every sentence to either a span or a header
    sentences = wiki_parse(sentences)
    # add back page title
    sentences = [{ 'html': title, 'text': page.title, 'score': 1 }] + sentences

    # -- TEMPLATING ---
    # getting the template; explicit UTF-8 so reading does not depend on the
    # platform's default encoding
    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as file:
        template = Template(file.read())
    # render template
    html = template.render(sentences = sentences)
    # the template declares charset UTF-8, so write the page as UTF-8
    with open(HTML_PATH, 'w', encoding='utf-8') as file:
        file.write(html)

@ -1,2 +1,2 @@
from summa import commons, graph, keywords, pagerank_weighted, \
summarizer, syntactic_unit, textrank
summarizer, edits, syntactic_unit, textrank

@ -0,0 +1,33 @@
from .pagerank_weighted import pagerank_weighted_scipy as _pagerank
from .preprocessing.textcleaner import clean_text_by_sentences as _clean_text_by_sentences
from .commons import build_graph as _build_graph
from .commons import remove_unreachable_nodes as _remove_unreachable_nodes
from .summarizer import _set_graph_edge_weights
from .summarizer import _add_scores_to_sentences
def scored_sentences(text, language="english", split=False, additional_stopwords=None):
    """Score every sentence of *text* and return the sentence objects.

    The text is cleaned into sentence units, a graph is built from their
    tokens, edge weights are set, unreachable nodes are removed and the
    PageRank scores are attached to the sentence units.

    Returns the list of sentence units. When the graph ends up empty the
    result is ``[]`` if *split* is true and ``""`` otherwise; *split* has no
    other effect here.

    Raises ValueError when *text* is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError("Text parameter must be a Unicode object (str)!")

    # Processed sentence units for the input text.
    units = _clean_text_by_sentences(text, language, additional_stopwords)

    # Graph over the sentence tokens, weighted by pairwise similarity.
    tokens = [unit.token for unit in units]
    sentence_graph = _build_graph(tokens)
    _set_graph_edge_weights(sentence_graph)

    # Drop nodes whose edges all have zero weight.
    _remove_unreachable_nodes(sentence_graph)

    # PageRank cannot be run on an empty graph.
    node_count = len(sentence_graph.nodes())
    if node_count == 0:
        return [] if split else ""

    # Rank the tokens with PageRank and copy the scores onto the units.
    ranking = _pagerank(sentence_graph)
    _add_scores_to_sentences(units, ranking)
    return units

@ -1,31 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>TextRank Opacity</title>
<meta name="description" content="a call for 2 desks in studio 5 of the Meyboom artist-run spaces">
<link rel="stylesheet" type="text/css" href="css/main.css" />
<link rel="stylesheet" type="text/css" href="css/typography.css" />
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<main>
{% for s in sentences %}
{{ s.html|safe }}
{% endfor %}
</main>
</body>
</html>

@ -1 +0,0 @@
A warehouse is a building for storing goods. Warehouses are used by manufacturers, importers, exporters, wholesalers, transport businesses, customs, etc. They are usually large plain buildings in industrial parks on the outskirts of cities, towns, or villages. They usually have loading docks to load and unload goods from trucks. Sometimes warehouses are designed for the loading and unloading of goods directly from railways, airports, or seaports. They often have cranes and forklifts for moving goods, which are usually placed on ISO standard pallets and then loaded into pallet racks. Stored goods can include any raw materials, packing materials, spare parts, components, or finished goods associated with agriculture, manufacturing, and production. In India and Hong Kong, a warehouse may be referred to as a "godown". There are also godowns in the Shanghai Bund.

@ -1,38 +0,0 @@
:root{
--lh: 1.35rem;
}
body{
margin: var(--lh);
line-height: var(--lh);
}
@media print{
body{
margin: 0;
font-size: 10pt;
}
}
main{
max-width: 42rem;
margin: 0 auto;
}
/* h1,h2,h3,h4,h5,h6{
line-height: var(--lh);
} */
h1{
text-align: center;
margin: calc(2 * var(--lh)) 0;
}
h2,h3,h4,h5,h6{
margin: calc(3 * var(--lh)) 0 var(--lh);
}
:is(h1,h2,h3,h4,h5,h6) + :is(h1,h2,h3,h4,h5,h6){
margin-top: var(--lh);
}

@ -1,177 +0,0 @@
<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="UTF-8" />
<title>TextRank Opacity</title>
<meta name="description" content="a call for 2 desks in studio 5 of the Meyboom artist-run spaces">
<link rel="stylesheet" type="text/css" href="css/main.css" />
<link rel="stylesheet" type="text/css" href="css/typography.css" />
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<main>
<h1>Sambucus</h1>
<span style="opacity:0.023354250368401927;">Sambucus is a genus of flowering plants in the family Adoxaceae.</span>
<span style="opacity:0.008019401476129553;">The various species are commonly called elder or elderberry.</span>
<span style="opacity:0.26031502027326375;">The genus was formerly placed in the honeysuckle family, Caprifoliaceae, but was reclassified as Adoxaceae due to genetic and morphological comparisons to plants in the genus Adoxa.</span>
<h2 style="opacity:0.11901683057809066;">Description</h2>
<span style="opacity:0.34993223091241904;">The oppositely arranged leaves are pinnate with 5–9 leaflets (or, rarely, 3 or 11).</span>
<span style="opacity:0.6657790550844742;">Each leaf is 5–30 cm (2–12 in) long, and the leaflets have serrated margins.</span>
<span style="opacity:0.15164144458890563;">They bear large clusters of small white or cream-colored flowers in late spring; these are followed by clusters of small black, blue-black, or red berries (rarely yellow or white).</span>
<h3 style="opacity:0.037512192700824155;">Color</h3>
<span style="opacity:0.002234682837227867;">Sambucus fruit is rich in anthocyanidins that combine to give elderberry juice an intense blue-purple coloration that turns reddish on dilution with water.</span>
<span style="opacity:0.04596217339828307;">These pigments are used as colorants in various products, and "elderberry juice color" is listed by the US FDA as allowable in certified organic food products.</span>
<span style="opacity:0.06433972186696155;">In Japan, elderberry juice is listed as an approved "natural color additive" under the Food and Sanitation Law. Fibers can be dyed with elderberry juice (using alum as a mordant) to give a light "elderberry" color.</span>
<h3 style="opacity:0.005859165534145169;">Toxicity</h3>
<span style="opacity:0.018703350472802083;">Although the cooked berries (pulp and skin) of most species of Sambucus are edible, the uncooked berries and other parts of plants from this genus are poisonous.</span>
<span style="opacity:0.0;">Leaves, twigs, branches, seeds, roots, flowers, and berries of Sambucus plants produce cyanogenic glycosides, which have toxic properties.</span>
<span style="opacity:0.0024907571958084017;">Ingesting a sufficient quantity of cyanogenic glycosides from berry juice, flower tea, or beverages made from fresh leaves, branches, and fruit has been shown to cause illness, including nausea, vomiting, abdominal cramps, diarrhea, and weakness.</span>
<span style="opacity:0.004068285156595224;">In August 1983, a group of 25 people in Monterey County, California, became suddenly ill by ingesting elderberry juice pressed from fresh, uncooked Sambucus mexicana berries, leaves, and stems.</span>
<span style="opacity:0.004033434845520135;">The density of cyanogenic glycosides is higher in tea made from flowers (or leaves) than from the berries.The seeds of Sambucus callicarpa are reported to be poisonous and may cause vomiting or diarrhea.</span>
<h2 style="opacity:0.3047373240294963;">Taxonomy</h2>
<span style="opacity:0.1435576912325227;">The taxonomy of the genus Sambucus L., originally described by Carl Linnaeus and hence its botanical authority, has been complicated by its wide geographical distribution and morphological diversity.</span>
<span style="opacity:0.4903541107710174;">This has led to overdescription of the species and infraspecific taxa (subspecies, varieties or forms).</span>
<span style="opacity:0.28030017008494884;">The name comes from the Greek word sambuce, an ancient wind instrument, about the removal of pith from the twigs to make whistles.Species recognized in this genus are:</span>
<h2 style="opacity:0.3774205134479782;">Distribution and habitat</h2>
<span style="opacity:0.5170795700859395;">The genus occurs in temperate to subtropical regions of the world.</span>
<span style="opacity:0.7289657600921431;">More widespread in the Northern Hemisphere, its Southern Hemisphere occurrence is restricted to parts of Australasia and South America.</span>
<span style="opacity:0.004931060704500269;">Many species are widely cultivated for their ornamental leaves, flowers, and fruit.</span>
<h3 style="opacity:0.34774180081331607;">Habitat</h3>
<span style="opacity:0.23667717197372362;">Elder commonly grows near farms and homesteads.</span>
<span style="opacity:0.3192589413693254;">It is a nitrogen-dependent plant and thus is generally found near places of organic waste disposal.</span>
<span style="opacity:0.20272560262481226;">Elders are often grown as a hedgerow plant in Britain since they take very fast, can be bent into shape easily, and grow quite profusely, thus having gained the reputation of being 'an instant hedge'.</span>
<span style="opacity:0.632305487285403;">It is not generally affected by soil type or pH level and will virtually grow anywhere sufficient sunlight is available.</span>
<h2 style="opacity:0.36011640223198155;">Ecology</h2>
<span style="opacity:0.09934447961441183;">In Northern California, elderberries are a food for migrating band-tailed pigeons.</span>
<span style="opacity:0.10833689405967695;">Elders are used as food plants by the larvae of some Lepidoptera species including brown-tail, buff ermine, dot moth, emperor moth, engrailed moth, swallow-tailed moth and the V-pug.</span>
<span style="opacity:0.40509224339149436;">The crushed foliage and immature fruit have a strong fetid smell.</span>
<span style="opacity:0.16493413805985815;">Valley elderberry longhorn beetles in California are very often found around red or blue elderberry bushes.</span>
<span style="opacity:1;">Females lay their eggs on the bark.</span>
<span style="opacity:0.38299065826644807;">The pith of elder has been used by watchmakers for cleaning tools before intricate work.</span>
<h2 style="opacity:0.4959298303208725;">Cultivation</h2>
<span style="opacity:0.0023273321750337233;">Traditional uses of Sambucus involved berries, seeds, leaves, and flowers or component extracts.</span>
<span style="opacity:0.021538616933372428;">Ornamental varieties of Sambucus are grown in gardens for their showy flowers, fruits and lacy foliage which support habitat for wildlife.</span>
<span style="opacity:0.37967191922582566;">Of the many native species, three are used as ornamentals, S.</span>
<span style="opacity:0.5720411135910031;">nigra, S.</span>
<span style="opacity:1;">canadensis and S.</span>
<span style="opacity:1;">racemosa.</span>
<h2 style="opacity:0.26037935627574993;">Uses</h2>
<h3 style="opacity:0.17679277695747428;">Nutrition</h3>
<span style="opacity:0.16562834706461427;">Raw elderberries are 80% water, 18% carbohydrates, and less than 1% each of protein and fat (table).</span>
<span style="opacity:0.1368265507355418;">In a 100-gram (3+12 oz) amount, elderberries supply 305 kilojoules (73 kcal) of food energy and are a rich source of vitamin C, providing 43% of the Daily Value (DV).</span>
<span style="opacity:0.2279234330722667;">Elderberries also have moderate contents of vitamin B6 (18% DV) and iron (12% DV), with no other nutrients in significant content.</span>
<h3 style="opacity:0.2385522392706748;">Dietary supplement</h3>
<span style="opacity:0.007324180292626893;">Elderberry fruit or flowers are used as dietary supplements to prevent or provide relief from minor diseases, such as flu, colds, constipation, and other conditions, served as a tea, extract or in a capsule.</span>
<span style="opacity:0.07793442880325537;">The use of elderberry supplements increased early in the COVID-19 pandemic.</span>
<span style="opacity:0.5690343662424139;">There is insufficient research to establish its effectiveness for such uses, or its safety profile.</span>
<span style="opacity:0.3046053054729051;">The raw or unripe fruit of S.</span>
<span style="opacity:0.23386291554217284;">nigra or its extracts may contain a cyanogenic glycoside that is potentially toxic.</span>
<h3 style="opacity:0.30036772549577245;">Traditional medicine</h3>
<span style="opacity:0.12635509010408633;">Although practitioners of traditional medicine have used elderberry over centuries, there is no high-quality clinical evidence that such practices provide any benefit.</span>
<span style="opacity:0.04619027362780958;">The flowers of Sambucus nigra are used to produce elderflower cordial.</span>
<span style="opacity:0.5605661316829006;">St-Germain, a French liqueur, is made from elderflowers.</span>
<span style="opacity:0.6249597590948577;">Hallands Fläder, a Swedish akvavit, is flavoured with elderflowers.</span>
<span style="opacity:0.058473741273256635;">Hollowed elderberry twigs have traditionally been used as spiles to tap maple trees for syrup.</span>
<span style="opacity:0.36742927908821876;">Additionally, they have been hollowed out and used as flutes, blowguns, and syringes.The fruit of S.</span>
<span style="opacity:0.7665194357270494;">callicarpa is eaten by birds and mammals.</span>
<span style="opacity:0.04897270752302531;">It is inedible to humans when raw but can be made into wine.Elderberry twigs and fruit are employed in creating dyes for basketry.</span>
<span style="opacity:0.10384311134074758;">These stems are dyed a very deep black by soaking them in a wash made from the berry stems of the elderberry.</span>
<h2 style="opacity:0.1940215897275913;">In popular culture</h2>
<span style="opacity:0.26902332053709666;">Folklore related to elder trees is extensive and can vary according to region.</span>
<span style="opacity:0.042182724177350944;">In some traditions, the elder tree is thought to ward off evil and give protection from witches, while other beliefs say that witches often congregate under the plant, especially when it is full of fruit.</span>
<span style="opacity:0.24981940690065454;">If an elder tree was cut down, a spirit known as the Elder Mother would be released and take her revenge.</span>
<span style="opacity:0.18680917834736657;">The tree could only safely be cut while chanting a rhyme to the Elder Mother.Made from the branch of an elder tree, the Elder Wand plays a pivotal role in the final book of the Harry Potter series, which was nearly named Harry Potter and the Elder Wand before author J.</span>
<span style="opacity:0.22868031954081833;">K. Rowling decided on Harry Potter and the Deathly Hallows.Elton John's 1973 album Don't Shoot Me I'm Only the Piano Player features a song titled "Elderberry Wine".</span>
<span style="opacity:0.1876145888622608;">In Monty Python and the Holy Grail, John Cleese as the French Taunter tells the knights of Camelot, "Your mother was a hamster, and your father smelt of elderberries."</span>
<h2 style="opacity:NaN;">Gallery</h2>
</main>
</body>
</html>
Loading…
Cancel
Save