# thewarehouseandtheforest/summa/edits.py

from .pagerank_weighted import pagerank_weighted_scipy as _pagerank
from .preprocessing.textcleaner import clean_text_by_sentences as _clean_text_by_sentences
from .commons import build_graph as _build_graph
from .commons import remove_unreachable_nodes as _remove_unreachable_nodes
from .summarizer import _set_graph_edge_weights
from .summarizer import _add_scores_to_sentences


def scored_sentences(text, language="english", split=False, additional_stopwords=None):
    """Attach PageRank scores to the sentences of ``text`` and return them.

    The text is cleaned into sentence objects, a graph is built from their
    tokens, edge weights are set, nodes whose edges all weigh zero are
    removed, and the PageRank result is written back onto the sentence
    objects.

    Args:
        text: The text to process; must be a ``str``.
        language: Forwarded to the sentence cleaner.
        split: Only consulted when the graph ends up empty (see Returns).
        additional_stopwords: Forwarded to the sentence cleaner.

    Returns:
        The sentence objects produced by the cleaner, after scoring. If no
        graph nodes remain, ``[]`` when ``split`` is truthy, otherwise ``""``.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError("Text parameter must be a Unicode object (str)!")

    # Break the text into processed sentence objects.
    cleaned = _clean_text_by_sentences(text, language, additional_stopwords)

    # One graph node per sentence token; edge weights hold the pairwise
    # similarity between sentences.
    tokens = []
    for unit in cleaned:
        tokens.append(unit.token)
    similarity_graph = _build_graph(tokens)
    _set_graph_edge_weights(similarity_graph)

    # Drop nodes whose edges all have zero weight.
    _remove_unreachable_nodes(similarity_graph)

    # PageRank cannot run on an empty graph, so bail out early.
    # NOTE(review): this is the only place ``split`` is used, and the ""
    # result differs in type from the normal list return — confirm intent.
    if len(similarity_graph.nodes()) == 0:
        if split:
            return []
        return ""

    # Rank the tokens and copy the resulting scores onto the sentences.
    _add_scores_to_sentences(cleaned, _pagerank(similarity_graph))

    return cleaned
clean 2 years ago			`from .pagerank_weighted import pagerank_weighted_scipy as _pagerank`
			`from .preprocessing.textcleaner import clean_text_by_sentences as _clean_text_by_sentences`
			`from .commons import build_graph as _build_graph`
			`from .commons import remove_unreachable_nodes as _remove_unreachable_nodes`
			`from .summarizer import _set_graph_edge_weights`
			`from .summarizer import _add_scores_to_sentences`


			`def scored_sentences(text, language="english", split=False, additional_stopwords=None):`
			`if not isinstance(text, str):`
			`raise ValueError("Text parameter must be a Unicode object (str)!")`

			`# Gets a list of processed sentences.`
			`sentences = _clean_text_by_sentences(text, language, additional_stopwords)`

			`# Creates the graph and calculates the similarity coefficient for every pair of nodes.`
			`graph = _build_graph([sentence.token for sentence in sentences])`
			`_set_graph_edge_weights(graph)`

			`# Remove all nodes with all edges weights equal to zero.`
			`_remove_unreachable_nodes(graph)`

			`# PageRank cannot be run in an empty graph.`
			`if len(graph.nodes()) == 0:`
			`return [] if split else ""`

			`# Ranks the tokens using the PageRank algorithm. Returns dict of sentence -> score`
			`pagerank_scores = _pagerank(graph)`

			`# Adds the summa scores to the sentence objects.`
			`_add_scores_to_sentences(sentences, pagerank_scores)`

			`return sentences`