from .pagerank_weighted import pagerank_weighted_scipy as _pagerank
from .preprocessing.textcleaner import clean_text_by_sentences as _clean_text_by_sentences
from .commons import build_graph as _build_graph
from .commons import remove_unreachable_nodes as _remove_unreachable_nodes
from .summarizer import _set_graph_edge_weights
from .summarizer import _add_scores_to_sentences


def scored_sentences(text, language="english", split=False, additional_stopwords=None):
    """Return the sentences of *text* after attaching a PageRank score to each.

    The text is split into sentence objects, a graph is built over their
    tokens, edge weights are assigned, and the PageRank result is written
    back onto the sentence objects.

    Args:
        text: The input text; must be a ``str``.
        language: Forwarded to the sentence cleaner.
        split: Only consulted when the graph ends up with no nodes: the
            function then returns ``[]`` if true, ``""`` otherwise.
        additional_stopwords: Forwarded to the sentence cleaner.

    Returns:
        The sentence objects produced by the cleaner, after the scoring
        helper has been applied to them. If the graph has no nodes left,
        ``[]`` or ``""`` depending on ``split``.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError("Text parameter must be a Unicode object (str)!")

    # Break the text into processed sentence objects.
    cleaned = _clean_text_by_sentences(text, language, additional_stopwords)

    # One graph node per sentence token; weights are filled in by the helper.
    tokens = []
    for unit in cleaned:
        tokens.append(unit.token)
    sentence_graph = _build_graph(tokens)
    _set_graph_edge_weights(sentence_graph)

    # Discard nodes whose edges all have zero weight.
    _remove_unreachable_nodes(sentence_graph)

    # PageRank cannot be run on an empty graph, so bail out early.
    if len(sentence_graph.nodes()) == 0:
        if split:
            return []
        return ""

    # Rank the graph and copy the resulting scores onto the sentence objects.
    _add_scores_to_sentences(cleaned, _pagerank(sentence_graph))
    return cleaned