import sys
import json
import glob
from graphviz import Graph

# appending a path
# (must run before the summa imports below so the local package is found)
sys.path.append('../')

# importing customised module
import summa.edits
from summa.edits import csv_to_matrix, custom_summarize


# main
# ------------------------------------------------------------------------

def print_score(name, scored_sentences):
    """Print a heading followed by the text and score of each sentence.

    Args:
        name: Label printed first (the script passes the CSV path).
        scored_sentences: Iterable of objects exposing ``.text`` and
            ``.score``; printed in the order given.
    """
    print(name)
    # Iterate the argument itself rather than a module-level global, so the
    # function works no matter what the caller named its list.
    for sentence in scored_sentences:
        print(sentence.text)
        print(sentence.score)
        print()
    print()


def _render_graph(path, weighted_graph):
    """Draw ``weighted_graph`` with graphviz and render it to ``path + '_graph'``.

    Args:
        path: Used as the graphviz graph name and as the output file prefix.
        weighted_graph: Object exposing ``nodes()`` and
            ``edge_weight((a, b))``, as returned by ``custom_summarize``.
    """
    graphviz_graph = Graph(path, engine='fdp', format='svg')
    graphviz_graph.attr('graph', overlap='false')

    # Fetch the node list once; it is walked in both loops below.
    nodes = list(weighted_graph.nodes())

    for i, node1 in enumerate(nodes):
        graphviz_graph.node(str(i), node1)
        for j, node2 in enumerate(nodes):
            forward = weighted_graph.edge_weight((node1, node2))
            backward = weighted_graph.edge_weight((node2, node1))
            if forward != backward:
                print("THIS IS NOT SYMMETRICAL")

            # NOTE(review): both branches emit the edge (min(i, j), max(i, j)),
            # so every unordered pair of nodes is drawn twice. Kept as in the
            # original; confirm whether the duplicate edge is intended.
            if i < j:
                original_weight = forward
                graphviz_graph.edge(str(i), str(j),
                                    weight=str(original_weight * 10),
                                    penwidth=str(original_weight))
            elif i > j:
                original_weight = backward
                graphviz_graph.edge(str(j), str(i),
                                    weight=str(original_weight * 10),
                                    penwidth=str(original_weight))

    graphviz_graph.render(path + '_graph')


def main():
    """Score the sentences of every CSV in ``../summa/data`` and draw its graph."""
    paths = glob.glob('../summa/data/*.csv')

    # Kept for the (currently disabled) average-matrix computation below.
    matrices = []

    for path in paths:
        # CSV to matrix
        matrix = csv_to_matrix(path)
        matrices.append(matrix)
        # print(json.dumps(matrix, indent=4))

        # we give summa the whole text to tokenize, from the csv keys
        text = " ".join(matrix.keys())

        # random walk with pagerank
        scored_sentences, weighted_graph = custom_summarize(text, matrix)

        # sort the scored sentences, highest score first
        sorted_sentences = sorted(scored_sentences, key=lambda s: s.score, reverse=True)

        # print them
        print_score(path, sorted_sentences)

        # create a graph
        _render_graph(path, weighted_graph)

    # # compute the average matrix
    # matrix_keys = matrices[0].keys()
    # average_matrix = {}
    # for i in matrix_keys:
    #     average_matrix[i] = {}
    #     for j in matrix_keys:
    #         average_matrix[i][j] = sum([matrix[i][j] for matrix in matrices]) / len(matrices)

    # # random walk with pagerank
    # scored_sentences = custom_summarize(text, average_matrix)

    # # sort the scored sentences
    # sorted_sentences = sorted(scored_sentences, key=lambda s: s.score, reverse=True)

    # # print them
    # print_score('average', sorted_sentences)


if __name__ == '__main__':
    main()