thewarehouseandtheforest/exp.subjective-similarity-m.../main.py

import sys
import json
import glob
from graphviz import Graph

# appending a path
sys.path.append('../')

# importing customised module
import summa.edits
from summa.edits import csv_to_matrix, custom_summarize


#   main
#   ------------------------------------------------------------------------


def print_score(name, scored_sentences):
    """Print a titled list of sentences with their scores.

    The sentences are printed in the order given; no sorting is done here.

    Args:
        name: Heading printed before the list (the caller passes the CSV path).
        scored_sentences: Iterable of objects exposing ``text`` and ``score``
            attributes.
    """
    print(name)
    # Iterate over the argument itself, not a same-looking module-level
    # variable, so the function works no matter where it is called from.
    for sentence in scored_sentences:
        print(sentence.text)
        print(sentence.score)
        print()
    print()


if __name__ == '__main__':

    # every CSV in the data folder is processed independently
    paths = glob.glob('../summa/data/*.csv')

    # collected for the (currently commented-out) average-matrix step
    # at the end of this file
    matrices = []

    for path in paths:

        # CSV to matrix
        matrix = csv_to_matrix(path)
        matrices.append(matrix)
        # print(json.dumps(matrix, indent=4))

        # we give summa the whole text to tokenize, from the csv keys
        text = " ".join(matrix.keys())

        # random walk with pagerank
        scored_sentences, weighted_graph = custom_summarize(text, matrix)

        # sort the scored sentences, highest score first
        sorted_sentences = sorted(scored_sentences, key=lambda s: s.score, reverse=True)

        # print them
        print_score(path, sorted_sentences)

        # create a graph
        graphviz_graph = Graph(path, engine='fdp', format='svg')
        graphviz_graph.attr('graph', overlap='false')

        # fetch the node list once so indices stay consistent across both loops
        nodes = list(weighted_graph.nodes())

        for i, node1 in enumerate(nodes):
            graphviz_graph.node(str(i), node1)

            # Visit each unordered pair exactly once (i < j). The graphviz
            # Graph is undirected and not strict, so adding the same pair
            # again from the (j, i) side would draw every edge twice.
            for j, node2 in enumerate(nodes[i + 1:], start=i + 1):

                original_weight = weighted_graph.edge_weight((node1, node2))

                # the drawing assumes the weights are the same in both directions
                if original_weight != weighted_graph.edge_weight((node2, node1)):
                    print("THIS IS NOT SYMMETRICAL")

                graphviz_graph.edge(
                    str(i),
                    str(j),
                    weight=str(original_weight * 10),
                    penwidth=str(original_weight),
                )

        # writes the DOT source and the rendered SVG next to the CSV
        graphviz_graph.render(path + '_graph')


    # # compute the average matrix
    # matrix_keys = matrices[0].keys()
    # average_matrix = {}
    # for i in matrix_keys:
    #     average_matrix[i] = {}
    #     for j in matrix_keys:
    #         average_matrix[i][j] = sum([matrix[i][j] for matrix in matrices]) / len(matrices)

    # # random walk with pagerank
    # scored_sentences = custom_summarize(text, average_matrix)
    # # sort the scored sentences
    # sorted_sentences = sorted(scored_sentences, key=lambda s: s.score, reverse=True)

    # # print them
    # print_score('average', sorted_sentences)