#!/usr/bin/env/ python

#    This script unfolds the Levenhstein Distance algorithm, 
#    often used in spellcheckers and to detect similarity between texts. 
#    The algorithm reads a fragment on a eucalyptus tree from 
#    Julio Cortazar's 'Historias de Cronopios y Famas', Alfaguara, 2012.

#    Copyright (C) 2021, Anais Berck
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details: <http://www.gnu.org/licenses/>.

# Python libraries
import random
import math
import random
import textwrap
import Levenshtein
import sys

# Functions

# Open & prepare textfile for machinal reading
def openfile(txt):
	all_text = []
	with open(txt, 'r') as source:
		# Read the text
		text = source.read()
		# Remove punctuation
		characters = ['\n', '-', ':', '.', ',']
		for c in characters:
			if c in text:
				clean_text = text.replace(c,' ')
		# Transform fragment in a list of words
		words = clean_text.split()
		# Recolt all unique words of the fragment in a set of words
		for word in words:
			word = word.lower()
			word = word.strip()
			all_text.append(word)
	return all_text

# Levenhstein Distance 
def levenshtein(a, b):
	if not a: return len(b)
	if not b: return len(a)
	return min(levenshtein(a[1:], b[1:])+(a[0] != b[0]),
			   levenshtein(a[1:], b)+1,
			   levenshtein(a, b[1:])+1)

# Create map
def space_available (grid, start, end, line):
	other_words = grid[line]

	for other_start, other_end, _ in other_words:
		if start < other_end and end > other_start:
			return False
	return True

# Create frame
def formatTable(words, cellWidth, cellHeight, padding=1, cells=6):
	def makeRow (height):
		return [''] * cellHeight

	def makeLine (width, text = '', fillChar=' ', padding=1):
		text = (padding * fillChar) + text
		return text.center(width - padding, ' ') + (padding * fillChar)
  
	out = ""

	row = makeRow(cellHeight)

	cell = 0

	for word in words:
		wrapped = textwrap.wrap(word, width=cellWidth - (2 * padding))
		lines = padding * [makeLine(cellWidth, padding=padding)]

		for line in wrapped:
			lines.append(makeLine(cellWidth, text=line))

		for _ in range(cellHeight - len(lines)):
			lines.append(makeLine(cellWidth, padding=padding))

		cell += 1

		for (i, line) in enumerate(lines):
			row[i] += line

		if cell == cells:
			out += '\n'.join(row)
			row = makeRow(cellHeight)
			cell = 0

	if cell > 0:
		out += '\n'.join(row)
		row = makeRow(cellHeight)

	return out

# print statements for interaction with reader
def pick_new_main_tree_terminal(fragment, all_simple, file_out=sys.stdout):
	print(fragment, file=file_out)
	## ask input reader
	print("\nIn order for Levenshtein Distance to read the text and produce a unique book, you need to change one word.", file=file_out)
	print("\nYou can choose another tree species for the eucalyptus, the main tree in the fragment. The Levenshtein Distance will then calculate which species is in its vicinity and the more generic word 'trees' will be replaced in the fragment by this new species.", file=file_out)
	print("\nPlease choose the main tree from the list below:", file=file_out)
	print(', '.join(all_simple), file=file_out)
	print("\nI would like to replace eucalyptus with:", file=file_out)
	new_main_tree = input()
	while new_main_tree not in all_simple:
			print("Maybe you misspelled the name of the species, please try again.", file=file_out)
			new_main_tree = input()
	return new_main_tree

# Levenshtein Distance between new main tree and other tree species
def calculate_distances (center_tree, other_trees):
	# Declare dictionary object
	distances = {}
	# For every species in the list, compare the main tree to the selected species and count the similarity between both
	for tree in other_trees:
		# using the Levenhstein Distance algorithm
		# count = levenshtein(new_main_tree,tree)
		count = Levenshtein.distance(center_tree, tree)
		# save each compared species and its similarity count the dictionary
		distances[tree] = count

	return distances


def sort_distances (distances):
	return sorted(distances.items(), key = lambda kv: kv[1])


# Find the minimum distance between new main tree and near species
def find_nearest_species(sorted_distances, trees):
	# First entry in sorted_distances is the new_main_tree, distance 0
	# pick the next value
	minimum_distance = sorted_distances[1][1]
	possible_trees = []

	for tree in sorted_distances[1:]:
		if tree[1] == minimum_distance:
			possible_trees.append(tree)
		else:
			break

	near_species = random.choice(possible_trees)[0]
	nearest_species = trees[near_species]

	return (near_species, nearest_species)

# rewrite fragment Cortazar
def generate_new_fragment(fragment, new_main_tree, nearest_species):
	new_fragment = fragment.replace("eucalyptus", new_main_tree)
	new_fragment = new_fragment.replace("trees", nearest_species)
	new_fragment = new_fragment.replace("A fama is walking", "An ignorant fama is walking")

	return new_fragment

# generate in between species and show the process of the Levenhstein Distance algorithm 
def generate_in_between_species (new_main_tree, near_species, file_out=sys.stdout):
	# Define length of main character tree and major species
	length_main_character_tree = len(new_main_tree)
	length_near_species = len(near_species)

	# Declare a list of in between words showing the process
	in_between_species = []
	# Loop over every number until reaching the total lenght of the original word
	for cell in range(length_main_character_tree):
		#print('row number: ', element)
		# Loop over every number until reaching the total lenght of the replacement word
		for number in range(length_near_species):
			#print('column number: ', number)
			# select the number of letters +1 of the original word
			part1 = new_main_tree[:cell+1]
			print('The Levenshtein Distance observes a part of',new_main_tree, ':', part1, file=file_out)
			# select the number of letters +1 of the replacement word
			part2 = near_species[:number+1]
			print('Next it observes a part of', near_species, ':', part2, file=file_out)
			# selected letters of the original words replace the selected letters of the replacement word
			new_species = part1 + near_species[number+1:]
			print('In its attempt to compare it replaces the part of', near_species, 'for the part of', new_main_tree, 'and as such, it creates a new intermediary species, the', new_species, file=file_out)
			# add in between words to the list
			in_between_species.append(new_species)
			# calculate the similarity between in between words
			print('It calculates the actions needed to replace', new_main_tree[:cell+1], 'by', near_species[:number+1], ': ', Levenshtein.distance(new_main_tree[:cell+1], near_species[:number+1]), '\n', file=file_out)
			# print('\n', file=file_out)

	## Print all in between words
	#print('in between species: ', in_between_species, file=file_out)

	return in_between_species

# Draw a map of all in between species and their distance to main tree
def print_map(sorted_distances, show_distances=True, file_out=sys.stdout):
	# As characters are less wide than high make them a bit wider
	xscale = 2
	# Height of the map
	height = 70
	# Width of the map
	width = int(70 * xscale)
	# Centerpoint of the map
	middle = (int(30 * xscale), 35)

	grid = [[] for x in range(height + 1)]

	centerpoint = sorted_distances[0][0]
	start = middle[0] - max(0, int(len(centerpoint) / 2))

	grid[middle[1]].append((start, start + len(centerpoint), centerpoint))

	for treename, distance in sorted_distances[1:]:
		placed = False
		
		treename = ' {}({}) '.format(treename, distance) if show_distances else ' {} '.format(treename)
		angle = random.random() * math.pi * 2
		step = 0
		steps = 180
		while (not placed) and step < steps:
		
			x = int(math.floor(math.cos(angle) * (distance * 2.8)) * xscale + middle[0])
			y = int(math.floor(math.sin(angle) * (distance * 2.8)) + middle[1])

			start = x - max(0, int(len(treename) / 2))
			end = start + len(treename)
		
			if space_available(grid, start, end, y):
				grid[y].append((start, end, treename))
				placed = True
		
			angle += (math.pi * 2 / steps)
			step += 1

		if not placed:
			print('Could not place {}'.format(treename), file=file_out)
		# print(angle, x, y)

	for row in grid:
		# Sort by first key of the tuples, start of the word
		row = sorted(row, key=lambda r: r[0])

		if len(row):
			line = ''
			for start, _, treename in row:
				line += ((start) - len(line)) * ' ' + treename

			print(line, file=file_out)

# draw table with all new intermediary species
def print_table(new_main_tree, near_species, in_between_species, file_out=sys.stdout):
	## 8. Print tabel
	print('{} → {}'.format(new_main_tree, near_species), file=file_out)
	print('', file=file_out)
	print(formatTable(in_between_species, 20, 5, cells=len(near_species)), file=file_out)

# Execute functions
if __name__ == '__main__':
	## 1A. Open textfiles & turn textfiles in machine readable lists

	# Cortazar
	txt = 'eucalipto_cortazar_en.txt'
	# Open textfile
	all_text = openfile(txt)

	# List of trees
	txt1 = 'arboles_simple_en.txt'
	txt2 = 'arboles_plural_en.txt'
	all_simple = openfile(txt1)
	all_plural = openfile(txt2)

	## 1B. Turn list of trees into dictionary of single/plural words
	trees = dict(zip(all_simple, all_plural))

	## 2. HOMEPAGE print statements
	## print fragment Cortazar
	with open(txt, 'r') as source:
		fragment = source.read()

	## 2b. Ask user to pick a new main tree
	new_main_tree = pick_new_main_tree_terminal(fragment, all_simple, sys.stdout)
	
	## 3. Count the similarity between the main tree and the other species in the forest
	distances = calculate_distances(new_main_tree, all_simple)
	
	## 4. Sort the distances between the trees and the new main tree from the lowest to the highest counts
	sorted_distances = sort_distances(distances)

	# Find the nearest species
	near_species, nearest_species = find_nearest_species(sorted_distances, trees)

	# Print rewritten fragment
	print("\n")
	new_fragment = generate_new_fragment(fragment, new_main_tree, nearest_species)
	print(new_fragment, file=sys.stdout)

	## 6. Compare the similarity between the main character tree and the main species in the forest, 
	in_between_species = generate_in_between_species(new_main_tree, near_species, file_out=sys.stdout)

	# Show the sorted distances dictionary
	# print('Sorted distances: ', sorted_distances, file=sys.stdout)
	# print('\n', file=sys.stdout)

	# generate_new_text(fragment, new_main_tree, all_simple, all_plural, trees, sys.stdout)
	## 7. Generate map of the woods
	print_map(sorted_distances, file_out=sys.stdout)

	## 8. Generate intermediary species table
	print_table(new_main_tree, near_species, in_between_species, sys.stdout)