import csv

# with open("../original_files/data_export_henry_selection.csv", 'r') as file:
# 	csv_reader = csv.DictReader(file)
# 	for each_row in csv_reader:
# 		print(each_row)
#
# quit()


# Load the export with csv.DictReader and keep only the selected columns.
# Each specimen is a list ordered like `fieldnames`:
# [barcode, current name, type name, type kind, collection date, country]
fieldnames = ["BARCODE_FULL", "CURRENT_NAME", "NOMEN_TYPE_NAME", "NOMEN_TYPE_KIND_FULL", "COLL_DT_DISP", "COUNTRY_FULL"]
collection = []
# newline='' leaves newline handling to the csv module, so quoted fields
# that contain line breaks are read correctly.
with open("../original_files/data_export_henry.csv", 'r', newline='') as file:
	csv_reader = csv.DictReader(file)
	for each_row in csv_reader:
		# Build the row from `fieldnames` so the column order is defined in
		# one place (and no local shadows the builtin `type`).
		collection.append([each_row[column] for column in fieldnames])
#print(collection)

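# Sample of the printed list. NOTE: it shows four columns per specimen
# (name, type kind, collection date, country), whereas the rows built above
# have six - apparently captured from an earlier version of this script.
#[['Acanthodium hirtum Hochst. ex Nees', 'Isotype', '11 NOV 1839', 'Sudan'], \
#['Acanthus dioscoridis subsp. longistylis Freyn', 'Isotype', '2 JUL 1888', 'Turkey'], \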

# -----------------------------

# count Types
# types = []
# for specimen in collection:
# 	if specimen[1] == 'Type':
# 		types.append(specimen)
# # print(len(types))

# -----------------------------

# Select the specimens whose type kind is exactly 'Type' and that were
# collected in Congo, Rwanda or Burundi between 1885 and 1960 (inclusive).
# Specimen layout: [barcode, current name, type name, type kind, date, country]
types_belgian_colony = []
countries = ["Congo", "Rwanda", "Burundi"]
for specimen in collection:
	if specimen[3] != 'Type':
		continue
	# The year is taken as the last space-separated token of the date,
	# e.g. '1839' in '11 NOV 1839'.
	year = specimen[4].split(' ')[-1]
	# Compare numerically: a string comparison would also accept tokens such
	# as '19' or '1885x'. Dates without a purely numeric year are skipped.
	if not year.isdecimal() or not 1885 <= int(year) <= 1960:
		continue
	# any() adds the specimen once, even if several country names occur in
	# the country field.
	if any(country in specimen[5] for country in countries):
		types_belgian_colony.append(specimen)

# Remove duplicates: keep the first specimen seen for each current name
# (index 1, compared case-insensitively) and report every later repeat.
names_cited = set()
set_types_belgian_colony = []
for candidate in types_belgian_colony:
	lowered_name = candidate[1].lower()
	if lowered_name in names_cited:
		print('plant is already there')
		continue
	names_cited.add(lowered_name)
	set_types_belgian_colony.append(candidate)

#print(names_cited)
print(set_types_belgian_colony)


# Save the de-duplicated selection to a new csv: one specimen per row, no
# header row.
# newline='' keeps the csv writer's own '\r\n' row endings from being
# translated a second time, which would produce blank rows on Windows.
with open('selection_data_export_barcode_cleaned.csv', 'w', newline='') as destination:
	# One writer for the whole file instead of a new one per row.
	writer = csv.writer(destination)
	writer.writerows(set_types_belgian_colony)
# --------------------------------

# Collect the describer of each selected specimen: the last space-separated
# token of the type name, e.g. 'Nees' in 'Acanthodium hirtum Hochst. ex Nees'.
names_plants = []
# Index 2 is NOMEN_TYPE_NAME. Index 0 now holds the barcode, so reading it
# would yield barcode fragments instead of author names.
describers = {
	selected[2].split(' ')[-1]
	for selected in types_belgian_colony
}
# print(describers)
# {'Wild.', 'Vollesen', 'Bornm.', 'Büttner', 'Nees', 'Freyn'}

# with open('describers_short_sample.txt', 'w') as destination:
# 	for describer in describers:
# 		destination.write(describer)
# 		destination.write('\n')