"""Select 'Type' specimens collected in Congo, Rwanda or Burundi, 1885-1960.

Reads a CSV export, keeps a fixed set of columns, filters the rows on type
kind, collection year and country, drops rows whose current name was already
seen, prints the result and writes it to a new CSV file.
"""
import csv
import re

SOURCE_PATH = "../original_files/data_export_henry.csv"
DESTINATION_PATH = "selection_data_export_barcode_cleaned.csv"

# Columns copied from the export; every specimen row is a list in this order.
FIELDNAMES = [
    "BARCODE_FULL",
    "CURRENT_NAME",
    "NOMEN_TYPE_NAME",
    "NOMEN_TYPE_KIND_FULL",
    "COLL_DT_DISP",
    "COUNTRY_FULL",
]

# Row positions are derived from FIELDNAMES so that adding or moving a column
# cannot silently leave a stale numeric index behind.
CURRENT_NAME_COL = FIELDNAMES.index("CURRENT_NAME")
TYPE_NAME_COL = FIELDNAMES.index("NOMEN_TYPE_NAME")
TYPE_KIND_COL = FIELDNAMES.index("NOMEN_TYPE_KIND_FULL")
DATE_COL = FIELDNAMES.index("COLL_DT_DISP")
COUNTRY_COL = FIELDNAMES.index("COUNTRY_FULL")

COUNTRIES = ("Congo", "Rwanda", "Burundi")
FIRST_YEAR = 1885
LAST_YEAR = 1960

# A year is exactly four digits at the start of the last date token, so
# "1905-1906" yields 1905 while truncated values such as "19" are rejected.
_YEAR_PATTERN = re.compile(r"[0-9]{4}(?![0-9])")


def collection_year(date_text):
    """Return the year at the end of a date such as '11 NOV 1839'.

    Returns None when the text is empty or its last whitespace-separated
    token does not start with a four-digit year.
    """
    tokens = date_text.split()
    if not tokens:
        return None
    match = _YEAR_PATTERN.match(tokens[-1])
    return int(match.group()) if match else None


def load_collection(path=SOURCE_PATH):
    """Read the CSV at *path* and return one list per row, ordered as FIELDNAMES.

    Raises KeyError if a row lacks one of the FIELDNAMES columns.
    """
    # newline="" lets the csv module do its own newline handling.
    with open(path, "r", newline="") as file:
        return [[row[field] for field in FIELDNAMES]
                for row in csv.DictReader(file)]


def select_colonial_types(collection):
    """Return the specimens of kind 'Type' from the target countries and years.

    A specimen qualifies when its type kind is exactly 'Type', its collection
    year lies in FIRST_YEAR..LAST_YEAR (inclusive) and its country text
    contains one of COUNTRIES. Each specimen is returned at most once.
    """
    selection = []
    for specimen in collection:
        if specimen[TYPE_KIND_COL] != "Type":
            continue
        # Compare years numerically: comparing the raw strings let values
        # like "19" or "189" slip through lexicographically.
        year = collection_year(specimen[DATE_COL])
        if year is None or not FIRST_YEAR <= year <= LAST_YEAR:
            continue
        # Substring match, so any country value containing e.g. "Congo"
        # qualifies; any() keeps a multi-country value from being added twice.
        if any(country in specimen[COUNTRY_COL] for country in COUNTRIES):
            selection.append(specimen)
    return selection


def drop_duplicate_names(specimens):
    """Return *specimens* keeping only the first row per current name.

    Names are compared case-insensitively. A message is printed for every
    row that is dropped.
    """
    names_cited = set()
    unique = []
    for specimen in specimens:
        name = specimen[CURRENT_NAME_COL].lower()
        if name in names_cited:
            print('plant is already there')
            continue
        names_cited.add(name)
        unique.append(specimen)
    return unique


def extract_describers(specimens):
    """Return the set of last words of the specimens' type names.

    NOTE(review): the original read column 0, which is the barcode since
    BARCODE_FULL was added to the rows. The type name is assumed to be the
    intended column - confirm (CURRENT_NAME is the other candidate).
    """
    describers = set()
    for specimen in specimens:
        words = specimen[TYPE_NAME_COL].split()
        if words:
            describers.add(words[-1])
    return describers


def save_selection(specimens, path=DESTINATION_PATH):
    """Write *specimens* to *path* as CSV rows, without a header row."""
    # newline="" avoids blank lines between rows on Windows.
    with open(path, "w", newline="") as destination:
        csv.writer(destination).writerows(specimens)


def main(source_path=SOURCE_PATH, destination_path=DESTINATION_PATH):
    """Run the selection and return (unique specimens, describers).

    The describers are computed from the selection before duplicate names
    are dropped. They are returned but not written to any file.
    """
    collection = load_collection(source_path)
    types_belgian_colony = select_colonial_types(collection)
    set_types_belgian_colony = drop_duplicate_names(types_belgian_colony)
    print(set_types_belgian_colony)
    save_selection(set_types_belgian_colony, destination_path)
    describers = extract_describers(types_belgian_colony)
    return set_types_belgian_colony, describers


if __name__ == "__main__":
    main()