script that extracts data from the official db of BG Meise
							parent
							
								
									312cb1d1d1
								
							
						
					
					
						commit
						c3eb3d82e5
					
				| @ -0,0 +1,87 @@ | ||||
| import csv | ||||
| 
 | ||||
| # with open("../original_files/data_export_henry_selection.csv", 'r') as file: | ||||
| # 	csv_reader = csv.DictReader(file) | ||||
| # 	for each_row in csv_reader: | ||||
| # 		print(each_row) | ||||
| # | ||||
| # quit() | ||||
| 
 | ||||
| 
 | ||||
# Load the export and keep only the selected columns, one list per specimen.
# Each row follows the order of `fieldnames`, so the indexes are:
# 0 BARCODE_FULL, 1 CURRENT_NAME, 2 NOMEN_TYPE_NAME,
# 3 NOMEN_TYPE_KIND_FULL, 4 COLL_DT_DISP, 5 COUNTRY_FULL
fieldnames = ["BARCODE_FULL", "CURRENT_NAME", "NOMEN_TYPE_NAME", "NOMEN_TYPE_KIND_FULL", "COLL_DT_DISP", "COUNTRY_FULL"]
collection = []
# newline='' is what the csv module asks for when reading, so that line
# breaks inside quoted fields are parsed as data and not as row ends.
# NOTE(review): no encoding is passed, so decoding follows the platform
# locale; confirm the encoding of the export and pin it here.
with open("../original_files/data_export_henry.csv", 'r', newline='') as file:
	csv_reader = csv.DictReader(file)
	for each_row in csv_reader:
		# A column missing from the export raises KeyError.
		collection.append([each_row[column] for column in fieldnames])
| #print(collection) | ||||
| 
 | ||||
| #[['Acanthodium hirtum Hochst. ex Nees', 'Isotype', '11 NOV 1839', 'Sudan'], \ | ||||
| #['Acanthus dioscoridis subsp. longistylis Freyn', 'Isotype', '2 JUL 1888', 'Turkey'], \ | ||||
| 
 | ||||
| # ----------------------------- | ||||
| 
 | ||||
| # count Types | ||||
| # types = [] | ||||
| # for specimen in collection: | ||||
| # 	if specimen[1] == 'Type': | ||||
| # 		types.append(specimen) | ||||
| # # print(len(types)) | ||||
| 
 | ||||
| # ----------------------------- | ||||
| 
 | ||||
# Select the specimens whose type kind is exactly 'Type' and that were
# collected in Congo, Rwanda or Burundi between 1885 and 1960 (inclusive).
types_belgian_colony = []
countries = ["Congo", "Rwanda", "Burundi"]
for specimen in collection:
	if specimen[3] != 'Type':
		continue
	# The year is the last space-separated token of the date field,
	# e.g. '11 NOV 1839' -> '1839'. It is compared as a number because a
	# string comparison also lets through tokens such as '19' or '195'.
	year_token = specimen[4].split(' ')[-1]
	try:
		# Only the first four characters are parsed, so a year followed
		# by extra characters is still read as that year.
		year = int(year_token[:4])
	except ValueError:
		# Empty or non-numeric date: no usable year, skip the specimen.
		continue
	if not 1885 <= year <= 1960:
		continue
	# Substring match on the country field. any() adds the specimen once
	# even when several of the names occur in the same field.
	if any(country in specimen[5] for country in countries):
		types_belgian_colony.append(specimen)
| 
 | ||||
# Remove duplicates: keep the first specimen seen for each name at index 1
# of the row, compared case-insensitively. Every later repeat is reported
# on stdout and dropped.
set_types_belgian_colony = []
names_cited = set()
for candidate in types_belgian_colony:
	lowered_name = candidate[1].lower()
	if lowered_name in names_cited:
		print('plant is already there')
		continue
	names_cited.add(lowered_name)
	set_types_belgian_colony.append(candidate)

#print(names_cited)
print(set_types_belgian_colony)
| 
 | ||||
| 
 | ||||
# Save the de-duplicated selection to a new csv, one specimen per row.
# newline='' lets csv.writer control the line endings itself; without it
# an extra blank line can appear between rows on Windows.
# NOTE(review): no encoding is passed, so the output encoding follows the
# platform locale; pin it if the file is shared across machines.
with open('selection_data_export_barcode_cleaned.csv', 'w', newline='') as destination:
	# One writer for the whole file instead of a new one for every row.
	writer = csv.writer(destination)
	writer.writerows(set_types_belgian_colony)
| # -------------------------------- | ||||
| 
 | ||||
# Collect the describer of each selected specimen: the last space-separated
# word of its name. The name is read from index 2 of the row; index 0 holds
# the barcode, so reading it would collect barcode fragments instead.
# NOTE(review): index 2 (NOMEN_TYPE_NAME) is assumed to be the intended name
# column; confirm that index 1 (CURRENT_NAME) is not wanted instead.
names_plants = []
describers = set()
for specimen in types_belgian_colony:
	describer = specimen[2].split(' ')[-1]
	# An empty name (or one ending in a space) yields '', which is not a
	# describer and is left out.
	if describer:
		describers.add(describer)
| # print(describers) | ||||
| # {'Wild.', 'Vollesen', 'Bornm.', 'Büttner', 'Nees', 'Freyn'} | ||||
| 
 | ||||
| # with open('describers_short_sample.txt', 'w') as destination: | ||||
| # 	for describer in describers: | ||||
| # 		destination.write(describer) | ||||
| # 		destination.write('\n') | ||||
					Loading…
					
					
				
		Reference in New Issue