Script combines 2 datasets based on the 3rd element of the plant name

4 years ago · 167331c23f
parent dcb226b3a4
commit 167331c23f
1 changed files with 53 additions and 0 deletions
--- a/scripts/combining_datasets.py
+++ b/scripts/combining_datasets.py
@ -0,0 +1,53 @@
+import csv
+
+# Load the cleaned barcode export: one record per specimen, keeping only the
+# columns named in `fieldnames`, in that order.
+fieldnames = ["BARCODE_FULL", "CURRENT_NAME", "NOMEN_TYPE_NAME", "NOMEN_TYPE_KIND_FULL", "COLL_DT_DISP", "COUNTRY_FULL"]
+collection = []
+# newline='' leaves line-ending handling to the csv module, so quoted fields
+# that contain line breaks are parsed correctly.
+with open("selection_data_export_barcode_cleaned.csv", 'r', newline='') as file:
+	csv_reader = csv.DictReader(file)
+	for each_row in csv_reader:
+		# Each specimen is a plain list (it is concatenated with a describer
+		# list further down); a missing column raises KeyError.
+		collection.append([each_row[column] for column in fieldnames])
+
+# Load the cleaned describers table: one record per describer, as a list in
+# the order short name, full name, IPNI, Wikipedia, amount, gender.
+describers = []
+# newline='' leaves line-ending handling to the csv module, so quoted fields
+# that contain line breaks are parsed correctly.
+with open("describers_short_long.csv", 'r', newline='') as source:
+	csv_reader = csv.DictReader(source)
+	for each_row in csv_reader:
+		# A missing column raises KeyError.
+		describers.append([
+			each_row["3rd element"],  # short name, used as the join key below
+			each_row["Name_describer"],
+			each_row["IPNI"],
+			each_row["Wikipedia"],
+			each_row["Amount"],
+			each_row["Gender"],
+		])
+
+# Link describer data to each specimen of the collection: a specimen matches a
+# describer when the last space-separated token of its type name
+# (NOMEN_TYPE_NAME, index 2) equals the describer's short name (index 0).
+# NOTE(review): the key is the LAST token, which is the 3rd element only for
+# three-word names -- confirm this is the intended key.
+
+# Index the specimens by key once, instead of re-splitting every type name for
+# every describer. Specimens keep their original order within each key.
+specimens_by_key = {}
+for specimen in collection:
+	third_element = specimen[2].split(' ')[-1]
+	specimens_by_key.setdefault(third_element, []).append(specimen)
+
+# Rows are emitted describer by describer, each row being the specimen columns
+# followed by the describer columns. Specimens without a matching describer
+# are left out.
+belgian_colony_data_all = []
+for describer in describers:
+	for specimen in specimens_by_key.get(describer[0], []):
+		belgian_colony_data_all.append(specimen + describer)
+
+
+# Generate a new csv file with all linked data: one row per linked record
+# (specimen columns followed by describer columns); no header row is written.
+# newline='' stops the "\r\n" line endings produced by the csv module from
+# being translated a second time, which shows up as blank rows on Windows.
+with open("belgian_colony_data_all.csv", "w", newline='') as destination:
+	writer = csv.writer(destination)
+	writer.writerows(belgian_colony_data_all)