"""Combine specimen records with describer records into one CSV file.

Every specimen row is paired with every describer row whose "3rd element"
value equals the last space-separated token of the specimen's
NOMEN_TYPE_NAME. Each pair is written as a single row (specimen columns
followed by describer columns) to the output file, without a header row.
"""
# Origin: scripts/combining_datasets.py, commit 167331c (ana, 2022-06-09),
# "script combines 2 datasets based on 3rd element of plantname".

import csv
from collections import defaultdict

# Columns copied from the cleaned-up specimen export, in output order.
SPECIMEN_COLUMNS = (
    "BARCODE_FULL",
    "CURRENT_NAME",
    "NOMEN_TYPE_NAME",
    "NOMEN_TYPE_KIND_FULL",
    "COLL_DT_DISP",
    "COUNTRY_FULL",
)

# Columns copied from the describers file, in output order. The first one
# is the key that specimens are matched against.
DESCRIBER_COLUMNS = (
    "3rd element",
    "Name_describer",
    "IPNI",
    "Wikipedia",
    "Amount",
    "Gender",
)

# Position of the type name inside a specimen row built from SPECIMEN_COLUMNS.
TYPE_NAME_INDEX = SPECIMEN_COLUMNS.index("NOMEN_TYPE_NAME")

SPECIMENS_PATH = "selection_data_export_barcode_cleaned.csv"
DESCRIBERS_PATH = "describers_short_long.csv"
OUTPUT_PATH = "belgian_colony_data_all.csv"


def read_columns(path, columns):
    """Return one list per data row of *path*, holding the given *columns*.

    Args:
        path: CSV file whose first row is a header.
        columns: Header names to extract, in the order they should appear.

    Returns:
        A list of rows; each row is a list of strings ordered like *columns*.

    Raises:
        KeyError: If a requested column is missing from the header.
    """
    # newline="" lets the csv module do its own newline handling, so quoted
    # fields containing line breaks are parsed correctly. The encoding is
    # left at the platform default.
    with open(path, newline="") as source:
        return [
            [row[name] for name in columns]
            for row in csv.DictReader(source)
        ]


def match_key(specimen):
    """Return the token of the specimen's type name used for matching.

    This is the last space-separated token of NOMEN_TYPE_NAME.
    NOTE(review): the describers file calls its key column "3rd element";
    last and third token only coincide for three-word names - confirm that
    the last token is what is intended.
    """
    return specimen[TYPE_NAME_INDEX].split(" ")[-1]


def link_describers(collection, describers):
    """Pair each describer with the specimens whose match key equals it.

    Args:
        collection: Specimen rows (lists ordered like SPECIMEN_COLUMNS).
        describers: Describer rows (lists ordered like DESCRIBER_COLUMNS).

    Returns:
        A list of combined rows, ``specimen + describer``. Rows are grouped
        by describer in the order of *describers*; within one describer they
        follow the order of *collection*. Specimens without a matching
        describer do not appear in the result.
    """
    # Group the specimens once so each type name is split a single time
    # rather than once per describer.
    specimens_by_key = defaultdict(list)
    for specimen in collection:
        specimens_by_key[match_key(specimen)].append(specimen)

    linked = []
    for describer in describers:
        # .get() avoids inserting empty groups for unmatched describers.
        for specimen in specimens_by_key.get(describer[0], ()):
            linked.append(specimen + describer)
    return linked


def write_rows(path, rows):
    """Write *rows* to *path* as CSV, replacing any existing file."""
    # newline="" is required by the csv module for files it writes;
    # without it every row is followed by a blank line on Windows.
    with open(path, "w", newline="") as destination:
        csv.writer(destination).writerows(rows)


def main(
    specimens_path=SPECIMENS_PATH,
    describers_path=DESCRIBERS_PATH,
    output_path=OUTPUT_PATH,
):
    """Read both input files, link them and write the combined CSV.

    Args:
        specimens_path: CSV containing the SPECIMEN_COLUMNS.
        describers_path: CSV containing the DESCRIBER_COLUMNS.
        output_path: Destination CSV (overwritten if it exists).
    """
    collection = read_columns(specimens_path, SPECIMEN_COLUMNS)
    describers = read_columns(describers_path, DESCRIBER_COLUMNS)
    write_rows(output_path, link_describers(collection, describers))


if __name__ == "__main__":
    main()