"""Export a selection of type specimens from the BG Meise database export.

Provenance: scripts/export_selection_data.py, added in commit
c3eb3d82e5ffe7ce53833012e1ca9f2ad2063956 (author: ana, Wed, 8 Jun 2022
18:03:23 +0200), subject "script that extracts data from official db of
BG Meise".

Pipeline, as run by ``main()``:

1. Load six columns of every row of the source CSV into a list of lists.
2. Keep the rows whose type kind is exactly ``"Type"``, whose collection
   year lies between 1885 and 1960 (inclusive) and whose country field
   mentions Congo, Rwanda or Burundi.
3. Drop rows whose current name (compared case-insensitively) was already
   seen.
4. Print the result and write it, without a header row, to the output CSV.
"""

import csv

# Columns copied from the export; their order is the order of the values in
# every specimen row and in the output CSV.
FIELDNAMES = [
    "BARCODE_FULL",
    "CURRENT_NAME",
    "NOMEN_TYPE_NAME",
    "NOMEN_TYPE_KIND_FULL",
    "COLL_DT_DISP",
    "COUNTRY_FULL",
]

# Positions of the columns above inside a specimen row.
BARCODE = 0
CURRENT_NAME = 1
TYPE_NAME = 2
TYPE_KIND = 3
COLLECTION_DATE = 4
COUNTRY = 5

SOURCE_PATH = "../original_files/data_export_henry.csv"
DESTINATION_PATH = "selection_data_export_barcode_cleaned.csv"

COUNTRIES = ("Congo", "Rwanda", "Burundi")
FIRST_YEAR = 1885
LAST_YEAR = 1960


def load_collection(path=SOURCE_PATH, encoding="utf-8"):
    """Read the export at *path* and return one list per row.

    Each returned row holds the values of ``FIELDNAMES``, in that order.

    Raises:
        KeyError: if a row lacks one of the ``FIELDNAMES`` columns.
        OSError: if the file cannot be opened.
    """
    # newline="" is what the csv module asks for, so that line breaks inside
    # quoted fields are interpreted by the csv reader itself.
    with open(path, "r", newline="", encoding=encoding) as file:
        return [
            [row[column] for column in FIELDNAMES]
            for row in csv.DictReader(file)
        ]


def collection_year(date):
    """Return the year at the end of a date such as ``'11 NOV 1839'``.

    The last whitespace-separated token is taken as the year. ``None`` is
    returned when the date is empty or that token is not a plain number.
    """
    tokens = date.split() if date else []
    if tokens and tokens[-1].isdecimal():
        return int(tokens[-1])
    return None


def select_colonial_types(collection, countries=COUNTRIES,
                          first_year=FIRST_YEAR, last_year=LAST_YEAR):
    """Return the 'Type' specimens collected in *countries* within the years.

    A row is kept when its type kind equals ``"Type"``, its collection year
    is within ``first_year``..``last_year`` (inclusive) and its country
    field contains at least one of *countries* as a substring. Rows whose
    year cannot be parsed are skipped.
    """
    selection = []
    for specimen in collection:
        if specimen[TYPE_KIND] != "Type":
            continue
        # Compare years as numbers: comparing them as strings would let a
        # token like "195" pass the 1885-1960 check.
        year = collection_year(specimen[COLLECTION_DATE])
        if year is None or not first_year <= year <= last_year:
            continue
        country_field = specimen[COUNTRY] or ""
        # any() keeps the row once even when several countries match.
        if any(country in country_field for country in countries):
            selection.append(specimen)
    return selection


def drop_duplicate_names(specimens):
    """Return *specimens* keeping only the first row for each current name.

    Names are compared in lower case. For every dropped row the message
    'plant is already there' is printed.
    """
    unique = []
    names_seen = set()
    for specimen in specimens:
        name = (specimen[CURRENT_NAME] or "").lower()
        if name in names_seen:
            print('plant is already there')
            continue
        names_seen.add(name)
        unique.append(specimen)
    return unique


def extract_describers(specimens):
    """Return the set of last words of the specimens' current names.

    For a name like 'Acanthodium hirtum Hochst. ex Nees' this yields 'Nees'.
    Rows with an empty name contribute nothing.

    NOTE(review): the previous code read index 0, which holds the barcode
    since the BARCODE_FULL column was added. CURRENT_NAME is used here;
    confirm whether NOMEN_TYPE_NAME was intended instead.
    """
    describers = set()
    for specimen in specimens:
        tokens = (specimen[CURRENT_NAME] or "").split()
        if tokens:
            describers.add(tokens[-1])
    return describers


def save_selection(specimens, path=DESTINATION_PATH, encoding="utf-8"):
    """Write *specimens* to *path* as CSV, one row each, without a header."""
    # newline="" stops the csv writer's own line endings from being
    # translated a second time (blank lines between rows on Windows).
    with open(path, "w", newline="", encoding=encoding) as destination:
        csv.writer(destination).writerows(specimens)


def main(source=SOURCE_PATH, destination=DESTINATION_PATH,
         describers_path=None):
    """Run the whole export and return the de-duplicated selection.

    Args:
        source: CSV export to read.
        destination: CSV file the selection is written to.
        describers_path: optional text file; when given, the describers of
            the selected (not yet de-duplicated) types are written to it,
            one per line, in sorted order.
    """
    collection = load_collection(source)
    types_belgian_colony = select_colonial_types(collection)
    set_types_belgian_colony = drop_duplicate_names(types_belgian_colony)
    print(set_types_belgian_colony)
    save_selection(set_types_belgian_colony, destination)

    if describers_path is not None:
        describers = sorted(extract_describers(types_belgian_colony))
        with open(describers_path, "w", encoding="utf-8") as describers_file:
            describers_file.writelines(
                describer + "\n" for describer in describers
            )

    return set_types_belgian_colony


if __name__ == "__main__":
    main()