From c3eb3d82e5ffe7ce53833012e1ca9f2ad2063956 Mon Sep 17 00:00:00 2001
From: ana <an@collectifs.net>
Date: Wed, 8 Jun 2022 18:03:23 +0200
Subject: [PATCH] script that extracts data from official db of BG Meise

---
 scripts/export_selection_data.py | 87 ++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 scripts/export_selection_data.py

diff --git a/scripts/export_selection_data.py b/scripts/export_selection_data.py
new file mode 100644
index 0000000..5413e00
--- /dev/null
+++ b/scripts/export_selection_data.py
@@ -0,0 +1,87 @@
+import csv
+
+# with open("../original_files/data_export_henry_selection.csv", 'r') as file:
+# 	csv_reader = csv.DictReader(file)
+# 	for each_row in csv_reader:
+# 		print(each_row)
+#
+# quit()
+
+
+# Columns kept from the export, in output order. Every specimen row below is a
+# list indexed by position: 0 barcode, 1 current name, 2 type name,
+# 3 type kind, 4 collection date, 5 country.
+fieldnames = ["BARCODE_FULL", "CURRENT_NAME", "NOMEN_TYPE_NAME", "NOMEN_TYPE_KIND_FULL", "COLL_DT_DISP","COUNTRY_FULL"]
+collection = []
+# newline='' is what the csv module asks for on files it reads, so that
+# newlines embedded in quoted fields are parsed correctly.
+with open("../original_files/data_export_henry.csv", 'r', newline='') as file:
+	csv_reader = csv.DictReader(file)
+	for each_row in csv_reader:
+		# Build the row from the column list so the two cannot drift apart;
+		# a column missing from the export still raises KeyError.
+		specimen = [each_row[column] for column in fieldnames]
+		collection.append(specimen)
+#print(collection)
+
+#[['Acanthodium hirtum Hochst. ex Nees', 'Isotype', '11 NOV 1839', 'Sudan'], \
+#['Acanthus dioscoridis subsp. longistylis Freyn', 'Isotype', '2 JUL 1888', 'Turkey'], \
+
+# -----------------------------
+
+# count Types
+# types = []
+# for specimen in collection:
+# 	if specimen[1] == 'Type':
+# 		types.append(specimen)
+# # print(len(types))
+
+# -----------------------------
+
+# selection of Types only collected in Congo, Rwanda, Burundi between 1885 and 1960
+types_belgian_colony = []
+countries = ["Congo", "Rwanda", "Burundi"]
+for specimen in collection:
+	# year = last token of the date (e.g. '11 NOV 1839'); compared as a number, since the string '19' also sorts within '1885'..'1960'
+	year = specimen[4].split(' ')[-1]
+	if specimen[3] == 'Type' and year.isdecimal() and 1885 <= int(year) <= 1960:
+		# any() adds the specimen once, even when several country names match
+		if any(country in specimen[5] for country in countries):
+			types_belgian_colony.append(specimen)
+
+# keep the first specimen seen for each name (index 1), compared case-insensitively
+names_cited = set()
+set_types_belgian_colony = []
+for candidate in types_belgian_colony:
+	lowered = candidate[1].lower()
+	if lowered in names_cited:
+		# a later specimen whose name was already seen is reported and dropped
+		print('plant is already there')
+		continue
+	names_cited.add(lowered)
+	set_types_belgian_colony.append(candidate)
+
+#print(names_cited)
+print(set_types_belgian_colony)
+
+
+# save selection to new csv; newline='' keeps the csv module's own '\r\n' row ending from being translated a second time
+with open('selection_data_export_barcode_cleaned.csv', 'w', newline='') as destination:
+	# one writer for the whole file instead of a new one per row
+	writer = csv.writer(destination)
+	writer.writerows(set_types_belgian_colony)
+# --------------------------------
+
+# extract names of specimens and isolate name of describer (last word of the name)
+names_plants = []
+describers = set()
+for specimen in types_belgian_colony:
+	# index 1 is CURRENT_NAME; index 0 is the barcode. NOTE(review): use index 2 if the describer of NOMEN_TYPE_NAME is wanted
+	describers.add(specimen[1].split(' ')[-1])
+# print(describers)
+# {'Wild.', 'Vollesen', 'Bornm.', 'Büttner', 'Nees', 'Freyn'}
+
+# with open('describers_short_sample.txt', 'w') as destination:
+# 	for describer in describers:
+# 		destination.write(describer)
+# 		destination.write('\n')