Levenshtein-Distance-lee-a-.../scrape_html_page.py

#!/usr/bin/env python
# encoding=utf8

import sys

from bs4 import BeautifulSoup
import requests

# Only referenced by the disabled link-collecting code kept further down.
species = []
name_species = ''

url = "https://www.arbolapp.es/especies-nombre-cientifico/"

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30

# Fetch the webpage; `response` is a requests.Response object.
# Network-level failures (DNS, refused connection, timeout) still raise
# a requests exception, as they did before.
response = requests.get(url, timeout=REQUEST_TIMEOUT)

if response.ok:
    # Parse the HTML source of the page.
    data = response.text
    soup = BeautifulSoup(data, 'lxml')
    # CSS selector: every <h4> that is a direct child of a
    # <div class="contenido">.
    selector = 'div.contenido > h4'
    found = soup.select(selector)
    # Print the text of each matching heading, lower-cased, one per line.
    for element in found:
        print(element.text.lower())

    # Disabled code kept for reference: collects the targets of "/wiki/"
    # links and writes them to List_of_tree_genera.txt.
    # print('soup:', soup)
    # for link in soup.find_all("a"):
    # 	url = link.get("href", "")
    # 	print('url:', url)
    # 	if "/wiki/" in url:
    # 		name_species = url.replace("/wiki/", "")
    # 	species.append(name_species)

    # 	destination = "List_of_tree_genera.txt"
    # 	with open(destination, 'w') as source:
    # 			for specie in species:
    # 				source.write(specie)
    # 				source.write('\n')
else:
    # Report the failure instead of exiting silently; stderr keeps the
    # diagnostic out of the scraped output on stdout.
    sys.stderr.write(
        "Request to %s failed with HTTP status %s\n"
        % (url, response.status_code)
    )

# complete_links =["https://en.wikipedia.org/wiki/", "https://es.wikipedia.org/wiki/", "https://fr.wikipedia.org/wiki/", "https://nl.wikipedia.org/wiki/"]

'''
comments:
Trees of Africa refer to all countries listed here: https://en.wikipedia.org/wiki/Ecoregions_of_Africa

'''