Levenshtein-Distance-lee-a-.../scrape_html_page.py

#!/usr/bin/env python
# encoding=utf8 

import sys

from bs4 import BeautifulSoup
import requests

# Scrape the arbolapp.es "species by scientific name" page and print every
# <h4> heading found directly under a <div class="contenido">, lower-cased,
# one per line.

# Accumulators referenced only by the disabled Wikipedia link scraper kept
# further down; the live code in this script does not use them.
species = []
name_species = ''

url = "https://www.arbolapp.es/especies-nombre-cientifico/"

# Seconds to wait for the server. requests applies no timeout by default,
# so without this an unresponsive host would block the script forever.
REQUEST_TIMEOUT = 30

# Getting the webpage, creating a Response object.
response = requests.get(url, timeout=REQUEST_TIMEOUT)

# `response.ok` is what the truth value of a Response evaluates to; it is
# spelled out here so the success condition is explicit.
if response.ok:
    # Extracting the source code of the page.
    data = response.text
    soup = BeautifulSoup(data, 'lxml')
    # CSS selector: every <h4> that is a direct child of a
    # <div class="contenido">.
    selector = 'div.contenido > h4'
    # find elements that contain the data we want
    found = soup.select(selector)
    for element in found:
        heading_data = element.text
        print(heading_data.lower())

    # --- Disabled: earlier Wikipedia link scraper, kept for reference ---
    # print('soup:', soup)
    # for link in soup.find_all("a"):
    #     url = link.get("href", "")
    #     print('url:', url)
    #     if "/wiki/" in url:
    #         name_species = url.replace("/wiki/", "")
    #     species.append(name_species)
    #
    #     destination = "List_of_tree_genera.txt"
    #     with open(destination, 'w') as source:
    #         for specie in species:
    #             source.write(specie)
    #             source.write('\n')
else:
    # Report the failure on stderr instead of exiting silently with no
    # output; stdout stays reserved for the scraped headings.
    sys.stderr.write(
        "Request to {} failed with HTTP status {}\n".format(
            url, response.status_code
        )
    )

# complete_links =["https://en.wikipedia.org/wiki/", "https://es.wikipedia.org/wiki/", "https://fr.wikipedia.org/wiki/", "https://nl.wikipedia.org/wiki/"]

'''
comments:
Trees of Africa refer to all countries listed here: https://en.wikipedia.org/wiki/Ecoregions_of_Africa

'''
functional copy of the folder in Algolit repository -Algoliterary Publishing- where this project is still saved. 4 years ago			`#!/usr/bin/env/ python`
			`# encoding=utf8`

			`from bs4 import BeautifulSoup`
			`import requests`

			`species = []`
			`name_species = ''`

			`url = "https://www.arbolapp.es/especies-nombre-cientifico/"`

			`# Getting the webpage, creating a Response object.`
			`response = requests.get(url)`

			`if response:`
			`# Extracting the source code of the page.`
			`data = response.text`
			`soup = BeautifulSoup(data, 'lxml')`
			`# find all elements inside a div element of class contenido`
			`selector = 'div.contenido > h4'`
			`# find elements that contain the data we want`
			`found = soup.select(selector)`
			`for element in found:`
			`heading_data = element.text`
			`print(heading_data.lower())`
			`# print('soup:', soup)`
			`# for link in soup.find_all("a"):`
			`# url = link.get("href", "")`
			`# print('url:', url)`
			`# if "/wiki/" in url:`
			`# name_species = url.replace("/wiki/", "")`
			`# species.append(name_species)`

			`# destination = "List_of_tree_genera.txt"`
			`# with open(destination, 'w') as source:`
			`# for specie in species:`
			`# source.write(specie)`
			`# source.write('\n')`
			`else:`
			`pass`

			`# complete_links =["https://en.wikipedia.org/wiki/", "https://es.wikipedia.org/wiki/", "https://fr.wikipedia.org/wiki/", "https://nl.wikipedia.org/wiki/"]`

			`'''`
			`comments:`
			`Trees of Africa refer to all countries listed here: https://en.wikipedia.org/wiki/Ecoregions_of_Africa`

			`'''`