You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
#!/usr/bin/env python
|
|
# encoding=utf8
|
|
|
|
from bs4 import BeautifulSoup
|
|
import requests
|
|
|
|
# Scrape the arbolapp.es scientific-name index and print every <h4> heading
# found directly inside <div class="contenido">, lower-cased, one per line.

# Accumulators used only by the disabled link-harvesting draft further down.
species = []
name_species = ''

url = "https://www.arbolapp.es/especies-nombre-cientifico/"

# Getting the webpage, creating a Response object.
# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)

# A requests Response is truthy only when the status code is below 400.
if response:
    # Extracting the source code of the page.
    data = response.text
    soup = BeautifulSoup(data, 'lxml')

    # <h4> elements that are direct children of a div of class "contenido";
    # presumably these headings carry the species names -- confirm against
    # the live page markup.
    selector = 'div.contenido > h4'
    found = soup.select(selector)

    for element in found:
        heading_data = element.text
        print(heading_data.lower())

    # Disabled draft kept for reference: collects "/wiki/" link targets into
    # `species` and writes them to a text file.
    # print('soup:', soup)
    # for link in soup.find_all("a"):
    #     url = link.get("href", "")
    #     print('url:', url)
    #     if "/wiki/" in url:
    #         name_species = url.replace("/wiki/", "")
    #         species.append(name_species)
    #
    # destination = "List_of_tree_genera.txt"
    # with open(destination, 'w') as source:
    #     for specie in species:
    #         source.write(specie)
    #         source.write('\n')
else:
    # Report the failure instead of silently doing nothing: SystemExit with a
    # string argument prints it to stderr and exits with status 1.
    raise SystemExit(
        "Request to {} failed with HTTP status {}".format(
            url, response.status_code
        )
    )
|
|
|
|
# complete_links =["https://en.wikipedia.org/wiki/", "https://es.wikipedia.org/wiki/", "https://fr.wikipedia.org/wiki/", "https://nl.wikipedia.org/wiki/"]
|
|
|
|
'''
|
|
comments:
|
|
Trees of Africa refer to all countries listed here: https://en.wikipedia.org/wiki/Ecoregions_of_Africa
|
|
|
|
''' |