#!/usr/bin/env python
# encoding=utf8
"""Print the headings of the arbolapp.es scientific-name species index.

The script downloads the page at ``url``, selects every ``<h4>`` element that
is a direct child of a ``<div class="contenido">`` and prints the text of each
one in lower case, one per line.

If the server answers with an error status, nothing is printed on stdout and
a short diagnostic is written to stderr instead.
"""
import sys

from bs4 import BeautifulSoup
import requests

url = "https://www.arbolapp.es/especies-nombre-cientifico/"

# CSS selector: <h4> elements directly inside a <div> of class "contenido".
selector = 'div.contenido > h4'

# Upper bound (seconds) on how long to wait for the server. Without a timeout
# requests.get() may block indefinitely when the remote host stops responding.
REQUEST_TIMEOUT_SECONDS = 30

# Getting the webpage, creating a Response object.
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

# response.ok is the explicit form of the truthiness test `if response:`.
if response.ok:
    # Parse the source code of the page.
    soup = BeautifulSoup(response.text, 'lxml')

    # Print the text of every matching heading, lower-cased.
    for element in soup.select(selector):
        print(element.text.lower())
else:
    # Report the failure instead of exiting silently; the script still
    # terminates normally, as it did before.
    sys.stderr.write(
        "Request to {} failed with HTTP status {}\n".format(
            url, response.status_code
        )
    )

# ---------------------------------------------------------------------------
# Disabled legacy code, kept for reference. It collected the "/wiki/" link
# targets found in the parsed page and wrote them to a text file.
#
# species = []
# name_species = ''
# print('soup:', soup)
# for link in soup.find_all("a"):
#     url = link.get("href", "")
#     print('url:', url)
#     if "/wiki/" in url:
#         name_species = url.replace("/wiki/", "")
#         species.append(name_species)
# destination = "List_of_tree_genera.txt"
# with open(destination, 'w') as source:
#     for specie in species:
#         source.write(specie)
#         source.write('\n')
#
# complete_links =["https://en.wikipedia.org/wiki/", "https://es.wikipedia.org/wiki/", "https://fr.wikipedia.org/wiki/", "https://nl.wikipedia.org/wiki/"]
#
# comments:
# Trees of Africa refer to all countries listed here:
# https://en.wikipedia.org/wiki/Ecoregions_of_Africa
# ---------------------------------------------------------------------------