script that scrapes screenshots of plant describers' Wikipedia pages
parent d94611313c
commit 5c6401f935
@@ -0,0 +1,39 @@
import csv
from selenium import webdriver
from time import sleep
from webdriver_manager.chrome import ChromeDriverManager
# declare driver
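# ChromeDriverManager().install() downloads a ChromeDriver binary and returns its local path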
driver = webdriver.Chrome(ChromeDriverManager().install())
# get data
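# each row of the CSV holds a short describer name ("Short") and a Wikipedia URL ("Wikipedia")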
urls = []
names = []
with open("describers_short_long.csv", 'r') as source:
    csv_reader = csv.DictReader(source)
    for each_row in csv_reader:
        name = each_row["Short"]
        names.append(name)
        url_wikipedia = each_row["Wikipedia"]
        urls.append(url_wikipedia)
# get screenshots, with a 60-second break after every 5 requests
rounds = 0
position = 0
for url in urls:
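    # visit each Wikipedia page and save a screenshot named after the describer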
    print('url:', url)
    name = names[position]
    filename = name + ".png"
    print('filename:', filename)
    driver.get(url)
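    # give the page a few seconds to render before capturing it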
    sleep(3)
    driver.get_screenshot_as_file(filename)
    sleep(3)
    position += 1
    rounds += 1
    if rounds == 5:
        sleep(60)
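        # reset the counter so the next batch of pages triggers another pause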
        rounds = 0
driver.quit()
print("end...")