"""Save a browser screenshot of every Wikipedia page listed in a CSV file.

Reads ``describers_short_long.csv`` from the current working directory.
For each row, the URL in the "Wikipedia" column is opened in Chrome and a
screenshot is written to ``<Short>.png`` (also in the working directory),
where ``<Short>`` is the value of the row's "Short" column.

Requests are throttled: after every ``BATCH_SIZE`` pages the script pauses
for ``BATCH_PAUSE`` seconds before continuing.
"""
import csv
from time import sleep

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

SOURCE_CSV = "describers_short_long.csv"
PAGE_WAIT = 3      # seconds slept after loading a page and after the screenshot
BATCH_SIZE = 5     # number of requests between two long pauses
BATCH_PAUSE = 60   # seconds slept after each full batch

# get data
# The CSV is read before the browser is started so that a missing or
# malformed file does not leave a Chrome process behind.
names = []
urls = []
with open(SOURCE_CSV, "r", newline="") as source:
    for each_row in csv.DictReader(source):
        names.append(each_row["Short"])
        urls.append(each_row["Wikipedia"])

# declare driver
# NOTE(review): passing the driver path positionally is the older Selenium
# calling convention; newer releases may require wrapping it in a Service
# object -- confirm against the installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())

# get screenshots, with a BATCH_PAUSE seconds break after every BATCH_SIZE
# requests
try:
    for count, (name, url) in enumerate(zip(names, urls), start=1):
        print('url:', url)
        filename = name + ".png"
        print('filename:', filename)
        driver.get(url)
        # presumably gives the page time to finish rendering before capture
        sleep(PAGE_WAIT)
        driver.get_screenshot_as_file(filename)
        sleep(PAGE_WAIT)
        # Pause after each complete batch, but not after the very last URL:
        # there is nothing left to throttle at that point.
        if count % BATCH_SIZE == 0 and count < len(urls):
            sleep(BATCH_PAUSE)
finally:
    # Always close the browser, even when a request or screenshot fails.
    driver.quit()

print("end...")