Script that scrapes screenshots of plant describers' pages
							parent
							
								
									d94611313c
								
							
						
					
					
						commit
						5c6401f935
					
				@ -0,0 +1,39 @@
 | 
				
			|||||||
 | 
					import csv
 | 
				
			||||||
 | 
					from selenium import webdriver
 | 
				
			||||||
 | 
					from time import sleep
 | 
				
			||||||
 | 
					from webdriver_manager.chrome import ChromeDriverManager
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Screenshot scraper: for every row of describers_short_long.csv, open the
					# URL in the "Wikipedia" column in Chrome and save a screenshot named after
					# the "Short" column ("<Short>.png") in the current working directory.

					# Pacing: settle time around each request, plus a longer pause after every
					# batch of requests so the target site is not hit too quickly.
					PAGE_DELAY_SECONDS = 3
					BATCH_SIZE = 5
					BATCH_PAUSE_SECONDS = 60

					# declare driver
					driver = webdriver.Chrome(ChromeDriverManager().install())

					try:
						# get data
						urls = []
						names = []
						# newline='' lets the csv module do its own newline handling, so
						# quoted fields containing line breaks are parsed correctly.
						with open("describers_short_long.csv", 'r', newline='') as source:
							for each_row in csv.DictReader(source):
								names.append(each_row["Short"])
								urls.append(each_row["Wikipedia"])

						# get screenshots, with a 60 seconds break after every 5 requests
						for count, (name, url) in enumerate(zip(names, urls), start=1):
							print('url:', url)
							filename = name+".png"
							print('filename:', filename)
							driver.get(url)
							# give the page time to render before capturing it
							sleep(PAGE_DELAY_SECONDS)
							# get_screenshot_as_file reports write failures by returning
							# False instead of raising, so surface them here.
							if not driver.get_screenshot_as_file(filename):
								print('could not save screenshot:', filename)
							sleep(PAGE_DELAY_SECONDS)
							# Pause after every full batch. The original compared
							# (rounds == 0) instead of resetting the counter, so the
							# pause only ever happened once.
							if count % BATCH_SIZE == 0:
								sleep(BATCH_PAUSE_SECONDS)
					finally:
						# always close the browser, even if reading the CSV or a page
						# load fails part-way through
						driver.quit()

					print("end...")
 | 
				
			||||||
					Loading…
					
					
				
		Reference in New Issue