# store car data
car_data = []
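
# NOTE: this block assumes the setup from earlier in the script is already in place:
# a Selenium driver instance, the imports By, WebDriverWait, EC and time,
# and the convert() helper used further down to build a dictionary from the scraped list.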

# scraper
try:
    url = 'https://www.autotrader.co.uk/car-search?year-to=2023&postcode=E1W1BQ&include-delivery-option=on&advertising-location=at_cars&page=1'
    driver.get(url)
except Exception:
    print('incorrect url')
else:
    for page in range(100):  # loop over result pages 1-100
        # extract the car links on the current page
        search = 'car-details'
        car_links = []
        cars = driver.find_elements(By.CLASS_NAME, 'search-page__result')
        for car in cars:
            link_tags = car.find_elements(By.TAG_NAME, 'a')
            for tag in link_tags:
                href = tag.get_attribute('href')
                if href and search in href:  # skip anchors without an href
                    car_links.append(href)

        # from each car link, open a new tab and extract the information needed
        for link in car_links:
            # open a new window
            driver.execute_script("window.open('');")
            # switch to the new window and open the car's URL
            driver.switch_to.window(driver.window_handles[1])
            driver.get(link)

            # extract the needed info
            time.sleep(3)
            try:
                # info 1: name, price and mileage
                name = driver.find_element(By.XPATH, '//*[@id="layout-desktop"]/aside/section[1]/p[1]').text
                price = driver.find_element(By.XPATH, '//*[@id="layout-desktop"]/aside/section[1]/div[1]/div[1]/h2').text
                miles = driver.find_element(By.XPATH, '//*[@id="layout-desktop"]/article/section[2]/span[1]/span[2]').text

                # info 2: the key-specs list
                info_2 = driver.find_element(By.XPATH, '//*[@id="layout-desktop"]/article/section[2]/ul').text.split('\n')
                # drop the owners tooltip text if it is present
                text_to_remove = 'Information on the number of owners'
                if text_to_remove in info_2:
                    info_2.remove(text_to_remove)

                # scroll down to the specification section
                driver.execute_script("window.scrollTo(0, 1000);")
                wait = WebDriverWait(driver, 1)
                # click the specification button to open the spec modal
                wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#layout-desktop > article > div:nth-child(4) > button'))).click()

                # click Performance and extract its details
                wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li:nth-child(1) > span > button'))).click()
                perfm_details = driver.find_element(By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li.atc-accordion.atc-accordion--expanded > span > div > ul').text.split('\n')
                driver.find_element(By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li.atc-accordion.atc-accordion--expanded > span > button').click()

                # click Dimensions and extract its details
                items = driver.find_elements(By.CLASS_NAME, 'atc-accordion')
                for item in items:
                    if item.text == 'Dimensions':
                        item.click()
                driver.execute_script("window.scrollTo(0, 7);")
                dims_details = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li.atc-accordion.atc-accordion--expanded > span > div > ul'))).text.split('\n')
            except Exception:
                # if an error is encountered on this tab, close it and return to the first tab
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
                continue

            # combine info_2 with the performance and dimension lists
            info_2.extend(perfm_details)
            info_2.extend(dims_details)

            # make a dictionary from the combined list
            car_info = info_2.copy()
            car_dict = convert(car_info)
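            # convert() is a helper defined earlier in the script; it is assumed here to map the
            # flat list of scraped strings into a {label: value} dictionary (for example, a
            # hypothetical {'Mileage': '20,000 miles', 'Fuel type': 'Petrol', ...}).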

            # add the extra info to the dictionary
            car_dict['name'] = name
            car_dict['price'] = price
            car_dict['miles'] = miles

            # append to the list of scraped cars
            car_data.append(car_dict)

            # close the car tab
            driver.close()
            # switch back to the search-results tab
            driver.switch_to.window(driver.window_handles[0])

        # go to the next results page
        driver.find_element(By.CLASS_NAME, 'paginationMini--right__active').click()
        time.sleep(3)

print('Scraping done')
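
# Optional: a quick way to inspect or persist the scraped records.
# This is only a sketch and assumes pandas is installed; the columns come from whatever
# keys convert() produced, and 'autotrader_cars.csv' is a hypothetical output filename.
import pandas as pd

df = pd.DataFrame(car_data)                      # one row per car, one column per scraped field
print(df.shape)                                  # how many cars and fields were captured
df.to_csv('autotrader_cars.csv', index=False)    # save for later cleaning/analysis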