Web Scraping Used Car Listings

Victor Adewoyin


Project Overview:

The goal of the project was to extract data on various used car models. The extracted data includes the car name, price, mileage, number of previous owners, and car specifications. The data was scraped from the Autotrader UK website using Python and the Selenium framework, and stored in a CSV file for further analysis. Data on over 2,000 used cars was scraped from the site.

Result:

The scraped data was stored in a CSV file; a screenshot of the file is shown below:

[Screenshot: the scraped data CSV]
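The export step itself is not shown in the snippet below; one way to write a list of car dictionaries to a CSV file is with pandas. This is a sketch, not necessarily the exact code used: it assumes the records are collected in the car_data list built in the snippet, and the filename used_cars.csv is illustrative.

import pandas as pd

# hypothetical export step; the column names come from the scraped keys
pd.DataFrame(car_data).to_csv('used_cars.csv', index=False)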

Code snippet:

A portion of the code used to scrape the data is given below:
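The snippet references a Selenium driver without showing its setup. A minimal sketch of the imports and driver creation it assumes is given here; the choice of Chrome is an assumption, not from the original code:

import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# assumed setup: any Selenium-supported browser would do
driver = webdriver.Chrome()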

# store car data
car_data = []

# scraper
try:
    url = 'https://www.autotrader.co.uk/car-search?year-to=2023&postcode=E1W1BQ&include-delivery-option=on&advertising-location=at_cars&page=1'
    driver.get(url)
except Exception:
    print('incorrect url')
else:
    for page in range(100):  # loop over result pages 1-100
        # extract the car links on the current page
        search = 'car-details'
        car_links = []
        cars = driver.find_elements(By.CLASS_NAME, 'search-page__result')
        for car in cars:
            link_tags = car.find_elements(By.TAG_NAME, 'a')
            for tag in link_tags:
                href = tag.get_attribute('href')
                if href and search in href:
                    car_links.append(href)

        # from each car link, open a new tab and extract the information needed
        for link in car_links:

            # open a new window
            driver.execute_script("window.open('');")

            # switch to the new window and open the car's URL
            driver.switch_to.window(driver.window_handles[1])
            driver.get(link)

            # extract the needed info
            # info1: name, price and mileage
            time.sleep(3)
            try:
                name = driver.find_element('xpath', '//*[@id="layout-desktop"]/aside/section[1]/p[1]').text
                price = driver.find_element('xpath', '//*[@id="layout-desktop"]/aside/section[1]/div[1]/div[1]/h2').text
                miles = driver.find_element('xpath', '//*[@id="layout-desktop"]/article/section[2]/span[1]/span[2]').text

                # info2: the overview list (owners, year, fuel type, ...)
                info_2 = driver.find_element('xpath', '//*[@id="layout-desktop"]/article/section[2]/ul').text.split('\n')
                # drop the explanatory line about the number of owners if present
                text_to_remove = 'Information on the number of owners'
                if text_to_remove in info_2:
                    info_2.remove(text_to_remove)

                # scroll down to the specification section
                driver.execute_script("window.scrollTo(0, 1000);")
                wait = WebDriverWait(driver, 1)
                # click Specification to open the modal
                wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#layout-desktop > article > div:nth-child(4) > button'))).click()

                # click Performance and extract its details
                wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li:nth-child(1) > span > button'))).click()
                perfm_details = driver.find_element(By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li.atc-accordion.atc-accordion--expanded > span > div > ul').text.split('\n')
                driver.find_element(By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li.atc-accordion.atc-accordion--expanded > span > button').click()

                # click Dimensions and extract its details
                items = driver.find_elements(By.CLASS_NAME, 'atc-accordion')
                for item in items:
                    if item.text == 'Dimensions':
                        item.click()
                driver.execute_script("window.scrollTo(0, 7);")
                dims_details = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#modal-root > div > section > div.sc-eYhfvQ.DXVhN > div > ul > li.atc-accordion.atc-accordion--expanded > span > div > ul'))).text.split('\n')

            except Exception:
                # if an error is encountered on a tab, close it and return to the first tab
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
                continue

            # combine info_2 with the performance and dimensions lists
            info_2.extend(perfm_details)
            info_2.extend(dims_details)
            # make a dictionary (convert is a helper defined elsewhere; see the sketch below)
            car_info = info_2.copy()
            car_dict = convert(car_info)
            # add the extra info to the dictionary
            car_dict['name'] = name
            car_dict['price'] = price
            car_dict['miles'] = miles

            # append to the list
            car_data.append(car_dict)

            # close the car's tab
            driver.close()

            # switch back to the search-results tab
            driver.switch_to.window(driver.window_handles[0])

        # move on to the next page of results
        driver.find_element(By.CLASS_NAME, 'paginationMini--right__active').click()
        time.sleep(3)

print('Scraping done')
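The convert helper used above is not included in the snippet. A hypothetical sketch of what it might look like, assuming the scraped specification text alternates between a label line and its value line:

def convert(lines):
    # hypothetical helper (assumption): pair each label line with the value line after it
    return {label: value for label, value in zip(lines[::2], lines[1::2])}

If the real page interleaves labels and values differently, the pairing logic would change accordingly.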
