import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Resolve the ChromeDriver executable through webdriver_manager, then start the
# Chrome session that the rest of the script uses via the module-level `driver`.
_chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(service=ChromeService(_chromedriver_path))
def _text_or_none(by, value, missing_message):
    """Return the stripped text of the first element matching the locator.

    The lookup runs against the module-level ``driver``. When no element
    matches, ``missing_message`` is printed and ``None`` is returned so that a
    single missing field does not abort the scrape of the whole product page.
    """
    try:
        return driver.find_element(by, value).text.strip()
    except NoSuchElementException:
        print(missing_message)
        return None


def extract_product_info():
    """Scrape the product page currently loaded in ``driver``.

    Every field is looked up independently; a field whose element is missing
    is reported on stdout and stored as ``None``. All text values are stripped
    of surrounding whitespace.

    Returns:
        dict with the keys ``Title``, ``Price``, ``Delivery``, ``SellerName``,
        ``SellerRating``, ``ShipOnTime``, ``Daraz Mall Status`` (bool) and
        ``Ratings``.
    """
    title = _text_or_none(
        By.CSS_SELECTOR,
        '#module_product_title_1 > div > div > span',
        "Title not found",
    )
    price = _text_or_none(By.CLASS_NAME, 'pdp-price', "Price not found")
    delivery = _text_or_none(
        By.CSS_SELECTOR,
        '#module_seller_delivery .delivery-option-item__title span:nth-child(1)',
        "Delivery option not found",
    )
    # Elements that must carry two classes at once are matched with an explicit
    # CSS selector: By.CLASS_NAME is meant for a single class name.
    seller_name = _text_or_none(
        By.CSS_SELECTOR,
        '.pdp-link_theme_black.seller-name__detail-name',
        "Seller name not found",
    )
    seller_rating = _text_or_none(
        By.CSS_SELECTOR,
        '.seller-info-value.rating-positive',
        "Seller rating not found",
    )
    # NOTE(review): this takes the FIRST element with class `seller-info-value`.
    # The seller-rating element above carries that class too, so this may return
    # the rating rather than the ship-on-time figure -- confirm against the page.
    ship_on_time = _text_or_none(
        By.CLASS_NAME,
        'seller-info-value',
        "Shipping on time information not found",
    )

    # Only the presence of the badge matters, not its content.
    try:
        driver.find_element(By.CLASS_NAME, 'pdp-seller-badge')
    except NoSuchElementException:
        print("Not on Daraz Mall")
        daraz_status = False
    else:
        daraz_status = True

    # Scroll down and pause before reading the score; presumably the rating
    # widget is only rendered once it is scrolled into view -- TODO confirm.
    driver.execute_script("window.scrollTo(0, 850);")
    time.sleep(1)
    rating = _text_or_none(By.CLASS_NAME, 'score', "No rating for this product")

    return {
        "Title": title,
        "Price": price,
        "Delivery": delivery,
        "SellerName": seller_name,
        "SellerRating": seller_rating,
        "ShipOnTime": ship_on_time,
        "Daraz Mall Status": daraz_status,
        "Ratings": rating,
    }
def _scrape_product_in_new_tab(product_url):
    """Load ``product_url`` in a temporary tab and return its scraped fields.

    The temporary tab is always closed and focus is returned to the listing
    tab, even when loading or scraping raises, so a failure on one product
    cannot leave a stray tab behind for the next one.
    """
    listing_window = driver.current_window_handle
    # Opens a new tab and switches the driver to it in one step.
    driver.switch_to.new_window('tab')
    try:
        driver.get(product_url)
        return extract_product_info()
    finally:
        driver.close()
        driver.switch_to.window(listing_window)


# Search-results listing for "mobile phones"; scraping starts from its page 1.
driver.get('https://www.daraz.pk/catalog/?_keyori=ss&clickTrackInfo=textId--8106960466929852021__abId--None__pvid--51001bb4-fe5c-406a-98ef-7f1bf391c08e__matchType--1__abGroup--None__srcQuery--mobile%20phones__spellQuery--mobile%20phones__ntType--nt-common&from=suggest_normal&page=1&q=mobile%20phones&spm=a2a0e.home.search.1.6a274076lTRNcE&sugg=mobile%20phones_0_1')
columns = ["Title", "Price", "Delivery", "SellerName", "SellerRating", "ShipOnTime", "Daraz Mall Status", "Ratings"]
num_pages = 6
products_per_page = 40

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append no longer exists in pandas 2.x, and appending row by
# row copies the whole frame each time.
rows = []

try:
    for page in range(num_pages):
        print(f"Extracting data from page {page + 1}...")
        product_elements = driver.find_elements(By.CLASS_NAME, 'gridItem--Yd0sa')
        products_extracted = 0

        for index, product_element in enumerate(product_elements):
            if products_extracted >= products_per_page:
                break
            print(f"Extracting data for product {index + 1} on page {page + 1}...")

            # Resolve the product link from the listing card.
            try:
                product_url = product_element.find_element(By.TAG_NAME, 'a').get_attribute('href')
            except StaleElementReferenceException:
                print("Stale element reference. Skipping.")
                continue
            except NoSuchElementException:
                print("Product URL not found. Skipping.")
                continue
            if not product_url:
                # An anchor without an href gives nothing to navigate to.
                print("Product URL not found. Skipping.")
                continue

            # Visit the product page in its own tab and record its fields.
            try:
                rows.append(_scrape_product_in_new_tab(product_url))
                products_extracted += 1
            except NoSuchElementException:
                print("Error extracting product info. Skipping.")
            except StaleElementReferenceException:
                print("Stale element reference. Skipping.")

        if page + 1 >= num_pages:
            # The last requested page has been scraped; nothing to click.
            break

        # `page` is 0-based and the listing just scraped is page `page + 1`,
        # so the pagination button to click is the one for `page + 2`.
        page_number = page + 2
        try:
            driver.execute_script("window.scrollTo(0, 3850);")
            time.sleep(10)
            print(f"Clicking on page {page_number} button...")
            page_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, f'ant-pagination-item-{page_number}'))
            )
            page_button.click()
            time.sleep(2)
        except (NoSuchElementException, TimeoutException):
            # WebDriverWait.until signals "never became clickable" with
            # TimeoutException rather than NoSuchElementException.
            print(f"Page button for page {page_number} not found. Exiting.")
            break
        except Exception as e:
            print(f"An error occurred: {e}")
            break
finally:
    # Close the browser even if scraping failed part-way through.
    driver.quit()

df = pd.DataFrame(rows, columns=columns)
print(df)