Interactive Chatbot for Customer Support

Shahryar A

Data Scientist
Web Designer
Data Analyst

Technical Description:

1. Web Crawling (Selenium):

Selenium, a browser-automation tool, was used to crawl the Daraz website. In this project it navigated through the site, scraped product information, and collected product reviews.

2. Data Processing and Transformation:

The data obtained from web crawling, including product information and reviews, was stored in CSV files. Python was then used to transform these CSV files into a structured SQL database, which involved creating tables, defining schemas, and inserting the data.

3. Database Interaction (SQL):

SQL was used for database management. The data collected from the Daraz website was structured into tables, allowing for efficient storage and retrieval. SQL queries were used to interact with the database, including inserting new data and querying existing data.

4. GPT Model for Chatbot:

A GPT (Generative Pre-trained Transformer) model was trained to function as an interactive chatbot. GPT models are known for their natural language processing capabilities. This chatbot was trained on a diverse dataset and fine-tuned for your specific use case. It can understand and generate human-like responses.

5. Flask for Front End:

Flask, a web framework in Python, was employed to create the front end of the project. The Flask application served as the interface for users to interact with the chatbot and access data insights. The front end provided a user-friendly experience, allowing users to input queries and receive responses.
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Module-level Chrome session shared by everything below. The chromedriver
# location handed to the Service is whatever ChromeDriverManager().install()
# returns.
_chrome_service = ChromeService(ChromeDriverManager().install())
driver = webdriver.Chrome(service=_chrome_service)

def _find_text(by, selector, missing_message, strip=False):
    """Return the text of the first element matching *selector*, or None.

    The lookup runs against the module-level ``driver``. When Selenium raises
    NoSuchElementException, *missing_message* is printed and None is returned.
    When *strip* is true, surrounding whitespace is removed from the text.
    """
    try:
        text = driver.find_element(by, selector).text
    except NoSuchElementException:
        print(missing_message)
        return None
    return text.strip() if strip else text


def extract_product_info():
    """Collect product details from the page currently loaded in ``driver``.

    Each field is looked up independently, so a missing element only leaves
    that one field as None (and prints a short notice) instead of aborting
    the whole extraction.

    Returns:
        dict with the keys "Title", "Price", "Delivery", "SellerName",
        "SellerRating", "ShipOnTime", "Daraz Mall Status" and "Ratings".
        "Daraz Mall Status" is a bool; every other value is the element text
        or None when the element was not found.
    """
    title = _find_text(
        By.CSS_SELECTOR,
        '#module_product_title_1 > div > div > span',
        "Title not found",
    )
    price = _find_text(By.CLASS_NAME, 'pdp-price', "Price not found", strip=True)
    delivery = _find_text(
        By.CSS_SELECTOR,
        '#module_seller_delivery .delivery-option-item__title span:nth-child(1)',
        "Delivery option not found",
        strip=True,
    )
    seller_name = _find_text(
        By.CLASS_NAME,
        'pdp-link_theme_black.seller-name__detail-name',
        "Seller name not found",
        strip=True,
    )
    seller_rating = _find_text(
        By.CLASS_NAME,
        'seller-info-value.rating-positive',
        "Seller rating not found",
    )
    # NOTE(review): 'seller-info-value' is also part of the SellerRating
    # selector above, and find_element returns only the first match, so this
    # may pick up the same element as SellerRating - verify against the page.
    ship_on_time = _find_text(
        By.CLASS_NAME,
        'seller-info-value',
        "Shipping on time information not found",
    )

    # Only the presence of the badge matters; the element itself is not used.
    try:
        driver.find_element(By.CLASS_NAME, 'pdp-seller-badge')
        daraz_status = True
    except NoSuchElementException:
        print("Not on Daraz Mall")
        daraz_status = False

    # Scroll down and pause briefly before reading the score (presumably the
    # ratings section is rendered only after scrolling - TODO confirm).
    driver.execute_script("window.scrollTo(0, 850);")
    time.sleep(1)
    rating = _find_text(By.CLASS_NAME, 'score', "No rating for this product")

    return {
        "Title": title,
        "Price": price,
        "Delivery": delivery,
        "SellerName": seller_name,
        "SellerRating": seller_rating,
        "ShipOnTime": ship_on_time,
        "Daraz Mall Status": daraz_status,
        "Ratings": rating,
    }


# Column order of the final DataFrame; these are the keys of the dict
# returned by extract_product_info().
columns = ["Title", "Price", "Delivery", "SellerName", "SellerRating", "ShipOnTime", "Daraz Mall Status", "Ratings"]
num_pages = 6
products_per_page = 40

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
rows = []

try:
    driver.get('https://www.daraz.pk/catalog/?_keyori=ss&clickTrackInfo=textId--8106960466929852021__abId--None__pvid--51001bb4-fe5c-406a-98ef-7f1bf391c08e__matchType--1__abGroup--None__srcQuery--mobile%20phones__spellQuery--mobile%20phones__ntType--nt-common&from=suggest_normal&page=1&q=mobile%20phones&spm=a2a0e.home.search.1.6a274076lTRNcE&sugg=mobile%20phones_0_1')

    for page in range(num_pages):
        print(f"Extracting data from page {page + 1}...")

        product_elements = driver.find_elements(By.CLASS_NAME, 'gridItem--Yd0sa')
        products_extracted = 0

        for index, product_element in enumerate(product_elements):
            if products_extracted >= products_per_page:
                break

            print(f"Extracting data for product {index + 1} on page {page + 1}...")

            # Resolve the product link from the listing card.
            try:
                product_url = product_element.find_element(By.TAG_NAME, 'a').get_attribute('href')
            except StaleElementReferenceException:
                print("Stale element reference. Skipping.")
                continue
            except NoSuchElementException:
                print("Product URL not found. Skipping.")
                continue

            if not product_url:
                # An anchor without an href cannot be opened.
                print("Product URL not found. Skipping.")
                continue

            # Open the product in a separate tab so the listing page (and the
            # element references collected from it) stays loaded.
            main_window = driver.current_window_handle
            driver.execute_script("window.open('');")
            driver.switch_to.window(driver.window_handles[-1])
            try:
                driver.get(product_url)
                rows.append(extract_product_info())
                products_extracted += 1
            except NoSuchElementException:
                print("Error extracting product info. Skipping.")
            finally:
                # Always close the product tab and return to the listing,
                # even when extraction fails, so tabs do not pile up.
                driver.close()
                driver.switch_to.window(main_window)

        # Nothing to click after the last requested page.
        if page >= num_pages - 1:
            break

        # `page` is zero-based and page `page + 1` was just scraped, so the
        # button to click is the one for page `page + 2`.
        next_page_number = page + 2
        try:
            driver.execute_script("window.scrollTo(0, 3850);")
            time.sleep(10)
            print(f"Clicking on page {next_page_number} button...")
            page_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, f'ant-pagination-item-{next_page_number}'))
            )
            page_button.click()
            time.sleep(2)
        except (TimeoutException, NoSuchElementException):
            # WebDriverWait.until signals a missing button with TimeoutException.
            print(f"Page button for page {next_page_number} not found. Exiting.")
            break
        except Exception as e:
            print(f"An error occurred: {e}")
            break
finally:
    # Close the browser, even if scraping stopped with an error.
    driver.quit()

df = pd.DataFrame(rows, columns=columns)
print(df)
Partner With Shahryar
View Services

More Projects by Shahryar