Oleksii Gavrylenko
I got this task through Upwork, where I was already contracted with Scale AI.
The task was to create a web scraper for an automobile details website. This website is mainly focused on company owners and isn't available to non-corporate clients.
Unfortunately, I did not finish the project, since the client disappeared and failed to communicate further for two months.
Even though the project came to an end before the work was fully complete, I managed to create an automated login solution with an AI captcha solver.
Here is the code snippet of the program I've created:
#The hardest part, the part of solving the captcha
def request_captcha(*, threshold=140, sigma=1.3):
    """Download a fresh captcha image, clean it up and return the recognised text.

    The captcha endpoint is read from the ``captcha_url`` environment variable
    and fetched through the module-level ``session``. Per the original author's
    note, that session must already hold the site's basic cookies, otherwise
    the solved captcha cannot be used to log in.

    Every intermediate image (original, grayscale, thresholded, blurred,
    4x upscaled) is written to ``captcha_dir`` with a shared timestamp suffix
    so a failed recognition can be inspected afterwards.

    Args:
        threshold: Grayscale cut-off; pixels brighter than this become 255,
            all others 0.
        sigma: Sigma passed to ``gaussian_filter`` when blurring the
            thresholded image.

    Returns:
        The text Tesseract recognised, stripped of surrounding whitespace.
        Recognition is restricted to digits and upper-case Latin letters.

    Raises:
        ValueError: If the ``captcha_url`` environment variable is unset or
            empty.
        Exception: Whatever ``response.raise_for_status()`` raises on a
            4xx/5xx answer (``requests.HTTPError`` if ``session`` is a
            ``requests.Session`` -- TODO confirm).
    """
    base_url = os.getenv("captcha_url")
    if not base_url:
        # Without this check the request would go to the literal "None?0.<n>".
        raise ValueError("Environment variable 'captcha_url' is not set")

    # One timestamp shared by every artefact produced during this call.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # A random query suffix makes each captcha URL unique.
    captcha_url = f"{base_url}?0.{random_num()}"

    # The whole body is needed right away, so streaming is not requested.
    response = session.get(captcha_url, timeout=10)
    # Fail here instead of saving an error page as a .png and crashing in PIL.
    response.raise_for_status()

    # Mode "wb" truncates an existing file, so no explicit removal is needed.
    image_path = os.path.join(captcha_dir, f"original_{timestamp}.png")
    with open(image_path, "wb") as image_file:
        image_file.write(response.content)

    # Pre-processing so the characters stand out clearly for the OCR step.
    with Image.open(image_path) as image:
        black_and_white = image.convert("L")  # 8-bit grayscale copy
    bw_path = os.path.join(captcha_dir, f"black_white_{timestamp}.png")
    black_and_white.save(bw_path)

    # Binarise: anything brighter than the threshold becomes white.
    first_threshold = black_and_white.point(
        lambda p: 255 if p > threshold else 0
    )
    ft_path = os.path.join(captcha_dir, f"first_threshold_{timestamp}.png")
    first_threshold.save(ft_path)

    # Gaussian blur on the binarised pixels.
    pixels = numpy.array(first_threshold)
    blurred = Image.fromarray(gaussian_filter(pixels, sigma=sigma))
    blur_path = os.path.join(captcha_dir, f"blurred_{timestamp}.png")
    blurred.save(blur_path)

    # Upscale 4x with the pretrained EDSR super-resolution model.
    # NOTE(review): the model is loaded on every call; consider caching it
    # at module level if this function is called repeatedly.
    model = EdsrModel.from_pretrained('eugenesiow/edsr-base', scale=4)
    with Image.open(blur_path) as blurred_file:
        inputs = ImageLoader.load_image(blurred_file)
    preds = model(inputs)
    scaled_path = os.path.join(captcha_dir, f"scaled_4x_{timestamp}.png")
    ImageLoader.save_image(preds, scaled_path)

    # 3x3 sharpening kernel applied to the upscaled image.
    sharpen_filter = numpy.array([[-1, -1, -1],
                                  [-1, 9, -1],
                                  [-1, -1, -1]])
    upscaled = cv2.imread(scaled_path)
    sharp_image = cv2.filter2D(upscaled, -1, sharpen_filter)

    # Identify the characters (digits and upper-case letters only).
    captcha_text = pytesseract.image_to_string(
        sharp_image,
        lang='eng',
        config='--psm 7 --oem 3 -c tessedit_char_whitelist=0123456789QWERTYUIOPASDFGHJKLZXCVBNM',
    ).strip()

    # Report the result on stdout and in the debug log.
    print(f"Got the captcha: {captcha_text}")
    logging.debug("Got the captcha: %s", captcha_text)
    return captcha_text