Sign Up
View All Projects
Predicting Credit Card Approvals
Basit Qureshi
Data Scientist
Data Visualizer
Data Analyst
pandas
Python
scikit-learn
# Import pandas
import pandas as pd

# Load dataset. header=None tells pandas the file has no header row, so the
# columns get integer labels (the later drop of 11 and 13 relies on that).
cc_apps = pd.read_csv("datasets/cc_approvals.data", header=None)

# Inspect data. Outside a notebook a bare expression displays nothing, so the
# preview is printed explicitly.
print(cc_apps.head())

# Print summary statistics
cc_apps_description = cc_apps.describe()
print(cc_apps_description)
print('\n')

# Print DataFrame information. DataFrame.info() writes its report to stdout
# itself and returns None; printing the return value would only emit a stray
# "None". The name is still bound so any later reference keeps working.
cc_apps_info = cc_apps.info()
print('\n')

# Inspect the last 17 rows (the walkthrough uses them to look at missing values)
print(cc_apps.tail(17))
# Import train_test_split
from sklearn.model_selection import train_test_split

# Features 11 and 13 are discarded before modelling.
cc_apps = cc_apps.drop(columns=[11, 13])

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
cc_apps_train, cc_apps_test = train_test_split(
    cc_apps,
    test_size=0.33,
    random_state=42,
)
# Import numpy
import numpy as np

# Replace the '?'s with NaN in the train and test sets.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
cc_apps_train = cc_apps_train.replace('?', np.nan)
cc_apps_test = cc_apps_test.replace('?', np.nan)

# Impute the missing values with mean imputation. The means come from the
# train set only and are reused for the test set, so no test statistics are
# used during preprocessing. numeric_only=True restricts the mean to numeric
# columns; without it pandas >= 2.0 raises TypeError on object columns.
train_means = cc_apps_train.mean(numeric_only=True)
cc_apps_train = cc_apps_train.fillna(train_means)
cc_apps_test = cc_apps_test.fillna(train_means)

# Count the number of NaNs in the datasets and print the counts to verify
print(cc_apps_train.isnull().sum())
print(cc_apps_test.isnull().sum())
# Iterate over each column of cc_apps_train
for col in cc_apps_train.columns:
    # Only object-typed columns are imputed here.
    if cc_apps_train[col].dtype != 'object':
        continue

    # value_counts() sorts by descending frequency and ignores NaN, so the
    # first index entry is the most frequent value seen in the TRAIN set.
    counts = cc_apps_train[col].value_counts()
    if counts.empty:
        # The column holds nothing but NaN in the train set: there is no
        # most-frequent value to impute with, so leave it untouched.
        continue
    most_frequent = counts.index[0]

    # Fill this column only. Calling fillna on the whole frame would push
    # this column's mode into the missing cells of every other column.
    cc_apps_train[col] = cc_apps_train[col].fillna(most_frequent)
    cc_apps_test[col] = cc_apps_test[col].fillna(most_frequent)

# Count the number of NaNs in the dataset and print the counts to verify
print(cc_apps_train.isnull().sum())
print(cc_apps_test.isnull().sum())
# One-hot encode the categorical features of the train and test sets, each
# frame on its own.
cc_apps_train, cc_apps_test = (
    pd.get_dummies(frame) for frame in (cc_apps_train, cc_apps_test)
)

# Give the test set exactly the train set's columns, in the same order:
# columns missing from the test set are created and filled with 0, and
# columns that exist only in the test set are discarded.
cc_apps_test = cc_apps_test.reindex(
    columns=cc_apps_train.columns,
    fill_value=0,
)
# Import MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

# Segregate features and labels into separate variables.
# The encoding step runs get_dummies over the whole frame, so an object-typed
# label is itself expanded into several "<label>_<value>" columns. Slicing
# "everything but the last column" would then keep the sibling label columns
# among the features and leak the target into X. Every column derived from
# the label is therefore removed from the features.
# NOTE(review): assumes the label is the last column of cc_apps, as the
# original positional slicing did - confirm against the dataset.
label_prefix = f"{cc_apps.columns[-1]}_"
label_columns = [
    column for column in cc_apps_train.columns
    if str(column).startswith(label_prefix)
]
if not label_columns:
    # The label was not one-hot encoded: fall back to the last column.
    label_columns = [cc_apps_train.columns[-1]]

X_train = cc_apps_train.drop(columns=label_columns).values
X_test = cc_apps_test.drop(columns=label_columns).values

# The target stays the last label column, as before, but as a 1-D array:
# scikit-learn estimators expect y of shape (n_samples,) and warn on (n, 1).
y_train = cc_apps_train[label_columns[-1]].values
y_test = cc_apps_test[label_columns[-1]].values

# Instantiate MinMaxScaler and use it to rescale X_train and X_test.
# The scaler is fitted on the train features only and then applied to the
# test features, so test data never influences the scaling ranges.
scaler = MinMaxScaler(feature_range=(0, 1))
rescaledX_train = scaler.fit_transform(X_train)
rescaledX_test = scaler.transform(X_test)
# Import LogisticRegression
from sklearn.linear_model import LogisticRegression

# Build a LogisticRegression classifier with default parameter values and
# train it on the rescaled train set; fit() returns the estimator itself,
# so construction and training can be chained.
logreg = LogisticRegression().fit(rescaledX_train, y_train)
# Import confusion_matrix
from sklearn.metrics import confusion_matrix

# Use logreg to predict instances from the test set and store it
y_pred = logreg.predict(rescaledX_test)

# Get the accuracy score of logreg model and print it
print("Accuracy of logistic regression classifier: ", logreg.score(rescaledX_test,y_test))

# Print the confusion matrix of the logreg model. The result has to be passed
# to print(): as a bare expression it is computed and then discarded whenever
# this runs outside a notebook.
print(confusion_matrix(y_test, y_pred))
Partner With Basit
View Services
More Projects by Basit
A Visual History of Nobel Prize Winners
Dr. Semmelweis and the Discovery of Handwashing
How it Works
Contra For Independents
Contra For Hiring
Success Stories
Commission-Free
Company
Mission
Careers
Newsroom
Resources
FAQ
Tips & Guides
Hire
Support
Discover Freelancers
Design
Engineering
Marketing
Music & Audio
Social Media
Video & Animation
Writing
Drops
Freelance Industry Report
Social
Terms & Conditions
Privacy Policy
Cookie Policy
© 2024 Contra.Work Inc All Rights Reserved.