Predicting Credit Card Approvals

Data Scientist

Data Visualizer

Data Analyst

pandas

Python

scikit-learn

# Import pandas

import pandas as pd

# Load the dataset. header=None tells pandas the file has no header row,
# so the columns are labelled with integers (0, 1, 2, ...).
cc_apps = pd.read_csv("datasets/cc_approvals.data", header=None)

# Inspect the first rows. A bare expression only renders in a notebook cell,
# so print explicitly to get output when this runs as a script too.
print(cc_apps.head())

# Print summary statistics
cc_apps_description = cc_apps.describe()
print(cc_apps_description)
print('\n')

# Print DataFrame information.
# DataFrame.info() writes its report to stdout itself and returns None, so
# printing its return value would only add a stray "None" line.
cc_apps.info()
print('\n')

# Inspect the last 17 rows of the dataset to look for missing values
print(cc_apps.tail(17))

# Import train_test_split

from sklearn.model_selection import train_test_split

# Remove the columns labelled 11 and 13 before modelling.
cc_apps = cc_apps.drop(columns=[11, 13])

# Hold out 33% of the rows as a test set; the fixed seed keeps the split
# reproducible between runs.
cc_apps_train, cc_apps_test = train_test_split(
    cc_apps,
    test_size=0.33,
    random_state=42,
)

# Import numpy

import numpy as np

# Replace the '?' placeholders with NaN in the train and test sets.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
cc_apps_train = cc_apps_train.replace('?', np.nan)
cc_apps_test = cc_apps_test.replace('?', np.nan)

# Impute the missing values in numeric columns with mean imputation.
# The means come from the training set only and are reused for the test set,
# so no test-set statistic influences the imputation.
# numeric_only=True skips non-numeric columns; without it, mean() raises a
# TypeError on object columns in pandas >= 2.0.
train_means = cc_apps_train.mean(numeric_only=True)
cc_apps_train = cc_apps_train.fillna(train_means)
cc_apps_test = cc_apps_test.fillna(train_means)

# Count the number of NaNs in the datasets and print the counts to verify.
# Non-numeric columns may still report NaNs at this point.
print(cc_apps_train.isnull().sum())
print(cc_apps_test.isnull().sum())

# Impute the remaining missing values in object-typed columns with the most
# frequent value of that column, as observed in the training set.
for col in cc_apps_train.columns:
    # Only object-typed columns are handled here
    if cc_apps_train[col].dtype == 'object':
        # value_counts() ignores NaN and sorts by descending frequency, so the
        # first index entry is the most frequent observed value.
        category_counts = cc_apps_train[col].value_counts()
        if category_counts.empty:
            # Nothing observed in the training set for this column, so there
            # is no most-frequent value to impute with.
            continue
        most_frequent = category_counts.index[0]

        # Fill this column only. Passing a scalar to DataFrame.fillna would
        # overwrite the NaNs of every column with this column's value.
        cc_apps_train = cc_apps_train.fillna({col: most_frequent})
        cc_apps_test = cc_apps_test.fillna({col: most_frequent})

# Count the number of NaNs in the dataset and print the counts to verify
print(cc_apps_train.isnull().sum())
print(cc_apps_test.isnull().sum())

# One-hot encode the categorical features of the training set.
cc_apps_train = pd.get_dummies(cc_apps_train)

# Encode the test set on its own, then force it onto the training set's
# column layout: columns missing from the test encoding are added and filled
# with 0, and columns absent from the training encoding are dropped.
cc_apps_test = pd.get_dummies(cc_apps_test).reindex(
    columns=cc_apps_train.columns,
    fill_value=0,
)

# Import MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

# Segregate features and labels into separate variables.
#
# NOTE(review): assumes the label was the last original column and was
# categorical, so pd.get_dummies expanded it into several trailing
# "<column>_<value>" indicator columns -- confirm against the data.
# Under that assumption, slicing off only the final column leaves the label's
# other indicator column(s) inside the feature matrix, which leaks the target
# into the features. Every indicator sharing the label's column prefix is
# therefore excluded from X.
label_col = cc_apps_train.columns[-1]
# Original column names are integers (the file is read with header=None), so
# the text before the first '_' identifies the source column.
label_prefix = str(label_col).split('_', 1)[0] + '_'
label_indicator_cols = [
    c for c in cc_apps_train.columns
    if c == label_col or str(c).startswith(label_prefix)
]
feature_cols = [
    c for c in cc_apps_train.columns if c not in label_indicator_cols
]

# y keeps the original (n_samples, 1) column-vector shape.
X_train, y_train = cc_apps_train[feature_cols].values, cc_apps_train[[label_col]].values
X_test, y_test = cc_apps_test[feature_cols].values, cc_apps_test[[label_col]].values

# Instantiate MinMaxScaler and use it to rescale X_train and X_test.
# The scaler is fitted on the training features only and then applied to the
# test features.
scaler = MinMaxScaler(feature_range=(0, 1))
rescaledX_train = scaler.fit_transform(X_train)
rescaledX_test = scaler.transform(X_test)

# Import LogisticRegression

from sklearn.linear_model import LogisticRegression

# Instantiate a LogisticRegression classifier with default parameter values
logreg = LogisticRegression()

# Fit logreg to the train set.
# y_train is built as a column vector; flatten it to 1-D, which is the shape
# scikit-learn expects for labels (a column vector triggers a
# DataConversionWarning). ravel() is a no-op on an already 1-D array.
logreg.fit(rescaledX_train, y_train.ravel())

# Import confusion_matrix

from sklearn.metrics import confusion_matrix

# Use logreg to predict instances from the test set and store it
y_pred = logreg.predict(rescaledX_test)

# Get the accuracy score of logreg model and print it
print("Accuracy of logistic regression classifier: ", logreg.score(rescaledX_test, y_test))

# Print the confusion matrix of the logreg model.
# The result must be printed explicitly; as a bare expression it is discarded
# when this runs as a script.
print(confusion_matrix(y_test, y_pred))

Partner With Basit

View Services

More Projects by Basit

A Visual History of Nobel Prize Winners

Dr. Semmelweis and the Discovery of Handwashing

How it Works

Contra For Independents Contra For Hiring Success Stories Commission-Free

Company

Mission Careers Newsroom

Resources

FAQ Tips & Guides Hire Support

Discover Freelancers

Design Engineering Marketing Music & Audio Social Media Video & Animation Writing

Drops

Freelance Industry Report

Social

Terms & Conditions Privacy Policy Cookie Policy