# count total customer that is churn/exited or not based on their gender
q='''
SELECT gender,
COUNT(gender) as Total,
COUNT(case when exited = 1 then 1 end) as Churn,
COUNT(case when exited = 0 then 1 end) as Not_churn
FROM customer_churn_records
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
gender total churn not_churn
0 Female 4543 1139 3404
1 Male 5457 899 4558
# count how many customer that churn based on their demographic
q='''
SELECT geography,
COUNT (CASE WHEN exited = 1 THEN 1 END) AS Churn,
COUNT (CASE WHEN exited = 0 THEN 1 END) AS Not_Churn
FROM customer_churn_records
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df
geography churn not_churn
0 Spain 413 2064
1 France 811 4203
2 Germany 814 1695
# count how many customer that churn based on their group age
q='''
SELECT
CASE
WHEN age <= 20 THEN 'Group <= 20'
WHEN age >= 21 AND age <= 40 THEN 'Group 21-40'
WHEN age >= 41 AND age <= 60 THEN 'Group 41-60'
ELSE 'Group > 60'
END AS age_category,
COUNT(CASE WHEN exited = 1 then 1 end) as Churn,
COUNT(CASE WHEN exited = 0 then 1 end) as Not_Churn
FROM customer_churn_records
GROUP BY 1
ORDER BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df
age_category churn not_churn
0 Group <= 20 5 84
1 Group 21-40 682 5648
2 Group 41-60 1236 1881
3 Group > 60 115 349
# count total customer that is churn/exited or not based on how long
# they've been as customer
q='''
WITH temp_tenure AS(
SELECT tenure,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
AVG(tenure) as Average_tenure
FROM temp_tenure
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status average_tenure
0 Not Churn 5.032781
1 Churn 4.934740
# count how many customer that churn based on the amount of credit score
# (average, highest and lowest)
q='''
WITH temp_creditscore AS(
SELECT creditscore,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
AVG(creditscore) as avg_creditscore,
MAX(creditscore) as max_creditscore,
MIN(creditscore) as min_creditscore
FROM temp_creditscore
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status avg_creditscore max_creditscore min_creditscore
0 Not Churn 651.837855 850 405
1 Churn 645.414622 850 350
# count how many customer that churn based on how many bank product they use
q='''
WITH temp_bankprod AS(
SELECT numofproducts,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
AVG(numofproducts) as avg_numofproducts
FROM temp_bankprod
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status avg_numofproducts
0 Not Churn 1.544210
1 Churn 1.475466
# count how many customer that churn based on the average score of customer
# satisfaction score towards the bank
q='''
WITH temp_satisfaction AS(
SELECT satisfaction_score,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
AVG(satisfaction_score) as satisfaction_level
FROM temp_satisfaction
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status satisfaction_level
0 Not Churn 3.017960
1 Churn 2.997547
# count how many customer that churn based on the amount of complain filed by customer
q='''
WITH temp_complain AS(
SELECT complain,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
COUNT(complain) as complain
FROM temp_complain
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status complain
0 Not Churn 7962
1 Churn 2038
# count how many customer that churn based on credit card ownership
q='''
WITH temp_hascrcard AS(
SELECT hascrcard,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
COUNT (CASE WHEN hascrcard = 1 THEN 1 END) AS has_creditcard,
COUNT (CASE WHEN hascrcard = 0 THEN 1 END) AS no_creditcard
FROM temp_hascrcard
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df
status has_creditcard no_creditcard
0 Not Churn 5630 2332
1 Churn 1425 613
# count how many customer that churn based on the their card type
q='''
WITH temp_card AS(
SELECT card_type,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
COUNT(CASE WHEN card_type = 'PLATINUM' THEN 1 END) as PLATINUM,
COUNT(CASE WHEN card_type = 'DIAMOND' THEN 1 END) as DIAMOND,
COUNT(CASE WHEN card_type = 'GOLD' THEN 1 END) as GOLD,
COUNT(CASE WHEN card_type = 'SILVER' THEN 1 END) as SILVER
FROM temp_card
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status platinum diamond gold silver
0 Not Churn 1987 1961 2020 1994
1 Churn 508 546 482 502
# count how many customer that churn based on the amount of point customer earned
q='''
WITH temp_point AS(
SELECT point_earned,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
AVG(point_earned) as avg_point,
MAX(point_earned) as max_point,
MIN(point_earned) as min_point
FROM temp_point
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status avg_point max_point min_point
0 Not Churn 607.044084 1000 119
1 Churn 604.448479 1000 163
# count how many customer that churn based on customer estimated salary
q='''
WITH temp_salary AS(
SELECT estimatedsalary,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
AVG(estimatedsalary) as avg_salary,
MAX(estimatedsalary) as max_salary,
MIN(estimatedsalary) as min_salary
FROM temp_salary
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status avg_salary max_salary min_salary
0 Not Churn 99726.853103 199992.0 90.07
1 Churn 101509.908819 199808.0 11.58
# count how many customer that churn based on the amount of customer's bank balance
# (average, highest, lowest)
q='''
WITH temp_balance AS(
SELECT balance,
CASE
WHEN exited = 1 THEN 'Churn'
ELSE 'Not Churn'
END AS STATUS
from customer_churn_records
)
SELECT STATUS,
AVG(balance) as avg_balance,
MAX(balance) as max_balance,
MIN(balance) as min_balance
FROM temp_balance
GROUP BY 1
'''
df = pd.read_sql(q,engine_postgresql)
df.head()
status avg_balance max_balance min_balance
0 Not Churn 72742.446747 221532 0
1 Churn 91109.104024 250898 0