Sachin Shekhar (SachinShekhar@outlook.com)
Shivam Sharma (< email redacted >)
Akanksha (< email redacted >)
# Importing essential libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.feature_selection import RFE
from sklearn.model_selection import KFold, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, roc_auc_score
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from imblearn.over_sampling import SMOTENC
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 500)
# Loading data
df = pd.read_csv('telecom_churn_data.csv')
df.head()
[output truncated: first 5 rows of all 226 columns, from mobile_number and circle_id through aon and the monthly vbc_3g columns]
df.shape
(99999, 226)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99999 entries, 0 to 99998
Columns: 226 entries, mobile_number to sep_vbc_3g
dtypes: float64(179), int64(35), object(12)
memory usage: 172.4+ MB
df.describe()
[output truncated: count, mean, std, min, 25%, 50%, 75% and max for each of the 214 numeric columns; notably, circle_id, loc_og_t2o_mou, std_og_t2o_mou, loc_ic_t2o_mou and the std_og_t2c_mou_* / std_ic_t2o_mou_* columns are constant, and the arpu_* columns contain negative minimums]
# Average recharge amount in Good phase (months 6 & 7)
df['avg_rech_amt_good_phase'] = (df['total_rech_amt_6'] + df['total_rech_amt_7']) / 2
# Calculating threshold for High Value Customers
hvc_threshold = np.percentile(df['avg_rech_amt_good_phase'], 70)
hvc_threshold
368.5
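Equivalently, pandas' quantile gives the same cutoff (an aside, not part of the original flow; both use linear interpolation by default):
# Same 70th-percentile threshold via pandas
hvc_threshold = df['avg_rech_amt_good_phase'].quantile(0.7)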
# Filtering High Value Customers
df = df[df['avg_rech_amt_good_phase'] >= hvc_threshold]
df.shape
(30011, 227)
# Dropping temporary variables
df.drop(['avg_rech_amt_good_phase'], axis=1, inplace=True)
df_missing = df.isnull().mean() * 100
df_missing[df_missing > 40]
date_of_last_rech_data_6    62.023925
date_of_last_rech_data_7    61.140915
date_of_last_rech_data_8    60.834361
date_of_last_rech_data_9    61.810669
total_rech_data_6           62.023925
total_rech_data_7           61.140915
total_rech_data_8           60.834361
total_rech_data_9           61.810669
max_rech_data_6             62.023925
max_rech_data_7             61.140915
max_rech_data_8             60.834361
max_rech_data_9             61.810669
count_rech_2g_6             62.023925
count_rech_2g_7             61.140915
count_rech_2g_8             60.834361
count_rech_2g_9             61.810669
count_rech_3g_6             62.023925
count_rech_3g_7             61.140915
count_rech_3g_8             60.834361
count_rech_3g_9             61.810669
av_rech_amt_data_6          62.023925
av_rech_amt_data_7          61.140915
av_rech_amt_data_8          60.834361
av_rech_amt_data_9          61.810669
arpu_3g_6                   62.023925
arpu_3g_7                   61.140915
arpu_3g_8                   60.834361
arpu_3g_9                   61.810669
arpu_2g_6                   62.023925
arpu_2g_7                   61.140915
arpu_2g_8                   60.834361
arpu_2g_9                   61.810669
night_pck_user_6            62.023925
night_pck_user_7            61.140915
night_pck_user_8            60.834361
night_pck_user_9            61.810669
fb_user_6                   62.023925
fb_user_7                   61.140915
fb_user_8                   60.834361
fb_user_9                   61.810669
dtype: float64
# Removing features with more than 40% missing values
df.drop(df_missing[df_missing > 40].index, axis=1, inplace=True)
df.shape
(30011, 186)
# Deleting rows where more than 50% of the values are missing
df.drop(df[(df.isnull().sum(axis=1)) > (len(df.columns)//2)].index, inplace=True)
df.shape
(29897, 186)
df_missing = df.isnull().mean() * 100
df_missing[df_missing > 0].sort_values(ascending=False)
loc_ic_mou_9 5.318259
roam_og_mou_9 5.318259
loc_og_t2t_mou_9 5.318259
loc_ic_t2f_mou_9 5.318259
loc_og_t2m_mou_9 5.318259
...
std_ic_t2m_mou_7 0.632170
last_date_of_month_8 0.474964
date_of_last_rech_7 0.327792
date_of_last_rech_6 0.207379
last_date_of_month_7 0.053517
Length: 123, dtype: float64
It looks like the MOU (minutes of usage) features have a large number of missing values.
# Deleting rows with missing values in the MOU columns
df.dropna(subset=[c for c in df_missing[df_missing > 0].index if 'mou' in c], inplace=True)
df.shape
(27991, 186)
df_missing = df.isnull().mean() * 100
df_missing[df_missing > 0].sort_values(ascending=False)
date_of_last_rech_9    1.021757
date_of_last_rech_8    1.011039
date_of_last_rech_7    0.203637
date_of_last_rech_6    0.096460
dtype: float64
The date columns need to be removed, as they aren't important for our analysis.
# Dropping date columns
df.drop([c for c in df.columns if 'date' in c], axis=1, inplace=True)
# Dropping mobile_number column as it can't be used in analysis
df.drop('mobile_number', axis=1, inplace=True)
df.shape
(27991, 177)
Now there are no missing values in the DataFrame.
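A quick assertion can confirm this (a minimal sanity check added for illustration):
# Verify that no NaNs remain anywhere in the frame
assert df.isnull().sum().sum() == 0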
# Dropping features with only one unique value, as they add nothing to model building
df_nunique = df.nunique()
df.drop(df_nunique[df_nunique < 2].index, axis=1, inplace=True)
df.shape
(27991, 165)
df.duplicated().sum()
0
# Checking for columns with a possibly wrong data type
df.select_dtypes(include=['object']).columns
Index([], dtype='object')
# Calculating target variable based on usage in the last month
def calculate_target(x):
    if ((x['total_ic_mou_9'] == 0) & (x['total_og_mou_9'] == 0) & (x['vol_2g_mb_9'] == 0) & (x['vol_3g_mb_9'] == 0)):
        return 1
    else:
        return 0
df['churn'] = df.apply(calculate_target, axis=1)
# Making sure the `churn` variable isn't treated as a numerical variable
df['churn'] = df['churn'].astype('category')
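For reference, the same label can be derived without a row-wise apply (a sketch with equivalent logic, shown only as an alternative):
# Churn = no incoming/outgoing voice usage and no 2G/3G data usage in month 9
no_usage_9 = (
    (df['total_ic_mou_9'] == 0)
    & (df['total_og_mou_9'] == 0)
    & (df['vol_2g_mb_9'] == 0)
    & (df['vol_3g_mb_9'] == 0)
)
df['churn'] = no_usage_9.astype(int).astype('category')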
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 27991 entries, 8 to 99997
Columns: 166 entries, arpu_6 to churn
dtypes: category(1), float64(132), int64(33)
memory usage: 35.5 MB
plt.figure(figsize=(8,8))
plt.pie(df['churn'].value_counts(), labels=['No Churn', 'Churn'], autopct='%.2f%%', colors=sns.color_palette('bright'))
plt.title('Target Variable Data Imbalance', fontsize=20)
plt.show()
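To quantify the imbalance behind the chart (a small addition), the exact class shares can be printed:
# Percentage share of each churn class
print(df['churn'].value_counts(normalize=True) * 100)
The heavy skew toward non-churners is presumably why SMOTENC was imported from imblearn earlier.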
We'll remove the churn-month (month 9) columns after raw EDA.
[i for i in df.columns if i[-1:] not in ['6', '7', '8', '9']]
['aon', 'aug_vbc_3g', 'jul_vbc_3g', 'jun_vbc_3g', 'sep_vbc_3g', 'churn']
# Making `vbc_3g` feature names consistent
df.rename(columns={'jun_vbc_3g': 'vbc_3g_6','jul_vbc_3g': 'vbc_3g_7','aug_vbc_3g': 'vbc_3g_8', 'sep_vbc_3g': 'vbc_3g_9'}, inplace=True)
[i for i in df.columns if i[-1:] not in ['6', '7', '8', '9']]
['aon', 'churn']
# Listing the primary predictors that are repeated across multiple months
unique_columns = map(lambda x: x[0:-2], [i for i in df.columns if i[-1:] in ['6', '7', '8', '9']])
unique_columns = list(set(unique_columns))
unique_columns
['og_others', 'isd_ic_mou', 'isd_og_mou', 'loc_og_mou', 'monthly_3g', 'roam_og_mou', 'std_ic_t2f_mou', 'std_og_t2m_mou', 'roam_ic_mou', 'last_day_rch_amt', 'std_og_t2f_mou', 'total_og_mou', 'loc_og_t2c_mou', 'loc_og_t2f_mou', 'offnet_mou', 'std_ic_t2t_mou', 'total_rech_num', 'std_ic_mou', 'sachet_3g', 'loc_ic_t2t_mou', 'ic_others', 'loc_ic_t2f_mou', 'sachet_2g', 'spl_ic_mou', 'spl_og_mou', 'total_rech_amt', 'loc_og_t2t_mou', 'std_og_t2t_mou', 'max_rech_amt', 'vbc_3g', 'vol_3g_mb', 'total_ic_mou', 'loc_ic_t2m_mou', 'arpu', 'std_ic_t2m_mou', 'vol_2g_mb', 'monthly_2g', 'onnet_mou', 'loc_og_t2m_mou', 'loc_ic_mou', 'std_og_mou']
len(unique_columns)
41
There are 41 predictor variables repeated across the four months, and one predictor variable that isn't repeated.
unrepeated_columns = ['aon']
def treat_outliers(col):
    q1 = np.percentile(df[col], 25)
    q3 = np.percentile(df[col], 75)
    IQR = q3 - q1
    lwr_bound = q1 - (1.5 * IQR)
    upr_bound = q3 + (1.5 * IQR)
    df[col] = df[col].apply(lambda x: lwr_bound if x < lwr_bound else upr_bound if x > upr_bound else x)
for col_set in unique_columns:
    for col in [col_set + '_6', col_set + '_7', col_set + '_8', col_set + '_9']:
        treat_outliers(col)
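As an aside, pandas' Series.clip performs the same IQR capping in one vectorized call (a sketch with the same bounds; treat_outliers above is the version actually used):
def treat_outliers_clip(col):
    # Hypothetical variant: cap values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] via clip
    q1, q3 = df[col].quantile([0.25, 0.75])
    iqr = q3 - q1
    df[col] = df[col].clip(lower=q1 - 1.5 * iqr, upper=q3 + 1.5 * iqr)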
for col in unique_columns:
    df[col + '_good'] = (df[col + '_6'] + df[col + '_7']) / 2
    df[col + '_action'] = df[col + '_8']
    df[col + '_churn'] = df[col + '_9']
    df.drop([col + '_' + str(i) for i in [6, 7, 8, 9]], axis=1, inplace=True)
df.head()
[output truncated: first 5 rows of the reshaped frame, 125 columns: aon, churn, and the 41 predictors each split into _good, _action and _churn phase features]
df['total_mou_good'] = df['total_og_mou_good'] + df['total_ic_mou_good']
df['total_mou_action'] = df['total_og_mou_action'] + df['total_ic_mou_action']
df['total_mou_churn'] = df['total_og_mou_churn'] + df['total_ic_mou_churn']
unique_columns.append('total_mou')
df.head()
[output truncated: first 5 rows, now 128 columns with the added total_mou_good, total_mou_action and total_mou_churn features]
mou_decrease
Whether Minutes of Usage dropped from the good phase to the action phase
df['mou_decrease'] = (df['total_mou_action'] < df['total_mou_good']).astype(int)
df['mou_decrease'] = df['mou_decrease'].astype('category')
df.head()
| aon | churn | og_others_good | og_others_action | og_others_churn | isd_ic_mou_good | isd_ic_mou_action | isd_ic_mou_churn | isd_og_mou_good | isd_og_mou_action | isd_og_mou_churn | loc_og_mou_good | loc_og_mou_action | loc_og_mou_churn | monthly_3g_good | monthly_3g_action | monthly_3g_churn | roam_og_mou_good | roam_og_mou_action | roam_og_mou_churn | std_ic_t2f_mou_good | std_ic_t2f_mou_action | std_ic_t2f_mou_churn | std_og_t2m_mou_good | std_og_t2m_mou_action | std_og_t2m_mou_churn | roam_ic_mou_good | roam_ic_mou_action | roam_ic_mou_churn | last_day_rch_amt_good | last_day_rch_amt_action | last_day_rch_amt_churn | std_og_t2f_mou_good | std_og_t2f_mou_action | std_og_t2f_mou_churn | total_og_mou_good | total_og_mou_action | total_og_mou_churn | loc_og_t2c_mou_good | loc_og_t2c_mou_action | loc_og_t2c_mou_churn | loc_og_t2f_mou_good | loc_og_t2f_mou_action | loc_og_t2f_mou_churn | offnet_mou_good | offnet_mou_action | offnet_mou_churn | std_ic_t2t_mou_good | std_ic_t2t_mou_action | std_ic_t2t_mou_churn | total_rech_num_good | total_rech_num_action | total_rech_num_churn | std_ic_mou_good | std_ic_mou_action | std_ic_mou_churn | sachet_3g_good | sachet_3g_action | sachet_3g_churn | loc_ic_t2t_mou_good | loc_ic_t2t_mou_action | loc_ic_t2t_mou_churn | ic_others_good | ic_others_action | ic_others_churn | loc_ic_t2f_mou_good | loc_ic_t2f_mou_action | loc_ic_t2f_mou_churn | sachet_2g_good | sachet_2g_action | sachet_2g_churn | spl_ic_mou_good | spl_ic_mou_action | spl_ic_mou_churn | spl_og_mou_good | spl_og_mou_action | spl_og_mou_churn | total_rech_amt_good | total_rech_amt_action | total_rech_amt_churn | loc_og_t2t_mou_good | loc_og_t2t_mou_action | loc_og_t2t_mou_churn | std_og_t2t_mou_good | std_og_t2t_mou_action | std_og_t2t_mou_churn | max_rech_amt_good | max_rech_amt_action | max_rech_amt_churn | vbc_3g_good | vbc_3g_action | vbc_3g_churn | vol_3g_mb_good | vol_3g_mb_action | vol_3g_mb_churn | total_ic_mou_good | total_ic_mou_action | total_ic_mou_churn | loc_ic_t2m_mou_good | loc_ic_t2m_mou_action | loc_ic_t2m_mou_churn | arpu_good | arpu_action | arpu_churn | std_ic_t2m_mou_good | std_ic_t2m_mou_action | std_ic_t2m_mou_churn | vol_2g_mb_good | vol_2g_mb_action | vol_2g_mb_churn | monthly_2g_good | monthly_2g_action | monthly_2g_churn | onnet_mou_good | onnet_mou_action | onnet_mou_churn | loc_og_t2m_mou_good | loc_og_t2m_mou_action | loc_og_t2m_mou_churn | loc_ic_mou_good | loc_ic_mou_action | loc_ic_mou_churn | std_og_mou_good | std_og_mou_action | std_og_mou_churn | total_mou_good | total_mou_action | total_mou_churn | mou_decrease | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 315 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 333.135 | 63.04 | 60.71 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 11.8650 | 75.69 | 74.13 | 0.0 | 0.0 | 0.0 | 25.0 | 10.0 | 0.0 | 0.0 | 0.0 | 0.0 | 470.010 | 171.56 | 142.18 | 0.0 | 0.325 | 0.0 | 0.000 | 0.00 | 0.00 | 87.645 | 136.48 | 108.71 | 0.2900 | 0.1000 | 0.00 | 20.0 | 14.0 | 15.0 | 13.5450 | 0.750 | 13.53 | 0.0 | 0.0 | 0.0 | 16.840 | 0.31 | 4.03 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 10.23 | 0.00 | 519.0 | 120.0 | 186.0 | 219.75375 | 12.49 | 26.13 | 124.9950 | 22.58 | 7.33 | 122.0 | 30.0 | 36.0 | 4.03125 | 21.03 | 0.0 | 6.76875 | 1.425 | 0.0 | 66.175 | 16.5400 | 34.91 | 35.780 | 15.48 | 17.34 | 435.4720 | 137.362 | 166.787 | 13.255 | 0.6500 | 13.5300 | 20.64375 | 0.03 | 0.0 | 0.0 | 0.0 | 0.0 | 382.3600 | 35.08 | 33.46 | 75.770 | 50.54 | 34.58 | 52.630 | 15.79 | 21.38 | 136.870 | 98.28 | 81.46 | 536.185 | 188.1000 | 177.09 | 1 |
| 13 | 2607 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 129.395 | 353.99 | 119.69 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.7 | 0.0 | 154.1500 | 156.94 | 96.01 | 0.0 | 0.0 | 0.0 | 80.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 549.750 | 1015.26 | 446.09 | 0.0 | 0.325 | 0.0 | 2.270 | 12.90 | 5.29 | 266.295 | 482.46 | 214.06 | 27.0125 | 24.0125 | 21.20 | 5.0 | 11.0 | 7.0 | 91.4850 | 111.305 | 103.81 | 0.0 | 0.0 | 0.0 | 37.730 | 52.58 | 24.98 | 0.15 | 0.2 | 0.125 | 2.405 | 7.4900 | 8.510 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 4.78 | 0.00 | 380.0 | 717.0 | 353.0 | 27.91000 | 36.01 | 6.14 | 263.8775 | 370.75 | 230.38 | 110.0 | 130.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 514.170 | 849.6075 | 631.86 | 80.705 | 195.18 | 104.79 | 349.2585 | 593.260 | 322.732 | 49.050 | 67.2475 | 61.2425 | 0.00000 | 0.02 | 0.0 | 0.0 | 0.0 | 0.0 | 305.0750 | 534.24 | 244.81 | 99.205 | 294.46 | 108.24 | 120.845 | 255.26 | 138.29 | 420.355 | 655.18 | 326.39 | 1063.920 | 1864.8675 | 1077.95 | 0 |
| 16 | 511 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 265.775 | 205.93 | 233.04 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.255 | 0.7 | 0.0 | 43.7500 | 18.29 | 13.79 | 0.0 | 0.0 | 0.0 | 100.0 | 130.0 | 220.0 | 0.0 | 0.0 | 0.0 | 316.410 | 233.38 | 255.74 | 0.0 | 0.000 | 0.0 | 2.240 | 10.26 | 4.66 | 263.015 | 162.76 | 224.39 | 27.0125 | 6.3300 | 16.66 | 8.0 | 2.0 | 1.0 | 105.8075 | 82.440 | 46.24 | 0.0 | 0.0 | 0.0 | 56.385 | 28.89 | 50.23 | 0.00 | 0.0 | 0.000 | 8.695 | 31.6375 | 31.275 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.63 | 5.91 | 0.00 | 459.0 | 160.0 | 220.0 | 53.88500 | 67.38 | 26.88 | 5.2350 | 3.23 | 4.46 | 110.0 | 130.0 | 220.0 | 1.22500 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 373.210 | 294.4600 | 334.56 | 188.250 | 150.16 | 172.86 | 365.4220 | 187.894 | 206.490 | 45.880 | 67.2475 | 29.5800 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 62.2600 | 70.61 | 31.34 | 209.640 | 128.28 | 201.49 | 253.345 | 211.78 | 288.31 | 48.990 | 21.53 | 22.69 | 689.620 | 527.8400 | 590.30 | 1 |
| 17 | 667 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.770 | 0.00 | 45.79 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 0.7750 | 0.00 | 78.51 | 0.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 625.170 | 0.00 | 631.08 | 0.0 | 0.000 | 0.0 | 0.000 | 0.00 | 0.00 | 30.820 | 5.54 | 87.89 | 4.3150 | 0.0000 | 0.00 | 10.5 | 4.0 | 10.0 | 4.9550 | 0.000 | 1.63 | 0.0 | 0.0 | 0.0 | 17.270 | 0.00 | 40.91 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.710 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.29 | 0.00 | 2.64 | 408.0 | 30.0 | 335.0 | 19.49500 | 0.00 | 36.41 | 221.2925 | 0.00 | 309.45 | 55.0 | 30.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 47.095 | 0.0000 | 87.13 | 24.860 | 0.00 | 43.86 | 354.4940 | 25.499 | 257.583 | 0.640 | 0.0000 | 1.6300 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 413.7025 | 7.79 | 558.51 | 29.270 | 0.00 | 9.38 | 42.135 | 0.00 | 85.49 | 574.635 | 0.00 | 582.63 | 672.265 | 0.0000 | 718.21 | 1 |
| 21 | 720 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 95.565 | 129.74 | 137.53 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 539.6925 | 444.08 | 381.95 | 0.0 | 0.0 | 0.0 | 25.0 | 50.0 | 30.0 | 0.0 | 0.0 | 0.0 | 944.565 | 1068.54 | 1031.53 | 0.0 | 0.000 | 0.0 | 0.375 | 1.58 | 0.00 | 827.305 | 983.39 | 869.89 | 10.0200 | 0.1800 | 16.74 | 24.0 | 25.0 | 17.0 | 62.9850 | 101.930 | 103.81 | 0.0 | 0.0 | 0.0 | 6.335 | 19.54 | 17.99 | 0.00 | 0.0 | 0.000 | 3.725 | 10.3900 | 8.410 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 640.0 | 718.0 | 680.0 | 5.32000 | 23.34 | 29.98 | 111.9350 | 61.79 | 131.64 | 50.0 | 50.0 | 50.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 169.490 | 261.0400 | 252.61 | 96.430 | 129.16 | 113.46 | 556.1030 | 637.760 | 578.596 | 52.960 | 67.2475 | 61.2425 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 117.2600 | 85.14 | 161.63 | 89.870 | 104.81 | 107.54 | 106.505 | 159.11 | 139.88 | 848.990 | 938.79 | 893.99 | 1114.055 | 1329.5800 | 1284.14 | 0 |
arpu_decrease
Whether Average Revenue Per User (ARPU) dropped from the good phase to the action phase
df['arpu_decrease'] = (df['arpu_action'] < df['arpu_good']).astype(int)
df['arpu_decrease'] = df['arpu_decrease'].astype('category')
df.head()
| aon | churn | og_others_good | og_others_action | og_others_churn | isd_ic_mou_good | isd_ic_mou_action | isd_ic_mou_churn | isd_og_mou_good | isd_og_mou_action | isd_og_mou_churn | loc_og_mou_good | loc_og_mou_action | loc_og_mou_churn | monthly_3g_good | monthly_3g_action | monthly_3g_churn | roam_og_mou_good | roam_og_mou_action | roam_og_mou_churn | std_ic_t2f_mou_good | std_ic_t2f_mou_action | std_ic_t2f_mou_churn | std_og_t2m_mou_good | std_og_t2m_mou_action | std_og_t2m_mou_churn | roam_ic_mou_good | roam_ic_mou_action | roam_ic_mou_churn | last_day_rch_amt_good | last_day_rch_amt_action | last_day_rch_amt_churn | std_og_t2f_mou_good | std_og_t2f_mou_action | std_og_t2f_mou_churn | total_og_mou_good | total_og_mou_action | total_og_mou_churn | loc_og_t2c_mou_good | loc_og_t2c_mou_action | loc_og_t2c_mou_churn | loc_og_t2f_mou_good | loc_og_t2f_mou_action | loc_og_t2f_mou_churn | offnet_mou_good | offnet_mou_action | offnet_mou_churn | std_ic_t2t_mou_good | std_ic_t2t_mou_action | std_ic_t2t_mou_churn | total_rech_num_good | total_rech_num_action | total_rech_num_churn | std_ic_mou_good | std_ic_mou_action | std_ic_mou_churn | sachet_3g_good | sachet_3g_action | sachet_3g_churn | loc_ic_t2t_mou_good | loc_ic_t2t_mou_action | loc_ic_t2t_mou_churn | ic_others_good | ic_others_action | ic_others_churn | loc_ic_t2f_mou_good | loc_ic_t2f_mou_action | loc_ic_t2f_mou_churn | sachet_2g_good | sachet_2g_action | sachet_2g_churn | spl_ic_mou_good | spl_ic_mou_action | spl_ic_mou_churn | spl_og_mou_good | spl_og_mou_action | spl_og_mou_churn | total_rech_amt_good | total_rech_amt_action | total_rech_amt_churn | loc_og_t2t_mou_good | loc_og_t2t_mou_action | loc_og_t2t_mou_churn | std_og_t2t_mou_good | std_og_t2t_mou_action | std_og_t2t_mou_churn | max_rech_amt_good | max_rech_amt_action | max_rech_amt_churn | vbc_3g_good | vbc_3g_action | vbc_3g_churn | vol_3g_mb_good | vol_3g_mb_action | vol_3g_mb_churn | total_ic_mou_good | total_ic_mou_action | total_ic_mou_churn | loc_ic_t2m_mou_good | loc_ic_t2m_mou_action | loc_ic_t2m_mou_churn | arpu_good | arpu_action | arpu_churn | std_ic_t2m_mou_good | std_ic_t2m_mou_action | std_ic_t2m_mou_churn | vol_2g_mb_good | vol_2g_mb_action | vol_2g_mb_churn | monthly_2g_good | monthly_2g_action | monthly_2g_churn | onnet_mou_good | onnet_mou_action | onnet_mou_churn | loc_og_t2m_mou_good | loc_og_t2m_mou_action | loc_og_t2m_mou_churn | loc_ic_mou_good | loc_ic_mou_action | loc_ic_mou_churn | std_og_mou_good | std_og_mou_action | std_og_mou_churn | total_mou_good | total_mou_action | total_mou_churn | mou_decrease | arpu_decrease | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 315 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 333.135 | 63.04 | 60.71 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 11.8650 | 75.69 | 74.13 | 0.0 | 0.0 | 0.0 | 25.0 | 10.0 | 0.0 | 0.0 | 0.0 | 0.0 | 470.010 | 171.56 | 142.18 | 0.0 | 0.325 | 0.0 | 0.000 | 0.00 | 0.00 | 87.645 | 136.48 | 108.71 | 0.2900 | 0.1000 | 0.00 | 20.0 | 14.0 | 15.0 | 13.5450 | 0.750 | 13.53 | 0.0 | 0.0 | 0.0 | 16.840 | 0.31 | 4.03 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 10.23 | 0.00 | 519.0 | 120.0 | 186.0 | 219.75375 | 12.49 | 26.13 | 124.9950 | 22.58 | 7.33 | 122.0 | 30.0 | 36.0 | 4.03125 | 21.03 | 0.0 | 6.76875 | 1.425 | 0.0 | 66.175 | 16.5400 | 34.91 | 35.780 | 15.48 | 17.34 | 435.4720 | 137.362 | 166.787 | 13.255 | 0.6500 | 13.5300 | 20.64375 | 0.03 | 0.0 | 0.0 | 0.0 | 0.0 | 382.3600 | 35.08 | 33.46 | 75.770 | 50.54 | 34.58 | 52.630 | 15.79 | 21.38 | 136.870 | 98.28 | 81.46 | 536.185 | 188.1000 | 177.09 | 1 | 1 |
| 13 | 2607 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 129.395 | 353.99 | 119.69 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.7 | 0.0 | 154.1500 | 156.94 | 96.01 | 0.0 | 0.0 | 0.0 | 80.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 549.750 | 1015.26 | 446.09 | 0.0 | 0.325 | 0.0 | 2.270 | 12.90 | 5.29 | 266.295 | 482.46 | 214.06 | 27.0125 | 24.0125 | 21.20 | 5.0 | 11.0 | 7.0 | 91.4850 | 111.305 | 103.81 | 0.0 | 0.0 | 0.0 | 37.730 | 52.58 | 24.98 | 0.15 | 0.2 | 0.125 | 2.405 | 7.4900 | 8.510 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 4.78 | 0.00 | 380.0 | 717.0 | 353.0 | 27.91000 | 36.01 | 6.14 | 263.8775 | 370.75 | 230.38 | 110.0 | 130.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 514.170 | 849.6075 | 631.86 | 80.705 | 195.18 | 104.79 | 349.2585 | 593.260 | 322.732 | 49.050 | 67.2475 | 61.2425 | 0.00000 | 0.02 | 0.0 | 0.0 | 0.0 | 0.0 | 305.0750 | 534.24 | 244.81 | 99.205 | 294.46 | 108.24 | 120.845 | 255.26 | 138.29 | 420.355 | 655.18 | 326.39 | 1063.920 | 1864.8675 | 1077.95 | 0 | 0 |
| 16 | 511 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 265.775 | 205.93 | 233.04 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.255 | 0.7 | 0.0 | 43.7500 | 18.29 | 13.79 | 0.0 | 0.0 | 0.0 | 100.0 | 130.0 | 220.0 | 0.0 | 0.0 | 0.0 | 316.410 | 233.38 | 255.74 | 0.0 | 0.000 | 0.0 | 2.240 | 10.26 | 4.66 | 263.015 | 162.76 | 224.39 | 27.0125 | 6.3300 | 16.66 | 8.0 | 2.0 | 1.0 | 105.8075 | 82.440 | 46.24 | 0.0 | 0.0 | 0.0 | 56.385 | 28.89 | 50.23 | 0.00 | 0.0 | 0.000 | 8.695 | 31.6375 | 31.275 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.63 | 5.91 | 0.00 | 459.0 | 160.0 | 220.0 | 53.88500 | 67.38 | 26.88 | 5.2350 | 3.23 | 4.46 | 110.0 | 130.0 | 220.0 | 1.22500 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 373.210 | 294.4600 | 334.56 | 188.250 | 150.16 | 172.86 | 365.4220 | 187.894 | 206.490 | 45.880 | 67.2475 | 29.5800 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 62.2600 | 70.61 | 31.34 | 209.640 | 128.28 | 201.49 | 253.345 | 211.78 | 288.31 | 48.990 | 21.53 | 22.69 | 689.620 | 527.8400 | 590.30 | 1 | 1 |
| 17 | 667 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.770 | 0.00 | 45.79 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 0.7750 | 0.00 | 78.51 | 0.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 625.170 | 0.00 | 631.08 | 0.0 | 0.000 | 0.0 | 0.000 | 0.00 | 0.00 | 30.820 | 5.54 | 87.89 | 4.3150 | 0.0000 | 0.00 | 10.5 | 4.0 | 10.0 | 4.9550 | 0.000 | 1.63 | 0.0 | 0.0 | 0.0 | 17.270 | 0.00 | 40.91 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.710 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.29 | 0.00 | 2.64 | 408.0 | 30.0 | 335.0 | 19.49500 | 0.00 | 36.41 | 221.2925 | 0.00 | 309.45 | 55.0 | 30.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 47.095 | 0.0000 | 87.13 | 24.860 | 0.00 | 43.86 | 354.4940 | 25.499 | 257.583 | 0.640 | 0.0000 | 1.6300 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 413.7025 | 7.79 | 558.51 | 29.270 | 0.00 | 9.38 | 42.135 | 0.00 | 85.49 | 574.635 | 0.00 | 582.63 | 672.265 | 0.0000 | 718.21 | 1 | 1 |
| 21 | 720 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 95.565 | 129.74 | 137.53 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 539.6925 | 444.08 | 381.95 | 0.0 | 0.0 | 0.0 | 25.0 | 50.0 | 30.0 | 0.0 | 0.0 | 0.0 | 944.565 | 1068.54 | 1031.53 | 0.0 | 0.000 | 0.0 | 0.375 | 1.58 | 0.00 | 827.305 | 983.39 | 869.89 | 10.0200 | 0.1800 | 16.74 | 24.0 | 25.0 | 17.0 | 62.9850 | 101.930 | 103.81 | 0.0 | 0.0 | 0.0 | 6.335 | 19.54 | 17.99 | 0.00 | 0.0 | 0.000 | 3.725 | 10.3900 | 8.410 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 640.0 | 718.0 | 680.0 | 5.32000 | 23.34 | 29.98 | 111.9350 | 61.79 | 131.64 | 50.0 | 50.0 | 50.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 169.490 | 261.0400 | 252.61 | 96.430 | 129.16 | 113.46 | 556.1030 | 637.760 | 578.596 | 52.960 | 67.2475 | 61.2425 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 117.2600 | 85.14 | 161.63 | 89.870 | 104.81 | 107.54 | 106.505 | 159.11 | 139.88 | 848.990 | 938.79 | 893.99 | 1114.055 | 1329.5800 | 1284.14 | 0 | 0 |
rech_amt_decrease
Whether Total Recharge Amount dropped from the good phase to the action phase
df['rech_amt_decrease'] = (df['total_rech_amt_action'] < df['total_rech_amt_good']).astype(int)
df['rech_amt_decrease'] = df['rech_amt_decrease'].astype('category')
df.head()
| aon | churn | og_others_good | og_others_action | og_others_churn | isd_ic_mou_good | isd_ic_mou_action | isd_ic_mou_churn | isd_og_mou_good | isd_og_mou_action | isd_og_mou_churn | loc_og_mou_good | loc_og_mou_action | loc_og_mou_churn | monthly_3g_good | monthly_3g_action | monthly_3g_churn | roam_og_mou_good | roam_og_mou_action | roam_og_mou_churn | std_ic_t2f_mou_good | std_ic_t2f_mou_action | std_ic_t2f_mou_churn | std_og_t2m_mou_good | std_og_t2m_mou_action | std_og_t2m_mou_churn | roam_ic_mou_good | roam_ic_mou_action | roam_ic_mou_churn | last_day_rch_amt_good | last_day_rch_amt_action | last_day_rch_amt_churn | std_og_t2f_mou_good | std_og_t2f_mou_action | std_og_t2f_mou_churn | total_og_mou_good | total_og_mou_action | total_og_mou_churn | loc_og_t2c_mou_good | loc_og_t2c_mou_action | loc_og_t2c_mou_churn | loc_og_t2f_mou_good | loc_og_t2f_mou_action | loc_og_t2f_mou_churn | offnet_mou_good | offnet_mou_action | offnet_mou_churn | std_ic_t2t_mou_good | std_ic_t2t_mou_action | std_ic_t2t_mou_churn | total_rech_num_good | total_rech_num_action | total_rech_num_churn | std_ic_mou_good | std_ic_mou_action | std_ic_mou_churn | sachet_3g_good | sachet_3g_action | sachet_3g_churn | loc_ic_t2t_mou_good | loc_ic_t2t_mou_action | loc_ic_t2t_mou_churn | ic_others_good | ic_others_action | ic_others_churn | loc_ic_t2f_mou_good | loc_ic_t2f_mou_action | loc_ic_t2f_mou_churn | sachet_2g_good | sachet_2g_action | sachet_2g_churn | spl_ic_mou_good | spl_ic_mou_action | spl_ic_mou_churn | spl_og_mou_good | spl_og_mou_action | spl_og_mou_churn | total_rech_amt_good | total_rech_amt_action | total_rech_amt_churn | loc_og_t2t_mou_good | loc_og_t2t_mou_action | loc_og_t2t_mou_churn | std_og_t2t_mou_good | std_og_t2t_mou_action | std_og_t2t_mou_churn | max_rech_amt_good | max_rech_amt_action | max_rech_amt_churn | vbc_3g_good | vbc_3g_action | vbc_3g_churn | vol_3g_mb_good | vol_3g_mb_action | vol_3g_mb_churn | total_ic_mou_good | total_ic_mou_action | total_ic_mou_churn | loc_ic_t2m_mou_good | loc_ic_t2m_mou_action | loc_ic_t2m_mou_churn | arpu_good | arpu_action | arpu_churn | std_ic_t2m_mou_good | std_ic_t2m_mou_action | std_ic_t2m_mou_churn | vol_2g_mb_good | vol_2g_mb_action | vol_2g_mb_churn | monthly_2g_good | monthly_2g_action | monthly_2g_churn | onnet_mou_good | onnet_mou_action | onnet_mou_churn | loc_og_t2m_mou_good | loc_og_t2m_mou_action | loc_og_t2m_mou_churn | loc_ic_mou_good | loc_ic_mou_action | loc_ic_mou_churn | std_og_mou_good | std_og_mou_action | std_og_mou_churn | total_mou_good | total_mou_action | total_mou_churn | mou_decrease | arpu_decrease | rech_amt_decrease | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 315 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 333.135 | 63.04 | 60.71 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 11.8650 | 75.69 | 74.13 | 0.0 | 0.0 | 0.0 | 25.0 | 10.0 | 0.0 | 0.0 | 0.0 | 0.0 | 470.010 | 171.56 | 142.18 | 0.0 | 0.325 | 0.0 | 0.000 | 0.00 | 0.00 | 87.645 | 136.48 | 108.71 | 0.2900 | 0.1000 | 0.00 | 20.0 | 14.0 | 15.0 | 13.5450 | 0.750 | 13.53 | 0.0 | 0.0 | 0.0 | 16.840 | 0.31 | 4.03 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 10.23 | 0.00 | 519.0 | 120.0 | 186.0 | 219.75375 | 12.49 | 26.13 | 124.9950 | 22.58 | 7.33 | 122.0 | 30.0 | 36.0 | 4.03125 | 21.03 | 0.0 | 6.76875 | 1.425 | 0.0 | 66.175 | 16.5400 | 34.91 | 35.780 | 15.48 | 17.34 | 435.4720 | 137.362 | 166.787 | 13.255 | 0.6500 | 13.5300 | 20.64375 | 0.03 | 0.0 | 0.0 | 0.0 | 0.0 | 382.3600 | 35.08 | 33.46 | 75.770 | 50.54 | 34.58 | 52.630 | 15.79 | 21.38 | 136.870 | 98.28 | 81.46 | 536.185 | 188.1000 | 177.09 | 1 | 1 | 1 |
| 13 | 2607 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 129.395 | 353.99 | 119.69 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.7 | 0.0 | 154.1500 | 156.94 | 96.01 | 0.0 | 0.0 | 0.0 | 80.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 549.750 | 1015.26 | 446.09 | 0.0 | 0.325 | 0.0 | 2.270 | 12.90 | 5.29 | 266.295 | 482.46 | 214.06 | 27.0125 | 24.0125 | 21.20 | 5.0 | 11.0 | 7.0 | 91.4850 | 111.305 | 103.81 | 0.0 | 0.0 | 0.0 | 37.730 | 52.58 | 24.98 | 0.15 | 0.2 | 0.125 | 2.405 | 7.4900 | 8.510 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 4.78 | 0.00 | 380.0 | 717.0 | 353.0 | 27.91000 | 36.01 | 6.14 | 263.8775 | 370.75 | 230.38 | 110.0 | 130.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 514.170 | 849.6075 | 631.86 | 80.705 | 195.18 | 104.79 | 349.2585 | 593.260 | 322.732 | 49.050 | 67.2475 | 61.2425 | 0.00000 | 0.02 | 0.0 | 0.0 | 0.0 | 0.0 | 305.0750 | 534.24 | 244.81 | 99.205 | 294.46 | 108.24 | 120.845 | 255.26 | 138.29 | 420.355 | 655.18 | 326.39 | 1063.920 | 1864.8675 | 1077.95 | 0 | 0 | 0 |
| 16 | 511 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 265.775 | 205.93 | 233.04 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.255 | 0.7 | 0.0 | 43.7500 | 18.29 | 13.79 | 0.0 | 0.0 | 0.0 | 100.0 | 130.0 | 220.0 | 0.0 | 0.0 | 0.0 | 316.410 | 233.38 | 255.74 | 0.0 | 0.000 | 0.0 | 2.240 | 10.26 | 4.66 | 263.015 | 162.76 | 224.39 | 27.0125 | 6.3300 | 16.66 | 8.0 | 2.0 | 1.0 | 105.8075 | 82.440 | 46.24 | 0.0 | 0.0 | 0.0 | 56.385 | 28.89 | 50.23 | 0.00 | 0.0 | 0.000 | 8.695 | 31.6375 | 31.275 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.63 | 5.91 | 0.00 | 459.0 | 160.0 | 220.0 | 53.88500 | 67.38 | 26.88 | 5.2350 | 3.23 | 4.46 | 110.0 | 130.0 | 220.0 | 1.22500 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 373.210 | 294.4600 | 334.56 | 188.250 | 150.16 | 172.86 | 365.4220 | 187.894 | 206.490 | 45.880 | 67.2475 | 29.5800 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 62.2600 | 70.61 | 31.34 | 209.640 | 128.28 | 201.49 | 253.345 | 211.78 | 288.31 | 48.990 | 21.53 | 22.69 | 689.620 | 527.8400 | 590.30 | 1 | 1 | 1 |
| 17 | 667 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.770 | 0.00 | 45.79 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 0.7750 | 0.00 | 78.51 | 0.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 625.170 | 0.00 | 631.08 | 0.0 | 0.000 | 0.0 | 0.000 | 0.00 | 0.00 | 30.820 | 5.54 | 87.89 | 4.3150 | 0.0000 | 0.00 | 10.5 | 4.0 | 10.0 | 4.9550 | 0.000 | 1.63 | 0.0 | 0.0 | 0.0 | 17.270 | 0.00 | 40.91 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.710 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.29 | 0.00 | 2.64 | 408.0 | 30.0 | 335.0 | 19.49500 | 0.00 | 36.41 | 221.2925 | 0.00 | 309.45 | 55.0 | 30.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 47.095 | 0.0000 | 87.13 | 24.860 | 0.00 | 43.86 | 354.4940 | 25.499 | 257.583 | 0.640 | 0.0000 | 1.6300 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 413.7025 | 7.79 | 558.51 | 29.270 | 0.00 | 9.38 | 42.135 | 0.00 | 85.49 | 574.635 | 0.00 | 582.63 | 672.265 | 0.0000 | 718.21 | 1 | 1 | 1 |
| 21 | 720 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 95.565 | 129.74 | 137.53 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 539.6925 | 444.08 | 381.95 | 0.0 | 0.0 | 0.0 | 25.0 | 50.0 | 30.0 | 0.0 | 0.0 | 0.0 | 944.565 | 1068.54 | 1031.53 | 0.0 | 0.000 | 0.0 | 0.375 | 1.58 | 0.00 | 827.305 | 983.39 | 869.89 | 10.0200 | 0.1800 | 16.74 | 24.0 | 25.0 | 17.0 | 62.9850 | 101.930 | 103.81 | 0.0 | 0.0 | 0.0 | 6.335 | 19.54 | 17.99 | 0.00 | 0.0 | 0.000 | 3.725 | 10.3900 | 8.410 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 640.0 | 718.0 | 680.0 | 5.32000 | 23.34 | 29.98 | 111.9350 | 61.79 | 131.64 | 50.0 | 50.0 | 50.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 169.490 | 261.0400 | 252.61 | 96.430 | 129.16 | 113.46 | 556.1030 | 637.760 | 578.596 | 52.960 | 67.2475 | 61.2425 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 117.2600 | 85.14 | 161.63 | 89.870 | 104.81 | 107.54 | 106.505 | 159.11 | 139.88 | 848.990 | 938.79 | 893.99 | 1114.055 | 1329.5800 | 1284.14 | 0 | 0 | 0 |
rech_num_decrease
Whether the Number of Recharges dropped from the good phase to the action phase
df['rech_num_decrease'] = (df['total_rech_num_action'] < df['total_rech_num_good']).astype(int)
df['rech_num_decrease'] = df['rech_num_decrease'].astype('category')
df.head()
| aon | churn | og_others_good | og_others_action | og_others_churn | isd_ic_mou_good | isd_ic_mou_action | isd_ic_mou_churn | isd_og_mou_good | isd_og_mou_action | isd_og_mou_churn | loc_og_mou_good | loc_og_mou_action | loc_og_mou_churn | monthly_3g_good | monthly_3g_action | monthly_3g_churn | roam_og_mou_good | roam_og_mou_action | roam_og_mou_churn | std_ic_t2f_mou_good | std_ic_t2f_mou_action | std_ic_t2f_mou_churn | std_og_t2m_mou_good | std_og_t2m_mou_action | std_og_t2m_mou_churn | roam_ic_mou_good | roam_ic_mou_action | roam_ic_mou_churn | last_day_rch_amt_good | last_day_rch_amt_action | last_day_rch_amt_churn | std_og_t2f_mou_good | std_og_t2f_mou_action | std_og_t2f_mou_churn | total_og_mou_good | total_og_mou_action | total_og_mou_churn | loc_og_t2c_mou_good | loc_og_t2c_mou_action | loc_og_t2c_mou_churn | loc_og_t2f_mou_good | loc_og_t2f_mou_action | loc_og_t2f_mou_churn | offnet_mou_good | offnet_mou_action | offnet_mou_churn | std_ic_t2t_mou_good | std_ic_t2t_mou_action | std_ic_t2t_mou_churn | total_rech_num_good | total_rech_num_action | total_rech_num_churn | std_ic_mou_good | std_ic_mou_action | std_ic_mou_churn | sachet_3g_good | sachet_3g_action | sachet_3g_churn | loc_ic_t2t_mou_good | loc_ic_t2t_mou_action | loc_ic_t2t_mou_churn | ic_others_good | ic_others_action | ic_others_churn | loc_ic_t2f_mou_good | loc_ic_t2f_mou_action | loc_ic_t2f_mou_churn | sachet_2g_good | sachet_2g_action | sachet_2g_churn | spl_ic_mou_good | spl_ic_mou_action | spl_ic_mou_churn | spl_og_mou_good | spl_og_mou_action | spl_og_mou_churn | total_rech_amt_good | total_rech_amt_action | total_rech_amt_churn | loc_og_t2t_mou_good | loc_og_t2t_mou_action | loc_og_t2t_mou_churn | std_og_t2t_mou_good | std_og_t2t_mou_action | std_og_t2t_mou_churn | max_rech_amt_good | max_rech_amt_action | max_rech_amt_churn | vbc_3g_good | vbc_3g_action | vbc_3g_churn | vol_3g_mb_good | vol_3g_mb_action | vol_3g_mb_churn | total_ic_mou_good | total_ic_mou_action | total_ic_mou_churn | loc_ic_t2m_mou_good | loc_ic_t2m_mou_action | loc_ic_t2m_mou_churn | arpu_good | arpu_action | arpu_churn | std_ic_t2m_mou_good | std_ic_t2m_mou_action | std_ic_t2m_mou_churn | vol_2g_mb_good | vol_2g_mb_action | vol_2g_mb_churn | monthly_2g_good | monthly_2g_action | monthly_2g_churn | onnet_mou_good | onnet_mou_action | onnet_mou_churn | loc_og_t2m_mou_good | loc_og_t2m_mou_action | loc_og_t2m_mou_churn | loc_ic_mou_good | loc_ic_mou_action | loc_ic_mou_churn | std_og_mou_good | std_og_mou_action | std_og_mou_churn | total_mou_good | total_mou_action | total_mou_churn | mou_decrease | arpu_decrease | rech_amt_decrease | rech_num_decrease | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 315 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 333.135 | 63.04 | 60.71 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 11.8650 | 75.69 | 74.13 | 0.0 | 0.0 | 0.0 | 25.0 | 10.0 | 0.0 | 0.0 | 0.0 | 0.0 | 470.010 | 171.56 | 142.18 | 0.0 | 0.325 | 0.0 | 0.000 | 0.00 | 0.00 | 87.645 | 136.48 | 108.71 | 0.2900 | 0.1000 | 0.00 | 20.0 | 14.0 | 15.0 | 13.5450 | 0.750 | 13.53 | 0.0 | 0.0 | 0.0 | 16.840 | 0.31 | 4.03 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 10.23 | 0.00 | 519.0 | 120.0 | 186.0 | 219.75375 | 12.49 | 26.13 | 124.9950 | 22.58 | 7.33 | 122.0 | 30.0 | 36.0 | 4.03125 | 21.03 | 0.0 | 6.76875 | 1.425 | 0.0 | 66.175 | 16.5400 | 34.91 | 35.780 | 15.48 | 17.34 | 435.4720 | 137.362 | 166.787 | 13.255 | 0.6500 | 13.5300 | 20.64375 | 0.03 | 0.0 | 0.0 | 0.0 | 0.0 | 382.3600 | 35.08 | 33.46 | 75.770 | 50.54 | 34.58 | 52.630 | 15.79 | 21.38 | 136.870 | 98.28 | 81.46 | 536.185 | 188.1000 | 177.09 | 1 | 1 | 1 | 1 |
| 13 | 2607 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 129.395 | 353.99 | 119.69 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.7 | 0.0 | 154.1500 | 156.94 | 96.01 | 0.0 | 0.0 | 0.0 | 80.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 549.750 | 1015.26 | 446.09 | 0.0 | 0.325 | 0.0 | 2.270 | 12.90 | 5.29 | 266.295 | 482.46 | 214.06 | 27.0125 | 24.0125 | 21.20 | 5.0 | 11.0 | 7.0 | 91.4850 | 111.305 | 103.81 | 0.0 | 0.0 | 0.0 | 37.730 | 52.58 | 24.98 | 0.15 | 0.2 | 0.125 | 2.405 | 7.4900 | 8.510 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 4.78 | 0.00 | 380.0 | 717.0 | 353.0 | 27.91000 | 36.01 | 6.14 | 263.8775 | 370.75 | 230.38 | 110.0 | 130.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 514.170 | 849.6075 | 631.86 | 80.705 | 195.18 | 104.79 | 349.2585 | 593.260 | 322.732 | 49.050 | 67.2475 | 61.2425 | 0.00000 | 0.02 | 0.0 | 0.0 | 0.0 | 0.0 | 305.0750 | 534.24 | 244.81 | 99.205 | 294.46 | 108.24 | 120.845 | 255.26 | 138.29 | 420.355 | 655.18 | 326.39 | 1063.920 | 1864.8675 | 1077.95 | 0 | 0 | 0 | 0 |
| 16 | 511 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 265.775 | 205.93 | 233.04 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.255 | 0.7 | 0.0 | 43.7500 | 18.29 | 13.79 | 0.0 | 0.0 | 0.0 | 100.0 | 130.0 | 220.0 | 0.0 | 0.0 | 0.0 | 316.410 | 233.38 | 255.74 | 0.0 | 0.000 | 0.0 | 2.240 | 10.26 | 4.66 | 263.015 | 162.76 | 224.39 | 27.0125 | 6.3300 | 16.66 | 8.0 | 2.0 | 1.0 | 105.8075 | 82.440 | 46.24 | 0.0 | 0.0 | 0.0 | 56.385 | 28.89 | 50.23 | 0.00 | 0.0 | 0.000 | 8.695 | 31.6375 | 31.275 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.63 | 5.91 | 0.00 | 459.0 | 160.0 | 220.0 | 53.88500 | 67.38 | 26.88 | 5.2350 | 3.23 | 4.46 | 110.0 | 130.0 | 220.0 | 1.22500 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 373.210 | 294.4600 | 334.56 | 188.250 | 150.16 | 172.86 | 365.4220 | 187.894 | 206.490 | 45.880 | 67.2475 | 29.5800 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 62.2600 | 70.61 | 31.34 | 209.640 | 128.28 | 201.49 | 253.345 | 211.78 | 288.31 | 48.990 | 21.53 | 22.69 | 689.620 | 527.8400 | 590.30 | 1 | 1 | 1 | 1 |
| 17 | 667 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.770 | 0.00 | 45.79 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 0.7750 | 0.00 | 78.51 | 0.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 625.170 | 0.00 | 631.08 | 0.0 | 0.000 | 0.0 | 0.000 | 0.00 | 0.00 | 30.820 | 5.54 | 87.89 | 4.3150 | 0.0000 | 0.00 | 10.5 | 4.0 | 10.0 | 4.9550 | 0.000 | 1.63 | 0.0 | 0.0 | 0.0 | 17.270 | 0.00 | 40.91 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0000 | 0.710 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.29 | 0.00 | 2.64 | 408.0 | 30.0 | 335.0 | 19.49500 | 0.00 | 36.41 | 221.2925 | 0.00 | 309.45 | 55.0 | 30.0 | 130.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 47.095 | 0.0000 | 87.13 | 24.860 | 0.00 | 43.86 | 354.4940 | 25.499 | 257.583 | 0.640 | 0.0000 | 1.6300 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 413.7025 | 7.79 | 558.51 | 29.270 | 0.00 | 9.38 | 42.135 | 0.00 | 85.49 | 574.635 | 0.00 | 582.63 | 672.265 | 0.0000 | 718.21 | 1 | 1 | 1 | 1 |
| 21 | 720 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 95.565 | 129.74 | 137.53 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000 | 0.0 | 0.0 | 539.6925 | 444.08 | 381.95 | 0.0 | 0.0 | 0.0 | 25.0 | 50.0 | 30.0 | 0.0 | 0.0 | 0.0 | 944.565 | 1068.54 | 1031.53 | 0.0 | 0.000 | 0.0 | 0.375 | 1.58 | 0.00 | 827.305 | 983.39 | 869.89 | 10.0200 | 0.1800 | 16.74 | 24.0 | 25.0 | 17.0 | 62.9850 | 101.930 | 103.81 | 0.0 | 0.0 | 0.0 | 6.335 | 19.54 | 17.99 | 0.00 | 0.0 | 0.000 | 3.725 | 10.3900 | 8.410 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 640.0 | 718.0 | 680.0 | 5.32000 | 23.34 | 29.98 | 111.9350 | 61.79 | 131.64 | 50.0 | 50.0 | 50.0 | 0.00000 | 0.00 | 0.0 | 0.00000 | 0.000 | 0.0 | 169.490 | 261.0400 | 252.61 | 96.430 | 129.16 | 113.46 | 556.1030 | 637.760 | 578.596 | 52.960 | 67.2475 | 61.2425 | 0.00000 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 117.2600 | 85.14 | 161.63 | 89.870 | 104.81 | 107.54 | 106.505 | 159.11 | 139.88 | 848.990 | 938.79 | 893.99 | 1114.055 | 1329.5800 | 1284.14 | 0 | 0 | 0 | 0 |
# Helper functions for Numerical Univariate Analysis
def num_uni_box_analysis(var, friendly_name=None):
if friendly_name is None:
friendly_name = var
ax = sns.boxplot(data=df, x=var)
ax.set_xticks(list(df[var].quantile([0,0.25,0.5,0.75,0.95,1])))
ax.set_xlabel(friendly_name)
ax.set_title(f'{friendly_name} Distribution Univariate Analysis', fontsize=20)
ax.figure.set_size_inches(16,8)
return ax
def num_uni_hist_analysis(var, friendly_name=None, bins=25):
if friendly_name is None:
friendly_name = var
    ax = sns.histplot(data=df, x=var, stat='percent', bins=bins)
ax.set_xlabel(friendly_name)
ax.set_title(f'{friendly_name} Distribution Histogram', fontsize=20)
ax.figure.set_size_inches(16,8)
return ax
# Helper functions for Numerical Segmented Univariate Analysis
def num_uni_box_seg_analysis(var, friendly_name=None):
if friendly_name is None:
friendly_name = var
ax = sns.boxplot(data=df, x='churn', y=var)
    ax.set_xticklabels(['No', 'Yes'])
    ax.set_ylabel(friendly_name)
    ax.set_title(f'{friendly_name} Segmented Univariate Analysis', fontsize=20)
ax.figure.set_size_inches(16,8)
return ax
def num_uni_hist_seg_analysis(var, friendly_name=None, bins=25):
if friendly_name is None:
friendly_name = var
    ax = sns.histplot(data=df, x=var, hue='churn', stat='percent', multiple='fill', bins=bins)
ax.legend(['No', 'Yes'])
ax.set_xlabel(friendly_name)
ax.set_title(f'{friendly_name} Distribution Histogram (Normalised)', fontsize=20)
ax.figure.set_size_inches(16,8)
return ax
# Helper function for Categorical Univariate Analysis
def cat_uni_analysis(col, friendly_name=None):
if friendly_name is None:
friendly_name = col
tmp_df = df[col].value_counts(normalize=True).mul(100).rename('Percent').reset_index().rename(columns={'index': col})
ax = sns.barplot(data=tmp_df, x=col, y='Percent')
for p in ax.patches:
txt = str(p.get_height().round(2)) + '%'
txt_x = p.get_x() + (p.get_width()/2)
txt_y = p.get_height()
ax.annotate(txt, (txt_x, txt_y), size=11, ha='center', va='bottom')
ax.set_title(f'{friendly_name} Univariate Analysis (Normalised)', fontsize=20)
ax.set_xlabel(friendly_name)
ax.figure.set_size_inches(16,8)
return ax
# Helper function for Categorical Segmented Univariate Analysis
def cat_seg_analysis(col, friendly_name=None):
if friendly_name is None:
friendly_name = col
tmp_df = df.groupby(col)['churn'].value_counts(normalize=True).mul(100).rename('Percent').reset_index().rename({'level_1': 'churn'}, axis=1).replace(0,'No').replace(1,'Yes')
ax = sns.barplot(data=tmp_df, x=col, y='Percent', hue='churn')
for p in ax.patches:
txt = str(p.get_height().round(2)) + '%'
txt_x = p.get_x() + (p.get_width()/2)
txt_y = p.get_height()
ax.annotate(txt, (txt_x, txt_y), size=11, ha='center', va='bottom')
ax.set_title(f'{friendly_name} Segmented Univariate Analysis (Normalised)', fontsize=20)
ax.set_xlabel(friendly_name)
ax.figure.set_size_inches(16,8)
return ax
plt.figure(figsize=(24, 12*len(unique_columns)))
for i in range(len(unique_columns)*2):
col=unique_columns[i//2]
plt.subplot(len(unique_columns), 2, i+1)
if i%2 == 0:
ax = sns.boxplot(data=df[[col+'_good', col+'_action', col+'_churn']])
ax.set_title(col + ' Unsegmented Analysis')
else:
ax = sns.boxplot(data=pd.melt(df[[col+'_good', col+'_action', col+'_churn', 'churn']], 'churn'), x='variable', y='value', hue='churn')
ax.set_title(col + ' Segmented Analysis')
ax.set_xticklabels(['Good Phase', 'Action Phase', 'Churn Phase'])
plt.tight_layout()
# Numerical variables with negligible variance or a heavily imbalanced distribution carry little signal
useless_columns = ['roam_og_mou', 'roam_ic_mou', 'std_og_t2f_mou', 'spl_ic_mou', 'og_others', 'isd_ic_mou', 'isd_og_mou', 'sachet_3g', 'sachet_2g', 'monthly_3g', 'monthly_2g']
# Removing useless features
for col in useless_columns:
df.drop([col+'_good', col+'_action', col+'_churn'], axis=1, inplace=True)
unique_columns = [i for i in unique_columns if i not in useless_columns]
df.shape
(27991, 99)
num_uni_box_analysis('aon', 'Age on Network')
plt.show()
num_uni_box_seg_analysis('aon', 'Age on Network')
plt.show()
num_uni_hist_analysis('aon', 'Age on Network')
plt.show()
num_uni_hist_seg_analysis('aon', 'Age on Network')
plt.show()
cat_uni_analysis('mou_decrease', 'Decrease in Minutes of Usage')
plt.show()
cat_seg_analysis('mou_decrease', 'Decrease in Minutes of Usage')
plt.show()
cat_uni_analysis('arpu_decrease', 'Decrease in Average Revenue per User')
plt.show()
cat_seg_analysis('arpu_decrease', 'Decrease in Average Revenue per User')
plt.show()
cat_uni_analysis('rech_amt_decrease', 'Decrease in Recharge Amount')
plt.show()
cat_seg_analysis('rech_amt_decrease', 'Decrease in Recharge Amount')
plt.show()
cat_uni_analysis('rech_num_decrease', 'Decrease in No. of Recharges')
plt.show()
cat_seg_analysis('rech_num_decrease', 'Decrease in No. of Recharges')
plt.show()
Assessments:
# Removing churn phase data
df.drop([i+'_churn' for i in unique_columns], axis=1, inplace=True)
# Dropping a few derived columns that are not required in further analysis
df.drop(['total_mou'+'_'+p for p in ['good', 'action']], axis=1, inplace=True)
unique_columns = [i for i in unique_columns if i != 'total_mou']
df.shape
(27991, 66)
df.head()
| aon | churn | loc_og_mou_good | loc_og_mou_action | std_ic_t2f_mou_good | std_ic_t2f_mou_action | std_og_t2m_mou_good | std_og_t2m_mou_action | last_day_rch_amt_good | last_day_rch_amt_action | total_og_mou_good | total_og_mou_action | loc_og_t2c_mou_good | loc_og_t2c_mou_action | loc_og_t2f_mou_good | loc_og_t2f_mou_action | offnet_mou_good | offnet_mou_action | std_ic_t2t_mou_good | std_ic_t2t_mou_action | total_rech_num_good | total_rech_num_action | std_ic_mou_good | std_ic_mou_action | loc_ic_t2t_mou_good | loc_ic_t2t_mou_action | ic_others_good | ic_others_action | loc_ic_t2f_mou_good | loc_ic_t2f_mou_action | spl_og_mou_good | spl_og_mou_action | total_rech_amt_good | total_rech_amt_action | loc_og_t2t_mou_good | loc_og_t2t_mou_action | std_og_t2t_mou_good | std_og_t2t_mou_action | max_rech_amt_good | max_rech_amt_action | vbc_3g_good | vbc_3g_action | vol_3g_mb_good | vol_3g_mb_action | total_ic_mou_good | total_ic_mou_action | loc_ic_t2m_mou_good | loc_ic_t2m_mou_action | arpu_good | arpu_action | std_ic_t2m_mou_good | std_ic_t2m_mou_action | vol_2g_mb_good | vol_2g_mb_action | onnet_mou_good | onnet_mou_action | loc_og_t2m_mou_good | loc_og_t2m_mou_action | loc_ic_mou_good | loc_ic_mou_action | std_og_mou_good | std_og_mou_action | mou_decrease | arpu_decrease | rech_amt_decrease | rech_num_decrease | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 315 | 0 | 333.135 | 63.04 | 0.000 | 0.0 | 11.8650 | 75.69 | 25.0 | 10.0 | 470.010 | 171.56 | 0.0 | 0.325 | 0.000 | 0.00 | 87.645 | 136.48 | 0.2900 | 0.1000 | 20.0 | 14.0 | 13.5450 | 0.750 | 16.840 | 0.31 | 0.00 | 0.0 | 0.000 | 0.0000 | 0.00 | 10.23 | 519.0 | 120.0 | 219.75375 | 12.49 | 124.9950 | 22.58 | 122.0 | 30.0 | 4.03125 | 21.03 | 6.76875 | 1.425 | 66.175 | 16.5400 | 35.780 | 15.48 | 435.4720 | 137.362 | 13.255 | 0.6500 | 20.64375 | 0.03 | 382.3600 | 35.08 | 75.770 | 50.54 | 52.630 | 15.79 | 136.870 | 98.28 | 1 | 1 | 1 | 1 |
| 13 | 2607 | 0 | 129.395 | 353.99 | 0.000 | 0.7 | 154.1500 | 156.94 | 80.0 | 0.0 | 549.750 | 1015.26 | 0.0 | 0.325 | 2.270 | 12.90 | 266.295 | 482.46 | 27.0125 | 24.0125 | 5.0 | 11.0 | 91.4850 | 111.305 | 37.730 | 52.58 | 0.15 | 0.2 | 2.405 | 7.4900 | 0.00 | 4.78 | 380.0 | 717.0 | 27.91000 | 36.01 | 263.8775 | 370.75 | 110.0 | 130.0 | 0.00000 | 0.00 | 0.00000 | 0.000 | 514.170 | 849.6075 | 80.705 | 195.18 | 349.2585 | 593.260 | 49.050 | 67.2475 | 0.00000 | 0.02 | 305.0750 | 534.24 | 99.205 | 294.46 | 120.845 | 255.26 | 420.355 | 655.18 | 0 | 0 | 0 | 0 |
| 16 | 511 | 0 | 265.775 | 205.93 | 0.255 | 0.7 | 43.7500 | 18.29 | 100.0 | 130.0 | 316.410 | 233.38 | 0.0 | 0.000 | 2.240 | 10.26 | 263.015 | 162.76 | 27.0125 | 6.3300 | 8.0 | 2.0 | 105.8075 | 82.440 | 56.385 | 28.89 | 0.00 | 0.0 | 8.695 | 31.6375 | 1.63 | 5.91 | 459.0 | 160.0 | 53.88500 | 67.38 | 5.2350 | 3.23 | 110.0 | 130.0 | 1.22500 | 0.00 | 0.00000 | 0.000 | 373.210 | 294.4600 | 188.250 | 150.16 | 365.4220 | 187.894 | 45.880 | 67.2475 | 0.00000 | 0.00 | 62.2600 | 70.61 | 209.640 | 128.28 | 253.345 | 211.78 | 48.990 | 21.53 | 1 | 1 | 1 | 1 |
| 17 | 667 | 0 | 48.770 | 0.00 | 0.000 | 0.0 | 0.7750 | 0.00 | 15.0 | 0.0 | 625.170 | 0.00 | 0.0 | 0.000 | 0.000 | 0.00 | 30.820 | 5.54 | 4.3150 | 0.0000 | 10.5 | 4.0 | 4.9550 | 0.000 | 17.270 | 0.00 | 0.00 | 0.0 | 0.000 | 0.0000 | 1.29 | 0.00 | 408.0 | 30.0 | 19.49500 | 0.00 | 221.2925 | 0.00 | 55.0 | 30.0 | 0.00000 | 0.00 | 0.00000 | 0.000 | 47.095 | 0.0000 | 24.860 | 0.00 | 354.4940 | 25.499 | 0.640 | 0.0000 | 0.00000 | 0.00 | 413.7025 | 7.79 | 29.270 | 0.00 | 42.135 | 0.00 | 574.635 | 0.00 | 1 | 1 | 1 | 1 |
| 21 | 720 | 0 | 95.565 | 129.74 | 0.000 | 0.0 | 539.6925 | 444.08 | 25.0 | 50.0 | 944.565 | 1068.54 | 0.0 | 0.000 | 0.375 | 1.58 | 827.305 | 983.39 | 10.0200 | 0.1800 | 24.0 | 25.0 | 62.9850 | 101.930 | 6.335 | 19.54 | 0.00 | 0.0 | 3.725 | 10.3900 | 0.00 | 0.00 | 640.0 | 718.0 | 5.32000 | 23.34 | 111.9350 | 61.79 | 50.0 | 50.0 | 0.00000 | 0.00 | 0.00000 | 0.000 | 169.490 | 261.0400 | 96.430 | 129.16 | 556.1030 | 637.760 | 52.960 | 67.2475 | 0.00000 | 0.00 | 117.2600 | 85.14 | 89.870 | 104.81 | 106.505 | 159.11 | 848.990 | 938.79 | 0 | 0 | 0 | 0 |
df_train, df_test = train_test_split(df, train_size=0.7, random_state=100)
df_train.shape
(19593, 66)
df_test.shape
(8398, 66)
# Extract Target Variable
y_train = df_train.pop('churn')
X_train = df_train
# Fitting SMOTE-NC to the train set
smote_nc = SMOTENC(categorical_features=[('decrease' in c) for c in X_train.columns], random_state=42)
X_train, y_train = smote_nc.fit_resample(X_train, y_train)
X_train.shape
(37862, 65)
y_train.shape
(37862,)
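A quick sanity check (a minimal sketch; its output is not reproduced here) that SMOTE-NC left the two classes balanced:
# Class distribution after resampling
print(y_train.value_counts())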
num_vars = ['aon']
for col in unique_columns:
num_vars = num_vars + [col+'_good', col+'_action']
# Learn Min & Max values
scaler = MinMaxScaler()
scaler.fit(X_train[num_vars])
MinMaxScaler()
# Scale
X_train[num_vars] = scaler.transform(X_train[num_vars])
X_train.head()
| aon | loc_og_mou_good | loc_og_mou_action | std_ic_t2f_mou_good | std_ic_t2f_mou_action | std_og_t2m_mou_good | std_og_t2m_mou_action | last_day_rch_amt_good | last_day_rch_amt_action | total_og_mou_good | total_og_mou_action | loc_og_t2c_mou_good | loc_og_t2c_mou_action | loc_og_t2f_mou_good | loc_og_t2f_mou_action | offnet_mou_good | offnet_mou_action | std_ic_t2t_mou_good | std_ic_t2t_mou_action | total_rech_num_good | total_rech_num_action | std_ic_mou_good | std_ic_mou_action | loc_ic_t2t_mou_good | loc_ic_t2t_mou_action | ic_others_good | ic_others_action | loc_ic_t2f_mou_good | loc_ic_t2f_mou_action | spl_og_mou_good | spl_og_mou_action | total_rech_amt_good | total_rech_amt_action | loc_og_t2t_mou_good | loc_og_t2t_mou_action | std_og_t2t_mou_good | std_og_t2t_mou_action | max_rech_amt_good | max_rech_amt_action | vbc_3g_good | vbc_3g_action | vol_3g_mb_good | vol_3g_mb_action | total_ic_mou_good | total_ic_mou_action | loc_ic_t2m_mou_good | loc_ic_t2m_mou_action | arpu_good | arpu_action | std_ic_t2m_mou_good | std_ic_t2m_mou_action | vol_2g_mb_good | vol_2g_mb_action | onnet_mou_good | onnet_mou_action | loc_og_t2m_mou_good | loc_og_t2m_mou_action | loc_ic_mou_good | loc_ic_mou_action | std_og_mou_good | std_og_mou_action | mou_decrease | arpu_decrease | rech_amt_decrease | rech_num_decrease | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.439266 | 0.000753 | 0.000000 | 0.568966 | 0.0 | 0.000000 | 0.000000 | 0.942056 | 0.841402 | 0.000352 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000045 | 0.000000 | 0.030171 | 0.000000 | 0.125000 | 0.08 | 0.018155 | 0.000000 | 0.030531 | 0.090721 | 1.000000 | 0.0 | 0.005611 | 0.019281 | 0.000000 | 0.000000 | 0.060720 | 0.168168 | 0.002726 | 0.000000 | 0.000000 | 0.000000 | 0.740625 | 0.724138 | 1.0 | 1.0 | 1.0 | 1.0 | 0.033216 | 0.043420 | 0.043528 | 0.044735 | 0.367054 | 0.308416 | 0.002318 | 0.000000 | 0.523953 | 1.0 | 0.000725 | 0.000000 | 0.000089 | 0.000000 | 0.036193 | 0.052577 | 0.000000 | 0.000000 | 0 | 1 | 1 | 1 |
| 1 | 0.223376 | 0.484550 | 0.520507 | 0.000000 | 0.0 | 0.000000 | 0.005922 | 0.280374 | 0.434057 | 0.227779 | 0.249945 | 0.0 | 1.0 | 0.608680 | 1.000000 | 0.270714 | 0.346164 | 0.000000 | 0.000000 | 0.267857 | 0.24 | 0.005157 | 0.005391 | 0.312619 | 0.516148 | 0.000000 | 0.0 | 0.294869 | 0.255709 | 0.139497 | 0.886982 | 0.214168 | 0.408408 | 0.539007 | 0.440392 | 0.000410 | 0.000000 | 0.581250 | 0.373563 | 1.0 | 0.0 | 1.0 | 0.0 | 0.382417 | 0.448019 | 0.564860 | 0.614799 | 0.516485 | 0.450727 | 0.008641 | 0.008922 | 0.523953 | 0.0 | 0.145381 | 0.131155 | 0.514664 | 0.579382 | 0.459395 | 0.541649 | 0.000130 | 0.002075 | 0 | 1 | 0 | 1 |
| 2 | 0.074137 | 0.036821 | 0.005858 | 0.000000 | 0.0 | 0.049880 | 1.000000 | 0.000000 | 0.166945 | 0.451439 | 0.551327 | 1.0 | 0.0 | 0.000000 | 0.000000 | 0.061752 | 0.574698 | 0.079778 | 0.256533 | 0.669643 | 0.64 | 0.033626 | 0.299717 | 0.032237 | 0.000173 | 0.000000 | 0.0 | 0.038391 | 0.000000 | 0.582320 | 1.000000 | 0.186397 | 0.457791 | 0.058213 | 0.000000 | 0.631673 | 1.000000 | 0.109375 | 0.143678 | 0.0 | 0.0 | 0.0 | 0.0 | 0.049520 | 0.049882 | 0.067733 | 0.019539 | 0.461139 | 0.551748 | 0.025994 | 0.404327 | 0.000000 | 0.0 | 0.585271 | 0.508687 | 0.033893 | 0.009046 | 0.053944 | 0.012841 | 0.532326 | 0.744378 | 0 | 0 | 0 | 1 |
| 3 | 0.579812 | 1.000000 | 1.000000 | 0.568966 | 0.0 | 0.002677 | 0.000000 | 0.676636 | 0.434057 | 0.687304 | 1.000000 | 1.0 | 1.0 | 0.036166 | 0.103101 | 0.884923 | 1.000000 | 0.006108 | 0.199063 | 0.553571 | 0.52 | 0.035513 | 0.042945 | 0.585951 | 0.503443 | 0.000000 | 0.0 | 0.035585 | 0.000000 | 0.606650 | 1.000000 | 0.931278 | 1.000000 | 0.521616 | 0.405325 | 0.000000 | 0.004073 | 0.740625 | 0.724138 | 0.0 | 0.0 | 1.0 | 1.0 | 0.666923 | 0.734916 | 0.807062 | 1.000000 | 0.901167 | 1.000000 | 0.047843 | 0.000000 | 0.641117 | 1.0 | 0.138713 | 0.118805 | 1.000000 | 1.000000 | 0.753886 | 0.883094 | 0.000988 | 0.001192 | 0 | 0 | 0 | 1 |
| 4 | 0.121227 | 0.328143 | 0.300437 | 1.000000 | 0.0 | 0.101234 | 0.197667 | 0.330841 | 0.661102 | 0.251824 | 0.222433 | 0.0 | 0.0 | 0.074503 | 0.000000 | 0.238420 | 0.263962 | 0.624896 | 1.000000 | 0.428571 | 0.32 | 0.559695 | 0.380576 | 0.243881 | 0.163888 | 0.583333 | 1.0 | 0.022148 | 0.027815 | 0.374047 | 0.000000 | 0.268298 | 0.407741 | 0.359301 | 0.256736 | 0.265714 | 0.172650 | 0.656250 | 0.568966 | 1.0 | 1.0 | 1.0 | 1.0 | 0.588922 | 0.582398 | 0.370852 | 0.404463 | 0.492980 | 0.509685 | 0.648869 | 0.195992 | 0.560406 | 1.0 | 0.243332 | 0.160179 | 0.363204 | 0.362740 | 0.303008 | 0.306655 | 0.121722 | 0.119777 | 1 | 1 | 1 | 1 |
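For later evaluation, the same train-fitted scaler (never a freshly fitted one) must be applied to the test split. A minimal sketch, assuming the target is popped from df_test the same way as for the train set:
# Reusing the scaler fitted on the train set avoids data leakage
y_test = df_test.pop('churn')
X_test = df_test
X_test[num_vars] = scaler.transform(X_test[num_vars])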
X_train_pca = X_train[num_vars].copy()
pca = PCA(random_state=42)
pca.fit(X_train_pca)
PCA(random_state=42)
pca.components_
array([[ 0.07336113, 0.15964022, 0.19426862, ..., 0.20760663,
-0.12861914, 0.01011346],
[-0.03377833, 0.0097072 , 0.03149767, ..., 0.01475655,
0.25204477, 0.30253814],
[-0.05105141, -0.1562654 , -0.13065015, ..., -0.11793345,
0.03544626, 0.11383457],
...,
[-0.00116137, -0.23425505, 0.32667731, ..., 0.08765565,
0.38786571, -0.23735821],
[-0.00169729, -0.14672795, 0.09875055, ..., 0.520221 ,
-0.18195387, 0.12803139],
[-0.00067344, 0.51554079, -0.38051691, ..., 0.18301135,
0.3148663 , -0.21369201]])
pca.explained_variance_ratio_
array([2.11122456e-01, 1.45584838e-01, 7.94797021e-02, 6.16009695e-02,
5.39088167e-02, 4.62920669e-02, 4.39221279e-02, 3.06820837e-02,
2.81091998e-02, 2.29584650e-02, 2.16429002e-02, 1.95213907e-02,
1.90167818e-02, 1.69256524e-02, 1.50889467e-02, 1.30643786e-02,
1.23308193e-02, 1.19547998e-02, 1.14146754e-02, 1.00386598e-02,
9.81173001e-03, 9.50723891e-03, 8.82044018e-03, 8.11904421e-03,
7.49962780e-03, 6.90580286e-03, 6.74601241e-03, 6.29822005e-03,
5.83295291e-03, 4.52271230e-03, 4.40614721e-03, 4.33308617e-03,
4.14525124e-03, 3.79351625e-03, 3.64783933e-03, 3.18520592e-03,
2.89762609e-03, 2.69207318e-03, 2.60851376e-03, 2.37011180e-03,
2.08448829e-03, 1.97898619e-03, 1.91867584e-03, 1.61837923e-03,
1.43560381e-03, 1.35566382e-03, 9.98824756e-04, 9.70046922e-04,
8.77691698e-04, 6.81438273e-04, 5.48528065e-04, 4.93316389e-04,
4.62360328e-04, 3.63470751e-04, 3.57522334e-04, 3.01620543e-04,
2.42118232e-04, 1.87151018e-04, 1.59123721e-04, 8.93422371e-05,
7.27652434e-05])
# Scree Plot
pca_variance_cumulative = np.cumsum(pca.explained_variance_ratio_)
plt.figure(figsize=(16,10))
plt.plot(range(1,len(pca_variance_cumulative)+1), pca_variance_cumulative)
plt.show()
25 components explain more than 90% of the variance.
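The cutoff can also be read off programmatically instead of eyeballing the scree plot; a minimal sketch using the cumulative ratios computed above:
# Smallest number of components whose cumulative explained variance reaches 90%
print(int(np.argmax(pca_variance_cumulative >= 0.90)) + 1)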
# Reducing to 25 components; IncrementalPCA fits in mini-batches, which keeps memory usage low
pca = IncrementalPCA(n_components=25)
X_train_pca = pca.fit_transform(X_train_pca)
X_train_pca = pd.DataFrame(X_train_pca, columns=['PC'+str(i) for i in range(1,26)])
X_train_pca.head()
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 | PC13 | PC14 | PC15 | PC16 | PC17 | PC18 | PC19 | PC20 | PC21 | PC22 | PC23 | PC24 | PC25 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.392464 | -1.742748 | 1.586044 | -0.042025 | -0.121037 | 0.157551 | -0.516437 | -0.080412 | 0.491018 | 0.423980 | 0.004110 | 0.370351 | 0.035072 | 0.333412 | -0.318460 | 0.199650 | -0.245777 | -0.173004 | 0.666346 | 0.194335 | -0.002042 | 0.049495 | 0.360444 | 0.044814 | 0.035534 |
| 1 | 1.384197 | -0.762394 | -0.122334 | -0.251210 | -0.760379 | -0.217034 | 0.218068 | 0.255680 | -0.286098 | -0.146520 | 0.637099 | -0.238962 | -0.760972 | 0.042142 | 0.376890 | 0.280950 | 0.180151 | 0.041385 | 0.009923 | -0.583209 | 0.422560 | -0.694506 | -0.201579 | 0.222967 | 0.154847 |
| 2 | -0.673997 | 1.229669 | 0.525259 | -0.168532 | -0.506183 | -0.898676 | 0.264304 | 0.048469 | -0.144008 | -0.346142 | -0.583583 | -0.046268 | 0.031594 | -0.241607 | 0.093328 | -0.052417 | 0.465217 | -0.164916 | 0.270171 | 0.006156 | 0.018946 | 0.001294 | 0.119512 | 0.013196 | -0.060196 |
| 3 | 2.520927 | 0.219104 | 0.031615 | 0.150144 | -1.617824 | 0.200662 | 0.868365 | 0.219290 | -0.905711 | 0.570897 | 0.456307 | -0.453094 | 0.617976 | -0.276454 | -0.674340 | 0.604019 | 0.022137 | 0.065132 | 0.030868 | 0.303848 | 0.041241 | -0.105478 | 0.256652 | 0.883187 | -0.396427 |
| 4 | 1.540153 | -0.478254 | 1.290122 | 0.079554 | 0.672998 | 0.401171 | 0.031713 | -0.806560 | 0.377509 | 0.561806 | -0.036438 | 0.134148 | 0.151481 | -0.187351 | -0.215492 | 0.296445 | 0.053143 | 0.335259 | -0.064859 | 0.529772 | 0.405330 | 0.008008 | 0.487486 | 0.047578 | 0.379151 |
plt.figure(figsize=[24,24])
sns.heatmap(np.corrcoef(X_train_pca.transpose()), annot=True, cmap='Blues')
plt.show()
Zero correlation between the components, thanks to PCA.
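The heatmap's takeaway can also be verified numerically; a small sketch:
# Largest absolute off-diagonal entry of the component correlation matrix (expected ~0)
pc_corr = np.corrcoef(X_train_pca.transpose())
print(np.abs(pc_corr - np.eye(pc_corr.shape[0])).max())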
While training models, our main emphasis is on sensitivity/recall rather than accuracy, because our primary goal is to correctly identify churn cases rather than non-churn cases.
# Creating function to easily calculate VIF
def calculate_vif(x_df):
    vif_df = pd.DataFrame({'Feature': x_df.columns,
                           'VIF': [variance_inflation_factor(x_df.values, i) for i in range(x_df.shape[1])]})
    vif_df['VIF'] = vif_df['VIF'].round(2)
    vif_df = vif_df.sort_values(by='VIF', ascending=False)
    return vif_df
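A quick illustration of how the function flags multicollinearity, on synthetic data with hypothetical columns ('a' and 'b' nearly collinear, 'c' independent):
demo = pd.DataFrame({'a': np.arange(10.0),
                     'b': np.arange(10.0) * 2 + np.random.normal(0, 0.1, 10),
                     'c': np.random.normal(size=10)})
calculate_vif(demo)  # 'a' and 'b' should receive very large VIFs; 'c' should stay low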
# Creating function to easily train Logistic Regression models
def train_model(y_df, x_df):
    # Adding a constant column so the model has an intercept
    x_df_sm = sm.add_constant(x_df)
    # Training a binomial (logit) GLM
    lr = sm.GLM(y_df, x_df_sm, family=sm.families.Binomial())
    lr_model = lr.fit()
    return lr_model
# Creating function to easily select features
def auto_select_features(n, x_df=X_train):
    logreg = LogisticRegression(max_iter=1000)
    selector = RFE(logreg, n_features_to_select=n)
    selector = selector.fit(x_df, y_train)
    top_features_df = pd.DataFrame({'Feature': x_df.columns, 'Selected': selector.support_, 'Rank': selector.ranking_})
    selected_vars = top_features_df[top_features_df['Selected'] == True]['Feature'].to_list()
    return selected_vars
# Creating function to easily evaluate performance of model over multiple probability cutoffs
def model_performance(model, x_df, y_df=y_train, lib='sm'):
    prob = [float(x)/10 for x in range(10)]
    y_final = y_df.to_frame()
    if lib == 'sm':
        x_df_sm = sm.add_constant(x_df)
        y_final['churn_prob'] = model.predict(x_df_sm)
    else:
        y_final['churn_prob'] = model.predict_proba(x_df)[:, 1]
    for i in prob:
        y_final[i] = y_final['churn_prob'].map(lambda x: 1 if x > i else 0)
    performance_df = pd.DataFrame(columns=['probability_cutoff', 'accuracy', 'sensitivity', 'specificity'])
    for i in prob:
        cm = confusion_matrix(y_final['churn'], y_final[i])
        total = cm.sum()
        accuracy = (cm[0, 0] + cm[1, 1]) / total
        sensitivity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
        specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
        performance_df.loc[i] = [i, accuracy, sensitivity, specificity]
    return performance_df
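To make the arithmetic inside model_performance concrete, a toy example with hypothetical counts (rows = actual non-churn/churn, columns = predicted):
cm_example = np.array([[80, 20],   # 80 true negatives, 20 false positives
                       [10, 90]])  # 10 false negatives, 90 true positives
accuracy = (cm_example[0, 0] + cm_example[1, 1]) / cm_example.sum()     # 0.85
sensitivity = cm_example[1, 1] / (cm_example[1, 0] + cm_example[1, 1])  # 0.90
specificity = cm_example[0, 0] / (cm_example[0, 0] + cm_example[0, 1])  # 0.80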
# Creating function to draw ROC curve
def roc(model, x_df, y_df=y_train, lib='sm'):
    y_final = y_df.to_frame()
    if lib == 'sm':
        x_df_sm = sm.add_constant(x_df)
        y_final['churn_prob'] = model.predict(x_df_sm)
    else:
        y_final['churn_prob'] = model.predict_proba(x_df)[:, 1]
    fpr, tpr, threshold = roc_curve(y_final['churn'], y_final['churn_prob'], drop_intermediate=False)
    auc_score = roc_auc_score(y_final['churn'], y_final['churn_prob'])
    plt.figure(figsize=(8, 8))
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc_score)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate or [1 - True Negative Rate]')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.show()
    return None
# Auto selecting 15 features
X_train_1 = X_train[auto_select_features(15)]
# Training Logistic Regression Model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37846 |
| Model Family: | Binomial | Df Model: | 15 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -14693. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 29387. |
| Time: | 22:20:48 | Pearson chi2: | 6.22e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4567 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.4491 | 0.034 | 13.053 | 0.000 | 0.382 | 0.517 |
| std_og_t2m_mou_good | 2.2325 | 0.068 | 32.695 | 0.000 | 2.099 | 2.366 |
| std_og_t2m_mou_action | -3.0590 | 0.115 | -26.553 | 0.000 | -3.285 | -2.833 |
| last_day_rch_amt_action | -1.6380 | 0.070 | -23.562 | 0.000 | -1.774 | -1.502 |
| total_og_mou_action | -7.7027 | 0.283 | -27.240 | 0.000 | -8.257 | -7.149 |
| offnet_mou_action | 4.0173 | 0.151 | 26.571 | 0.000 | 3.721 | 4.314 |
| loc_ic_t2t_mou_good | 3.2288 | 0.210 | 15.358 | 0.000 | 2.817 | 3.641 |
| loc_ic_t2t_mou_action | -2.6098 | 0.139 | -18.750 | 0.000 | -2.883 | -2.337 |
| std_og_t2t_mou_good | 1.9969 | 0.068 | 29.554 | 0.000 | 1.864 | 2.129 |
| std_og_t2t_mou_action | -3.0147 | 0.120 | -25.219 | 0.000 | -3.249 | -2.780 |
| loc_ic_t2m_mou_good | 4.7021 | 0.454 | 10.356 | 0.000 | 3.812 | 5.592 |
| loc_ic_t2m_mou_action | -2.8968 | 0.180 | -16.093 | 0.000 | -3.250 | -2.544 |
| onnet_mou_action | 3.7515 | 0.147 | 25.549 | 0.000 | 3.464 | 4.039 |
| loc_og_t2m_mou_good | -2.2143 | 0.147 | -15.029 | 0.000 | -2.503 | -1.925 |
| loc_ic_mou_good | -7.1385 | 0.594 | -12.020 | 0.000 | -8.303 | -5.975 |
| std_og_mou_action | 3.4397 | 0.229 | 15.040 | 0.000 | 2.991 | 3.888 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 13 | loc_ic_mou_good | 36.84 |
| 9 | loc_ic_t2m_mou_good | 28.85 |
| 3 | total_og_mou_action | 26.99 |
| 14 | std_og_mou_action | 23.03 |
| 11 | onnet_mou_action | 11.55 |
| 4 | offnet_mou_action | 11.01 |
| 8 | std_og_t2t_mou_action | 10.67 |
| 5 | loc_ic_t2t_mou_good | 9.26 |
| 1 | std_og_t2m_mou_action | 8.92 |
| 10 | loc_ic_t2m_mou_action | 8.06 |
| 12 | loc_og_t2m_mou_good | 6.24 |
| 6 | loc_ic_t2t_mou_action | 5.38 |
| 7 | std_og_t2t_mou_good | 3.39 |
| 0 | std_og_t2m_mou_good | 3.13 |
| 2 | last_day_rch_amt_action | 1.91 |
# Dropping 'loc_ic_mou_good' because it has high VIF
X_train_1 = X_train_1.drop('loc_ic_mou_good', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37847 |
| Model Family: | Binomial | Df Model: | 14 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -14789. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 29578. |
| Time: | 22:20:49 | Pearson chi2: | 6.51e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4540 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.4469 | 0.034 | 13.013 | 0.000 | 0.380 | 0.514 |
| std_og_t2m_mou_good | 2.2718 | 0.068 | 33.371 | 0.000 | 2.138 | 2.405 |
| std_og_t2m_mou_action | -3.0465 | 0.115 | -26.493 | 0.000 | -3.272 | -2.821 |
| last_day_rch_amt_action | -1.6410 | 0.069 | -23.710 | 0.000 | -1.777 | -1.505 |
| total_og_mou_action | -7.8446 | 0.283 | -27.762 | 0.000 | -8.398 | -7.291 |
| offnet_mou_action | 3.9904 | 0.151 | 26.445 | 0.000 | 3.695 | 4.286 |
| loc_ic_t2t_mou_good | 1.1043 | 0.116 | 9.503 | 0.000 | 0.877 | 1.332 |
| loc_ic_t2t_mou_action | -2.6940 | 0.137 | -19.631 | 0.000 | -2.963 | -2.425 |
| std_og_t2t_mou_good | 2.0182 | 0.067 | 30.100 | 0.000 | 1.887 | 2.150 |
| std_og_t2t_mou_action | -2.9703 | 0.119 | -24.945 | 0.000 | -3.204 | -2.737 |
| loc_ic_t2m_mou_good | -0.3792 | 0.169 | -2.240 | 0.025 | -0.711 | -0.047 |
| loc_ic_t2m_mou_action | -2.8245 | 0.178 | -15.896 | 0.000 | -3.173 | -2.476 |
| onnet_mou_action | 3.7157 | 0.147 | 25.356 | 0.000 | 3.428 | 4.003 |
| loc_og_t2m_mou_good | -2.0341 | 0.145 | -14.062 | 0.000 | -2.318 | -1.751 |
| std_og_mou_action | 3.5698 | 0.228 | 15.685 | 0.000 | 3.124 | 4.016 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 3 | total_og_mou_action | 26.93 |
| 13 | std_og_mou_action | 22.99 |
| 11 | onnet_mou_action | 11.54 |
| 4 | offnet_mou_action | 11.01 |
| 8 | std_og_t2t_mou_action | 10.66 |
| 9 | loc_ic_t2m_mou_good | 9.75 |
| 1 | std_og_t2m_mou_action | 8.92 |
| 10 | loc_ic_t2m_mou_action | 8.05 |
| 12 | loc_og_t2m_mou_good | 6.19 |
| 6 | loc_ic_t2t_mou_action | 5.35 |
| 5 | loc_ic_t2t_mou_good | 5.32 |
| 7 | std_og_t2t_mou_good | 3.39 |
| 0 | std_og_t2m_mou_good | 3.13 |
| 2 | last_day_rch_amt_action | 1.91 |
# Dropping 'total_og_mou_action' because it has high VIF
X_train_1 = X_train_1.drop('total_og_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37848 |
| Model Family: | Binomial | Df Model: | 13 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -15287. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 30574. |
| Time: | 22:20:50 | Pearson chi2: | 5.51e+04 |
| No. Iterations: | 6 | Pseudo R-squ. (CS): | 0.4394 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.5403 | 0.034 | 16.071 | 0.000 | 0.474 | 0.606 |
| std_og_t2m_mou_good | 2.3211 | 0.068 | 34.189 | 0.000 | 2.188 | 2.454 |
| std_og_t2m_mou_action | -2.5191 | 0.107 | -23.570 | 0.000 | -2.729 | -2.310 |
| last_day_rch_amt_action | -1.4903 | 0.066 | -22.502 | 0.000 | -1.620 | -1.360 |
| offnet_mou_action | 2.4788 | 0.123 | 20.230 | 0.000 | 2.239 | 2.719 |
| loc_ic_t2t_mou_good | 1.1698 | 0.115 | 10.191 | 0.000 | 0.945 | 1.395 |
| loc_ic_t2t_mou_action | -3.2579 | 0.136 | -24.033 | 0.000 | -3.524 | -2.992 |
| std_og_t2t_mou_good | 2.0582 | 0.067 | 30.899 | 0.000 | 1.928 | 2.189 |
| std_og_t2t_mou_action | -1.7543 | 0.105 | -16.681 | 0.000 | -1.960 | -1.548 |
| loc_ic_t2m_mou_good | 0.4874 | 0.165 | 2.954 | 0.003 | 0.164 | 0.811 |
| loc_ic_t2m_mou_action | -4.2297 | 0.175 | -24.204 | 0.000 | -4.572 | -3.887 |
| onnet_mou_action | 1.5633 | 0.108 | 14.530 | 0.000 | 1.352 | 1.774 |
| loc_og_t2m_mou_good | -3.4691 | 0.133 | -26.157 | 0.000 | -3.729 | -3.209 |
| std_og_mou_action | -1.6838 | 0.118 | -14.237 | 0.000 | -1.916 | -1.452 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 12 | std_og_mou_action | 11.38 |
| 7 | std_og_t2t_mou_action | 9.70 |
| 8 | loc_ic_t2m_mou_good | 9.49 |
| 3 | offnet_mou_action | 9.26 |
| 1 | std_og_t2m_mou_action | 8.66 |
| 10 | onnet_mou_action | 8.47 |
| 9 | loc_ic_t2m_mou_action | 7.66 |
| 11 | loc_og_t2m_mou_good | 5.46 |
| 4 | loc_ic_t2t_mou_good | 5.31 |
| 5 | loc_ic_t2t_mou_action | 5.25 |
| 6 | std_og_t2t_mou_good | 3.37 |
| 0 | std_og_t2m_mou_good | 3.11 |
| 2 | last_day_rch_amt_action | 1.88 |
# Dropping 'std_og_mou_action' because it has high VIF
X_train_1 = X_train_1.drop('std_og_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37849 |
| Model Family: | Binomial | Df Model: | 12 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -15390. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 30781. |
| Time: | 22:20:51 | Pearson chi2: | 5.28e+04 |
| No. Iterations: | 6 | Pseudo R-squ. (CS): | 0.4364 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.6332 | 0.033 | 19.177 | 0.000 | 0.568 | 0.698 |
| std_og_t2m_mou_good | 2.2238 | 0.067 | 33.074 | 0.000 | 2.092 | 2.356 |
| std_og_t2m_mou_action | -3.0080 | 0.101 | -29.793 | 0.000 | -3.206 | -2.810 |
| last_day_rch_amt_action | -1.4945 | 0.066 | -22.695 | 0.000 | -1.624 | -1.365 |
| offnet_mou_action | 1.9554 | 0.115 | 17.075 | 0.000 | 1.731 | 2.180 |
| loc_ic_t2t_mou_good | 1.1709 | 0.115 | 10.208 | 0.000 | 0.946 | 1.396 |
| loc_ic_t2t_mou_action | -3.1436 | 0.135 | -23.281 | 0.000 | -3.408 | -2.879 |
| std_og_t2t_mou_good | 2.0012 | 0.066 | 30.175 | 0.000 | 1.871 | 2.131 |
| std_og_t2t_mou_action | -2.1915 | 0.101 | -21.783 | 0.000 | -2.389 | -1.994 |
| loc_ic_t2m_mou_good | 0.3606 | 0.164 | 2.194 | 0.028 | 0.038 | 0.683 |
| loc_ic_t2m_mou_action | -4.1269 | 0.175 | -23.623 | 0.000 | -4.469 | -3.784 |
| onnet_mou_action | 0.9868 | 0.099 | 9.995 | 0.000 | 0.793 | 1.180 |
| loc_og_t2m_mou_good | -3.1443 | 0.129 | -24.397 | 0.000 | -3.397 | -2.892 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 8 | loc_ic_t2m_mou_good | 9.47 |
| 3 | offnet_mou_action | 8.76 |
| 7 | std_og_t2t_mou_action | 8.04 |
| 10 | onnet_mou_action | 7.75 |
| 9 | loc_ic_t2m_mou_action | 7.64 |
| 1 | std_og_t2m_mou_action | 6.98 |
| 4 | loc_ic_t2t_mou_good | 5.30 |
| 11 | loc_og_t2m_mou_good | 5.27 |
| 5 | loc_ic_t2t_mou_action | 5.24 |
| 6 | std_og_t2t_mou_good | 3.37 |
| 0 | std_og_t2m_mou_good | 3.11 |
| 2 | last_day_rch_amt_action | 1.88 |
# Dropping 'loc_ic_t2m_mou_good' because it has high VIF
X_train_1 = X_train_1.drop('loc_ic_t2m_mou_good', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37850 |
| Model Family: | Binomial | Df Model: | 11 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -15393. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 30786. |
| Time: | 22:20:51 | Pearson chi2: | 5.24e+04 |
| No. Iterations: | 6 | Pseudo R-squ. (CS): | 0.4363 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.6401 | 0.033 | 19.478 | 0.000 | 0.576 | 0.705 |
| std_og_t2m_mou_good | 2.2429 | 0.067 | 33.635 | 0.000 | 2.112 | 2.374 |
| std_og_t2m_mou_action | -3.0093 | 0.101 | -29.848 | 0.000 | -3.207 | -2.812 |
| last_day_rch_amt_action | -1.4913 | 0.066 | -22.665 | 0.000 | -1.620 | -1.362 |
| offnet_mou_action | 1.9253 | 0.113 | 16.973 | 0.000 | 1.703 | 2.148 |
| loc_ic_t2t_mou_good | 1.2518 | 0.109 | 11.513 | 0.000 | 1.039 | 1.465 |
| loc_ic_t2t_mou_action | -3.2001 | 0.133 | -24.106 | 0.000 | -3.460 | -2.940 |
| std_og_t2t_mou_good | 2.0120 | 0.066 | 30.411 | 0.000 | 1.882 | 2.142 |
| std_og_t2t_mou_action | -2.1972 | 0.101 | -21.854 | 0.000 | -2.394 | -2.000 |
| loc_ic_t2m_mou_action | -3.9030 | 0.141 | -27.741 | 0.000 | -4.179 | -3.627 |
| onnet_mou_action | 0.9795 | 0.099 | 9.929 | 0.000 | 0.786 | 1.173 |
| loc_og_t2m_mou_good | -3.0284 | 0.117 | -25.908 | 0.000 | -3.258 | -2.799 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 3 | offnet_mou_action | 8.55 |
| 7 | std_og_t2t_mou_action | 8.04 |
| 9 | onnet_mou_action | 7.74 |
| 1 | std_og_t2m_mou_action | 6.98 |
| 5 | loc_ic_t2t_mou_action | 5.09 |
| 4 | loc_ic_t2t_mou_good | 4.82 |
| 10 | loc_og_t2m_mou_good | 4.30 |
| 8 | loc_ic_t2m_mou_action | 3.88 |
| 6 | std_og_t2t_mou_good | 3.33 |
| 0 | std_og_t2m_mou_good | 2.99 |
| 2 | last_day_rch_amt_action | 1.87 |
# Dropping 'offnet_mou_action' because it has high VIF
X_train_1 = X_train_1.drop('offnet_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37851 |
| Model Family: | Binomial | Df Model: | 10 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -15541. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 31082. |
| Time: | 22:20:52 | Pearson chi2: | 4.99e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4318 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.6865 | 0.033 | 21.061 | 0.000 | 0.623 | 0.750 |
| std_og_t2m_mou_good | 2.2031 | 0.066 | 33.146 | 0.000 | 2.073 | 2.333 |
| std_og_t2m_mou_action | -1.7471 | 0.067 | -26.074 | 0.000 | -1.878 | -1.616 |
| last_day_rch_amt_action | -1.2213 | 0.063 | -19.532 | 0.000 | -1.344 | -1.099 |
| loc_ic_t2t_mou_good | 1.0706 | 0.107 | 9.960 | 0.000 | 0.860 | 1.281 |
| loc_ic_t2t_mou_action | -3.1508 | 0.132 | -23.894 | 0.000 | -3.409 | -2.892 |
| std_og_t2t_mou_good | 1.9246 | 0.066 | 29.364 | 0.000 | 1.796 | 2.053 |
| std_og_t2t_mou_action | -2.4040 | 0.099 | -24.322 | 0.000 | -2.598 | -2.210 |
| loc_ic_t2m_mou_action | -3.6945 | 0.140 | -26.349 | 0.000 | -3.969 | -3.420 |
| onnet_mou_action | 1.2026 | 0.097 | 12.448 | 0.000 | 1.013 | 1.392 |
| loc_og_t2m_mou_good | -2.4346 | 0.112 | -21.756 | 0.000 | -2.654 | -2.215 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 6 | std_og_t2t_mou_action | 7.83 |
| 8 | onnet_mou_action | 7.52 |
| 4 | loc_ic_t2t_mou_action | 5.09 |
| 3 | loc_ic_t2t_mou_good | 4.77 |
| 7 | loc_ic_t2m_mou_action | 3.79 |
| 9 | loc_og_t2m_mou_good | 3.39 |
| 5 | std_og_t2t_mou_good | 3.32 |
| 0 | std_og_t2m_mou_good | 2.99 |
| 1 | std_og_t2m_mou_action | 2.97 |
| 2 | last_day_rch_amt_action | 1.78 |
# Dropping 'std_og_t2t_mou_action' because it has high VIF
X_train_1 = X_train_1.drop('std_og_t2t_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37852 |
| Model Family: | Binomial | Df Model: | 9 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -15847. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 31693. |
| Time: | 22:20:53 | Pearson chi2: | 4.85e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4226 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.8149 | 0.032 | 25.427 | 0.000 | 0.752 | 0.878 |
| std_og_t2m_mou_good | 2.2344 | 0.067 | 33.546 | 0.000 | 2.104 | 2.365 |
| std_og_t2m_mou_action | -2.0401 | 0.066 | -30.742 | 0.000 | -2.170 | -1.910 |
| last_day_rch_amt_action | -1.1225 | 0.062 | -18.227 | 0.000 | -1.243 | -1.002 |
| loc_ic_t2t_mou_good | 1.4322 | 0.106 | 13.461 | 0.000 | 1.224 | 1.641 |
| loc_ic_t2t_mou_action | -3.2029 | 0.134 | -23.966 | 0.000 | -3.465 | -2.941 |
| std_og_t2t_mou_good | 1.1211 | 0.051 | 21.812 | 0.000 | 1.020 | 1.222 |
| loc_ic_t2m_mou_action | -4.1652 | 0.143 | -29.071 | 0.000 | -4.446 | -3.884 |
| onnet_mou_action | -0.6325 | 0.062 | -10.210 | 0.000 | -0.754 | -0.511 |
| loc_og_t2m_mou_good | -2.2609 | 0.111 | -20.395 | 0.000 | -2.478 | -2.044 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 4 | loc_ic_t2t_mou_action | 5.07 |
| 3 | loc_ic_t2t_mou_good | 4.64 |
| 6 | loc_ic_t2m_mou_action | 3.74 |
| 8 | loc_og_t2m_mou_good | 3.34 |
| 7 | onnet_mou_action | 3.07 |
| 0 | std_og_t2m_mou_good | 2.95 |
| 1 | std_og_t2m_mou_action | 2.77 |
| 5 | std_og_t2t_mou_good | 2.72 |
| 2 | last_day_rch_amt_action | 1.76 |
# Dropping 'loc_ic_t2t_mou_action' because it has high VIF
X_train_1 = X_train_1.drop('loc_ic_t2t_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37853 |
| Model Family: | Binomial | Df Model: | 8 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -16181. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 32363. |
| Time: | 22:20:53 | Pearson chi2: | 5.12e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4123 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.8450 | 0.032 | 26.679 | 0.000 | 0.783 | 0.907 |
| std_og_t2m_mou_good | 2.4116 | 0.066 | 36.414 | 0.000 | 2.282 | 2.541 |
| std_og_t2m_mou_action | -2.2788 | 0.066 | -34.635 | 0.000 | -2.408 | -2.150 |
| last_day_rch_amt_action | -1.1553 | 0.061 | -19.026 | 0.000 | -1.274 | -1.036 |
| loc_ic_t2t_mou_good | -0.3587 | 0.070 | -5.092 | 0.000 | -0.497 | -0.221 |
| std_og_t2t_mou_good | 1.3804 | 0.050 | 27.509 | 0.000 | 1.282 | 1.479 |
| loc_ic_t2m_mou_action | -5.4277 | 0.141 | -38.489 | 0.000 | -5.704 | -5.151 |
| onnet_mou_action | -1.0093 | 0.060 | -16.934 | 0.000 | -1.126 | -0.893 |
| loc_og_t2m_mou_good | -1.9079 | 0.107 | -17.843 | 0.000 | -2.118 | -1.698 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 5 | loc_ic_t2m_mou_action | 3.32 |
| 7 | loc_og_t2m_mou_good | 3.32 |
| 0 | std_og_t2m_mou_good | 2.89 |
| 6 | onnet_mou_action | 2.84 |
| 1 | std_og_t2m_mou_action | 2.72 |
| 4 | std_og_t2t_mou_good | 2.52 |
| 3 | loc_ic_t2t_mou_good | 2.38 |
| 2 | last_day_rch_amt_action | 1.76 |
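The eliminations above were performed one variable at a time, re-checking the model summary after each drop. The same VIF-driven loop can be sketched compactly (a sketch that reuses calculate_vif and ignores the p-value checks; the threshold of 5 is an assumption):
def reduce_by_vif(x_df, threshold=5.0):
    # Iteratively drop the feature with the highest VIF until all fall below the threshold
    x_reduced = x_df.copy()
    while True:
        worst = calculate_vif(x_reduced).iloc[0]
        if worst['VIF'] <= threshold:
            return x_reduced
        x_reduced = x_reduced.drop(worst['Feature'], axis=1)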
model_1 = train_model(y_train, X_train_1)
roc(model_1, X_train_1)
Area under the ROC curve is 0.89, which is good.
train_perf_1 = model_performance(model_1, X_train_1)
train_perf_1
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.500000 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.676853 | 0.977920 | 0.375786 |
| 0.2 | 0.2 | 0.734483 | 0.961175 | 0.507791 |
| 0.3 | 0.3 | 0.771116 | 0.933812 | 0.608420 |
| 0.4 | 0.4 | 0.794702 | 0.894195 | 0.695209 |
| 0.5 | 0.5 | 0.805953 | 0.836195 | 0.775712 |
| 0.6 | 0.6 | 0.803708 | 0.756484 | 0.850932 |
| 0.7 | 0.7 | 0.776002 | 0.637156 | 0.914849 |
| 0.8 | 0.8 | 0.726956 | 0.498600 | 0.955311 |
| 0.9 | 0.9 | 0.618536 | 0.252549 | 0.984523 |
train_perf_1.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
Optimal cutoff can be 0.4 because we are trying to maximise sensitivity over accuracy.
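This choice can be formalised as well; for instance (a sketch with a hypothetical accuracy floor of 0.79), pick the lowest cutoff whose accuracy stays acceptable. The same reasoning applies to the cutoff choices for the later models.
# Lowest cutoff that keeps accuracy above the floor while favouring sensitivity
acceptable = train_perf_1[train_perf_1['accuracy'] >= 0.79]
suggested_cutoff = acceptable['probability_cutoff'].min()
print(suggested_cutoff)  # 0.4 for this model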
train_cutoff_1 = 0.4
train_perf_1.loc[[train_cutoff_1]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.4 | 0.4 | 0.794702 | 0.894195 | 0.695209 |
# Auto selecting top 10 features
X_train_2 = X_train[auto_select_features(10)]
# Training Logistic Regression Model
train_model(y_train, X_train_2).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37851 |
| Model Family: | Binomial | Df Model: | 10 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -15255. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 30511. |
| Time: | 22:21:14 | Pearson chi2: | 6.78e+04 |
| No. Iterations: | 6 | Pseudo R-squ. (CS): | 0.4404 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.0729 | 0.030 | 2.431 | 0.015 | 0.014 | 0.132 |
| std_og_t2m_mou_good | 2.4460 | 0.065 | 37.352 | 0.000 | 2.318 | 2.574 |
| std_og_t2m_mou_action | -2.5769 | 0.112 | -22.946 | 0.000 | -2.797 | -2.357 |
| total_og_mou_action | -8.9402 | 0.272 | -32.881 | 0.000 | -9.473 | -8.407 |
| offnet_mou_action | 2.8813 | 0.144 | 20.015 | 0.000 | 2.599 | 3.163 |
| loc_ic_t2t_mou_action | -1.8339 | 0.101 | -18.149 | 0.000 | -2.032 | -1.636 |
| std_og_t2t_mou_good | 2.2438 | 0.065 | 34.505 | 0.000 | 2.116 | 2.371 |
| std_og_t2t_mou_action | -3.1269 | 0.118 | -26.580 | 0.000 | -3.357 | -2.896 |
| loc_ic_t2m_mou_action | -3.9062 | 0.141 | -27.699 | 0.000 | -4.183 | -3.630 |
| onnet_mou_action | 3.5405 | 0.147 | 24.110 | 0.000 | 3.253 | 3.828 |
| std_og_mou_action | 4.6211 | 0.215 | 21.490 | 0.000 | 4.200 | 5.043 |
calculate_vif(X_train_2)
| Feature | VIF | |
|---|---|---|
| 2 | total_og_mou_action | 23.47 |
| 9 | std_og_mou_action | 19.84 |
| 8 | onnet_mou_action | 10.86 |
| 6 | std_og_t2t_mou_action | 10.10 |
| 3 | offnet_mou_action | 9.16 |
| 1 | std_og_t2m_mou_action | 8.39 |
| 7 | loc_ic_t2m_mou_action | 3.39 |
| 5 | std_og_t2t_mou_good | 3.05 |
| 4 | loc_ic_t2t_mou_action | 2.82 |
| 0 | std_og_t2m_mou_good | 2.78 |
# Dropping 'total_og_mou_action' because it has high VIF
X_train_2 = X_train_2.drop('total_og_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_2).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37852 |
| Model Family: | Binomial | Df Model: | 9 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -15986. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 31972. |
| Time: | 22:21:14 | Pearson chi2: | 5.97e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4183 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2030 | 0.029 | 6.962 | 0.000 | 0.146 | 0.260 |
| std_og_t2m_mou_good | 2.4375 | 0.065 | 37.505 | 0.000 | 2.310 | 2.565 |
| std_og_t2m_mou_action | -1.6707 | 0.100 | -16.710 | 0.000 | -1.867 | -1.475 |
| offnet_mou_action | 0.5873 | 0.102 | 5.773 | 0.000 | 0.388 | 0.787 |
| loc_ic_t2t_mou_action | -2.4669 | 0.099 | -24.821 | 0.000 | -2.662 | -2.272 |
| std_og_t2t_mou_good | 2.2370 | 0.064 | 34.841 | 0.000 | 2.111 | 2.363 |
| std_og_t2t_mou_action | -1.8643 | 0.102 | -18.223 | 0.000 | -2.065 | -1.664 |
| loc_ic_t2m_mou_action | -5.6790 | 0.137 | -41.451 | 0.000 | -5.948 | -5.410 |
| onnet_mou_action | 1.1606 | 0.103 | 11.264 | 0.000 | 0.959 | 1.363 |
| std_og_mou_action | -1.1316 | 0.114 | -9.965 | 0.000 | -1.354 | -0.909 |
calculate_vif(X_train_2)
| Feature | VIF | |
|---|---|---|
| 8 | std_og_mou_action | 10.90 |
| 5 | std_og_t2t_mou_action | 9.43 |
| 7 | onnet_mou_action | 8.14 |
| 1 | std_og_t2m_mou_action | 7.94 |
| 2 | offnet_mou_action | 6.39 |
| 6 | loc_ic_t2m_mou_action | 3.13 |
| 4 | std_og_t2t_mou_good | 3.03 |
| 0 | std_og_t2m_mou_good | 2.77 |
| 3 | loc_ic_t2t_mou_action | 2.76 |
# Dropping 'std_og_mou_action' because it has high VIF
X_train_2 = X_train_2.drop('std_og_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_2).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37853 |
| Model Family: | Binomial | Df Model: | 8 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -16036. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 32073. |
| Time: | 22:21:15 | Pearson chi2: | 5.93e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4168 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2707 | 0.028 | 9.535 | 0.000 | 0.215 | 0.326 |
| std_og_t2m_mou_good | 2.3777 | 0.065 | 36.862 | 0.000 | 2.251 | 2.504 |
| std_og_t2m_mou_action | -2.0677 | 0.092 | -22.497 | 0.000 | -2.248 | -1.888 |
| offnet_mou_action | 0.3328 | 0.098 | 3.391 | 0.001 | 0.140 | 0.525 |
| loc_ic_t2t_mou_action | -2.3834 | 0.099 | -24.142 | 0.000 | -2.577 | -2.190 |
| std_og_t2t_mou_good | 2.2031 | 0.064 | 34.397 | 0.000 | 2.078 | 2.329 |
| std_og_t2t_mou_action | -2.1735 | 0.098 | -22.236 | 0.000 | -2.365 | -1.982 |
| loc_ic_t2m_mou_action | -5.5687 | 0.136 | -40.981 | 0.000 | -5.835 | -5.302 |
| onnet_mou_action | 0.7851 | 0.096 | 8.201 | 0.000 | 0.597 | 0.973 |
calculate_vif(X_train_2)
| Feature | VIF | |
|---|---|---|
| 5 | std_og_t2t_mou_action | 7.70 |
| 7 | onnet_mou_action | 7.52 |
| 2 | offnet_mou_action | 6.23 |
| 1 | std_og_t2m_mou_action | 5.70 |
| 6 | loc_ic_t2m_mou_action | 3.07 |
| 4 | std_og_t2t_mou_good | 3.03 |
| 0 | std_og_t2m_mou_good | 2.77 |
| 3 | loc_ic_t2t_mou_action | 2.71 |
# Dropping 'std_og_t2t_mou_action' because it has high VIF
X_train_2 = X_train_2.drop('std_og_t2t_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_2).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37854 |
| Model Family: | Binomial | Df Model: | 7 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -16289. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 32577. |
| Time: | 22:21:16 | Pearson chi2: | 6.20e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4090 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.4130 | 0.028 | 14.941 | 0.000 | 0.359 | 0.467 |
| std_og_t2m_mou_good | 2.4231 | 0.065 | 37.475 | 0.000 | 2.296 | 2.550 |
| std_og_t2m_mou_action | -2.5698 | 0.089 | -28.804 | 0.000 | -2.745 | -2.395 |
| offnet_mou_action | 0.6723 | 0.097 | 6.934 | 0.000 | 0.482 | 0.862 |
| loc_ic_t2t_mou_action | -2.1407 | 0.099 | -21.563 | 0.000 | -2.335 | -1.946 |
| std_og_t2t_mou_good | 1.5206 | 0.051 | 29.630 | 0.000 | 1.420 | 1.621 |
| loc_ic_t2m_mou_action | -6.0111 | 0.138 | -43.627 | 0.000 | -6.281 | -5.741 |
| onnet_mou_action | -0.9050 | 0.060 | -14.964 | 0.000 | -1.023 | -0.786 |
calculate_vif(X_train_2)
| Feature | VIF | |
|---|---|---|
| 2 | offnet_mou_action | 5.90 |
| 1 | std_og_t2m_mou_action | 4.91 |
| 5 | loc_ic_t2m_mou_action | 3.04 |
| 6 | onnet_mou_action | 2.98 |
| 0 | std_og_t2m_mou_good | 2.69 |
| 4 | std_og_t2t_mou_good | 2.59 |
| 3 | loc_ic_t2t_mou_action | 2.55 |
# Dropping 'offnet_mou_action' because it has high VIF
X_train_2 = X_train_2.drop('offnet_mou_action', axis=1)
# Re-train model
train_model(y_train, X_train_2).summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37855 |
| Model Family: | Binomial | Df Model: | 6 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -16313. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 32626. |
| Time: | 22:21:16 | Pearson chi2: | 5.92e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4082 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>\|z\| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.4722 | 0.026 | 17.950 | 0.000 | 0.421 | 0.524 |
| std_og_t2m_mou_good | 2.4009 | 0.065 | 37.161 | 0.000 | 2.274 | 2.528 |
| std_og_t2m_mou_action | -2.1431 | 0.064 | -33.306 | 0.000 | -2.269 | -2.017 |
| loc_ic_t2t_mou_action | -2.1579 | 0.099 | -21.753 | 0.000 | -2.352 | -1.964 |
| std_og_t2t_mou_good | 1.4393 | 0.050 | 28.955 | 0.000 | 1.342 | 1.537 |
| loc_ic_t2m_mou_action | -5.7799 | 0.133 | -43.428 | 0.000 | -6.041 | -5.519 |
| onnet_mou_action | -0.8666 | 0.060 | -14.437 | 0.000 | -0.984 | -0.749 |
calculate_vif(X_train_2)
| Feature | VIF | |
|---|---|---|
| 5 | onnet_mou_action | 2.95 |
| 0 | std_og_t2m_mou_good | 2.65 |
| 1 | std_og_t2m_mou_action | 2.59 |
| 3 | std_og_t2t_mou_good | 2.57 |
| 2 | loc_ic_t2t_mou_action | 2.55 |
| 4 | loc_ic_t2m_mou_action | 2.29 |
model_2 = train_model(y_train, X_train_2)
roc(model_2, X_train_2)
Area under the ROC curve is 0.89, which is good.
train_perf_2 = model_performance(model_2, X_train_2)
train_perf_2
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.500000 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.675479 | 0.973377 | 0.377582 |
| 0.2 | 0.2 | 0.732027 | 0.958217 | 0.505837 |
| 0.3 | 0.3 | 0.768317 | 0.936242 | 0.600391 |
| 0.4 | 0.4 | 0.798030 | 0.907718 | 0.688342 |
| 0.5 | 0.5 | 0.816307 | 0.863346 | 0.769267 |
| 0.6 | 0.6 | 0.808489 | 0.767471 | 0.849506 |
| 0.7 | 0.7 | 0.761714 | 0.608156 | 0.915271 |
| 0.8 | 0.8 | 0.722360 | 0.487771 | 0.956949 |
| 0.9 | 0.9 | 0.609265 | 0.233585 | 0.984945 |
train_perf_2.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
Optimal cutoff can be 0.4 because we are trying to maximise sensitivity over accuracy.
train_cutoff_2 = 0.4
train_perf_2.loc[[train_cutoff_2]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.4 | 0.4 | 0.79803 | 0.907718 | 0.688342 |
Next, we train models without direct interpretability, using the PCA-transformed features.
Logistic Regression with PCA
X_train_3 = X_train_pca
# Creating KFold object with 5 splits
folds = KFold(n_splits=5, shuffle=True, random_state=4)
# Specifying hyperparameters to control regularisation
params = {"C": [0.01, 0.1, 1, 10, 100, 1000]}
lg = LogisticRegression()
# Specifying recall as the scoring metric, as we are more focused on achieving high sensitivity than high accuracy
grid_search_lg = GridSearchCV(estimator = lg, param_grid = params, scoring= 'recall', cv = folds, verbose = 1, return_train_score=True)
# Fitting the model
grid_search_lg.fit(X_train_pca, y_train)
Fitting 5 folds for each of 6 candidates, totalling 30 fits
GridSearchCV(cv=KFold(n_splits=5, random_state=4, shuffle=True),
             estimator=LogisticRegression(),
             param_grid={'C': [0.01, 0.1, 1, 10, 100, 1000]},
             return_train_score=True, scoring='recall', verbose=1)
# results of grid search CV
cv_results = pd.DataFrame(grid_search_lg.cv_results_)
cv_results
| mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_C | params | split0_test_score | split1_test_score | split2_test_score | split3_test_score | split4_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | split3_train_score | split4_train_score | mean_train_score | std_train_score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.038329 | 0.006521 | 0.004342 | 0.000482 | 0.01 | {'C': 0.01} | 0.863904 | 0.870831 | 0.858095 | 0.85447 | 0.866614 | 0.862783 | 0.005862 | 1 | 0.866302 | 0.863063 | 0.863391 | 0.864019 | 0.860933 | 0.863542 | 0.001727 |
| 1 | 0.035730 | 0.000773 | 0.004010 | 0.000534 | 0.1 | {'C': 0.1} | 0.862567 | 0.867920 | 0.855428 | 0.85317 | 0.865566 | 0.860930 | 0.005719 | 2 | 0.862814 | 0.859896 | 0.861547 | 0.863223 | 0.859543 | 0.861405 | 0.001487 |
| 2 | 0.037409 | 0.001791 | 0.003714 | 0.000164 | 1 | {'C': 1} | 0.861765 | 0.866596 | 0.856228 | 0.85343 | 0.864780 | 0.860560 | 0.005006 | 3 | 0.862155 | 0.859566 | 0.861217 | 0.862494 | 0.859742 | 0.861035 | 0.001204 |
| 3 | 0.038531 | 0.001883 | 0.003837 | 0.000360 | 10 | {'C': 10} | 0.861497 | 0.866331 | 0.855428 | 0.85343 | 0.864780 | 0.860293 | 0.005076 | 4 | 0.862089 | 0.859368 | 0.861151 | 0.862428 | 0.859676 | 0.860942 | 0.001237 |
| 4 | 0.038309 | 0.003175 | 0.003868 | 0.000541 | 100 | {'C': 100} | 0.861230 | 0.866331 | 0.855428 | 0.85343 | 0.864780 | 0.860240 | 0.005064 | 5 | 0.862089 | 0.859368 | 0.861151 | 0.862428 | 0.859808 | 0.860969 | 0.001211 |
| 5 | 0.045172 | 0.007460 | 0.004252 | 0.000610 | 1000 | {'C': 1000} | 0.861230 | 0.866331 | 0.855428 | 0.85343 | 0.864780 | 0.860240 | 0.005064 | 5 | 0.862089 | 0.859368 | 0.861151 | 0.862428 | 0.859808 | 0.860969 | 0.001211 |
# plot of C versus train and validation scores
plt.figure(figsize=(12, 10))
plt.plot(cv_results['param_C'], cv_results['mean_test_score'])
plt.plot(cv_results['param_C'], cv_results['mean_train_score'])
plt.xlabel('C')
plt.ylabel('sensitivity')
plt.legend(['test result', 'train result'], loc='upper left')
plt.xscale('log')
# Best score with best C
best_score = grid_search_lg.best_score_
best_C = grid_search_lg.best_params_['C']
print(" The highest test sensitivity is {0} at C: {1}".format(best_score, best_C))
The highest test sensitivity is 0.8627828946525055 at C: 0.01
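In scikit-learn, C is the inverse of the regularisation strength, so C = 0.01 is the most heavily regularised candidate in the grid. Because GridSearchCV refits on the full training set by default (refit=True), best_estimator_ below is ready to use; an equivalent manual refit would be:
lg_best = LogisticRegression(C=grid_search_lg.best_params_['C'])
lg_best.fit(X_train_pca, y_train)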
model_3 = grid_search_lg.best_estimator_
roc(model_3, X_train_3, lib='sklearn')
Area under the ROC curve is 0.91, which is good.
train_perf_3 = model_performance(model_3, X_train_3, lib='sklearn')
train_perf_3
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.500000 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.704347 | 0.983836 | 0.424859 |
| 0.2 | 0.2 | 0.762612 | 0.960752 | 0.564471 |
| 0.3 | 0.3 | 0.796313 | 0.932967 | 0.659659 |
| 0.4 | 0.4 | 0.824864 | 0.906344 | 0.743384 |
| 0.5 | 0.5 | 0.835930 | 0.862923 | 0.808938 |
| 0.6 | 0.6 | 0.834557 | 0.802229 | 0.866885 |
| 0.7 | 0.7 | 0.817020 | 0.715493 | 0.918546 |
| 0.8 | 0.8 | 0.763113 | 0.567799 | 0.958428 |
| 0.9 | 0.9 | 0.635915 | 0.286356 | 0.985474 |
train_perf_3.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
Optimal cutoff can be 0.4 because we are trying to maximise sensitivity over accuracy.
train_cutoff_3 = 0.4
train_perf_3.loc[[train_cutoff_3]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.4 | 0.4 | 0.824864 | 0.906344 | 0.743384 |
Decision Tree with PCA
X_train_4 = X_train_pca
dtree = DecisionTreeClassifier()
# Creating hyperparameter grid
params = {
    'max_depth': range(5, 15, 5),
    'min_samples_leaf': range(50, 150, 50),
    'min_samples_split': range(50, 150, 50),
}
grid_search_dt = GridSearchCV(estimator = dtree, param_grid = params, scoring= 'recall', cv = 5, verbose = 1)
grid_search_dt.fit(X_train_4, y_train)
Fitting 5 folds for each of 8 candidates, totalling 40 fits
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': range(5, 15, 5),
                         'min_samples_leaf': range(50, 150, 50),
                         'min_samples_split': range(50, 150, 50)},
             scoring='recall', verbose=1)
cv_results = pd.DataFrame(grid_search_dt.cv_results_)
cv_results
| mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_max_depth | param_min_samples_leaf | param_min_samples_split | params | split0_test_score | split1_test_score | split2_test_score | split3_test_score | split4_test_score | mean_test_score | std_test_score | rank_test_score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.366884 | 0.024186 | 0.004141 | 0.000300 | 5 | 50 | 50 | {'max_depth': 5, 'min_samples_leaf': 50, 'min_... | 0.847332 | 0.851862 | 0.902536 | 0.828051 | 0.852879 | 0.856532 | 0.024687 | 5 |
| 1 | 0.343261 | 0.005531 | 0.004130 | 0.000283 | 5 | 50 | 100 | {'max_depth': 5, 'min_samples_leaf': 50, 'min_... | 0.847332 | 0.851862 | 0.902536 | 0.828051 | 0.852879 | 0.856532 | 0.024687 | 5 |
| 2 | 0.339829 | 0.001362 | 0.003939 | 0.000045 | 5 | 100 | 50 | {'max_depth': 5, 'min_samples_leaf': 100, 'min... | 0.841521 | 0.846844 | 0.902800 | 0.828843 | 0.853936 | 0.854789 | 0.025370 | 7 |
| 3 | 0.343904 | 0.006597 | 0.004173 | 0.000214 | 5 | 100 | 100 | {'max_depth': 5, 'min_samples_leaf': 100, 'min... | 0.841521 | 0.846844 | 0.902800 | 0.828843 | 0.853936 | 0.854789 | 0.025370 | 7 |
| 4 | 0.558816 | 0.009316 | 0.004671 | 0.000379 | 10 | 50 | 50 | {'max_depth': 10, 'min_samples_leaf': 50, 'min... | 0.889065 | 0.880116 | 0.893819 | 0.875594 | 0.892235 | 0.886166 | 0.007102 | 1 |
| 5 | 0.570881 | 0.018938 | 0.004177 | 0.000186 | 10 | 50 | 100 | {'max_depth': 10, 'min_samples_leaf': 50, 'min... | 0.889065 | 0.880116 | 0.893819 | 0.875594 | 0.892235 | 0.886166 | 0.007102 | 1 |
| 6 | 0.536892 | 0.013234 | 0.004166 | 0.000256 | 10 | 100 | 50 | {'max_depth': 10, 'min_samples_leaf': 100, 'mi... | 0.879820 | 0.856615 | 0.893291 | 0.879028 | 0.870312 | 0.875813 | 0.012089 | 3 |
| 7 | 0.525380 | 0.010356 | 0.003997 | 0.000064 | 10 | 100 | 100 | {'max_depth': 10, 'min_samples_leaf': 100, 'mi... | 0.879820 | 0.856615 | 0.893291 | 0.879028 | 0.870312 | 0.875813 | 0.012089 | 3 |
# Best mean CV sensitivity (recall is scored at the default 0.5 cutoff)
grid_search_dt.best_score_
0.8861658681359241
model_4 = grid_search_dt.best_estimator_
model_4
DecisionTreeClassifier(max_depth=10, min_samples_leaf=50, min_samples_split=50)
roc(model_4, X_train_4, lib='sklearn')
Area under ROC curve is 0.96 which is good.
train_perf_4 = model_performance(model_4, X_train_4, lib='sklearn')
train_perf_4
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.764909 | 1.000000 | 0.529819 |
| 0.1 | 0.1 | 0.829248 | 0.994242 | 0.664254 |
| 0.2 | 0.2 | 0.862210 | 0.980561 | 0.743859 |
| 0.3 | 0.3 | 0.884000 | 0.956738 | 0.811262 |
| 0.4 | 0.4 | 0.892953 | 0.933548 | 0.852359 |
| 0.5 | 0.5 | 0.894591 | 0.911785 | 0.877397 |
| 0.6 | 0.6 | 0.892425 | 0.888648 | 0.896202 |
| 0.7 | 0.7 | 0.881279 | 0.843220 | 0.919339 |
| 0.8 | 0.8 | 0.842586 | 0.729227 | 0.955945 |
| 0.9 | 0.9 | 0.786012 | 0.598278 | 0.973747 |
train_perf_4.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
Optimal cutoff can be 0.1 because we are trying to maximise sensitivity over accuracy.
train_cutoff_4 = 0.1
train_perf_4.loc[[train_cutoff_4]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.1 | 0.1 | 0.829248 | 0.994242 | 0.664254 |
Random Forest with PCA
X_train_5 = X_train_pca
rf = RandomForestClassifier()
params = {
    'max_depth': range(5, 10, 5),
    'min_samples_leaf': range(50, 150, 50),
    'min_samples_split': range(50, 150, 50),
    'n_estimators': [100, 200, 300],
    'max_features': [10, 20]
}
grid_search_rf = GridSearchCV(estimator = rf, param_grid = params, cv = 3, n_jobs = -1, verbose = 1, return_train_score=True)
# Fitting the model
grid_search_rf.fit(X_train_5, y_train)
Fitting 3 folds for each of 24 candidates, totalling 72 fits
GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'max_depth': range(5, 10, 5), 'max_features': [10, 20],
                         'min_samples_leaf': range(50, 150, 50),
                         'min_samples_split': range(50, 150, 50),
                         'n_estimators': [100, 200, 300]},
             return_train_score=True, verbose=1)
grid_search_rf.best_score_
0.8474461538770628
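Note that, unlike the earlier searches, this grid search does not set scoring='recall', so best_score_ above is the mean cross-validated accuracy rather than sensitivity. A recall-scored variant (a sketch, not the run shown here) would be:
grid_search_rf_recall = GridSearchCV(estimator=rf, param_grid=params, scoring='recall',
                                     cv=3, n_jobs=-1, verbose=1, return_train_score=True)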
model_5 = grid_search_rf.best_estimator_
model_5
RandomForestClassifier(max_depth=5, max_features=20, min_samples_leaf=50,
                       min_samples_split=50, n_estimators=200)
roc(model_5, X_train_5, lib='sklearn')
Area under ROC curve is 0.93 which is good.
train_perf_5 = model_performance(model_5, X_train_5, lib='sklearn')
train_perf_5
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.500000 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.703793 | 0.996989 | 0.410596 |
| 0.2 | 0.2 | 0.773837 | 0.982621 | 0.565052 |
| 0.3 | 0.3 | 0.819608 | 0.948444 | 0.690772 |
| 0.4 | 0.4 | 0.840790 | 0.910200 | 0.771380 |
| 0.5 | 0.5 | 0.852385 | 0.854683 | 0.850087 |
| 0.6 | 0.6 | 0.842824 | 0.787491 | 0.898156 |
| 0.7 | 0.7 | 0.823543 | 0.709947 | 0.937140 |
| 0.8 | 0.8 | 0.767683 | 0.574085 | 0.961280 |
| 0.9 | 0.9 | 0.732740 | 0.484813 | 0.980667 |
train_perf_5.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
Optimal cutoff can be 0.3 because we are trying to maximise sensitivity over accuracy.
train_cutoff_5 = 0.3
train_perf_5.loc[[train_cutoff_5]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.3 | 0.3 | 0.819608 | 0.948444 | 0.690772 |
# Feature Scaling on Test Set
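# Note: transform() (not fit_transform()) is used here and for PCA below, so the test set
# is scaled and projected with parameters learned from the training data only, avoiding leakage.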
df_test[num_vars] = scaler.transform(df_test[num_vars])
# Extract target variable
y_test = df_test.pop('churn')
X_test = df_test
# PCA transform
X_test_pca = X_test[num_vars].copy()
X_test_pca = pca.transform(X_test_pca)
X_test_pca = pd.DataFrame(X_test_pca, columns=['PC'+str(i) for i in range(1,26)])
X_test_pca.head()
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 | PC13 | PC14 | PC15 | PC16 | PC17 | PC18 | PC19 | PC20 | PC21 | PC22 | PC23 | PC24 | PC25 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.440440 | -0.697849 | -0.423436 | 0.050620 | -0.139523 | -0.366688 | 0.434308 | 0.390053 | -0.156415 | -0.048298 | -0.627741 | 0.107684 | 0.065399 | -0.099949 | -0.148811 | 0.040262 | -0.114201 | 0.024325 | -0.030826 | 0.245024 | 0.253547 | -0.002278 | 0.320436 | 0.072466 | -0.083523 |
| 1 | -0.263751 | -0.202711 | 0.556703 | -0.287392 | 0.072033 | -0.940119 | -0.644798 | -0.528677 | 0.341877 | 0.288094 | -0.260406 | 0.110319 | -0.287287 | -0.322323 | 0.119659 | -0.078429 | 0.264867 | 0.491032 | -0.406629 | -0.342707 | 0.202471 | 0.101099 | 0.309833 | -0.380015 | 0.139167 |
| 2 | 1.791507 | -0.277309 | -1.355858 | -0.882396 | -0.199006 | 0.045232 | -0.469308 | 0.100522 | -0.440546 | -0.055695 | -0.118625 | 0.820371 | 0.187019 | 0.151478 | -0.655997 | -0.384681 | 0.101573 | 0.154003 | 0.148422 | -0.232870 | -0.183229 | 0.176190 | -0.129028 | 0.080129 | 0.260594 |
| 3 | 0.211306 | -0.298223 | 1.121458 | -0.252266 | -0.194304 | -0.072581 | -0.453454 | -0.450398 | 0.294236 | -0.424393 | -0.103451 | -0.340706 | 0.431403 | 0.627435 | -0.051703 | 0.363340 | -0.260935 | -0.359697 | 0.214642 | 0.358296 | -0.256062 | 0.206435 | 0.117578 | -0.269853 | -0.039681 |
| 4 | -0.827084 | 1.260245 | 0.324048 | -0.827283 | -0.600716 | 0.079294 | -0.943727 | 0.005158 | 0.147385 | -0.300392 | -0.171619 | 0.105876 | -0.016424 | 0.114586 | -0.166453 | -0.072910 | 0.064564 | -0.266438 | -0.105746 | 0.001399 | 0.008426 | -0.087106 | 0.007990 | -0.022491 | 0.077490 |
test_perf_1 = model_performance(model_1, X_test[X_train_1.columns], y_test)
test_perf_1
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.034175 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.394975 | 0.975610 | 0.374430 |
| 0.2 | 0.2 | 0.524172 | 0.930314 | 0.509802 |
| 0.3 | 0.3 | 0.620862 | 0.891986 | 0.611269 |
| 0.4 | 0.4 | 0.704573 | 0.839721 | 0.699790 |
| 0.5 | 0.5 | 0.775542 | 0.787456 | 0.775120 |
| 0.6 | 0.6 | 0.840438 | 0.710801 | 0.845025 |
| 0.7 | 0.7 | 0.905096 | 0.592334 | 0.916163 |
| 0.8 | 0.8 | 0.940462 | 0.452962 | 0.957712 |
| 0.9 | 0.9 | 0.956775 | 0.198606 | 0.983603 |
test_perf_1.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
test_cutoff_1 = 0.5
test_perf_1.loc[[test_cutoff_1]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.5 | 0.5 | 0.775542 | 0.787456 | 0.77512 |
test_perf_2 = model_performance(model_2, X_test[X_train_2.columns], y_test)
test_perf_2
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.034175 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.401524 | 0.968641 | 0.381457 |
| 0.2 | 0.2 | 0.513813 | 0.916376 | 0.499568 |
| 0.3 | 0.3 | 0.611931 | 0.888502 | 0.602145 |
| 0.4 | 0.4 | 0.692189 | 0.853659 | 0.686475 |
| 0.5 | 0.5 | 0.766254 | 0.808362 | 0.764764 |
| 0.6 | 0.6 | 0.843058 | 0.721254 | 0.847368 |
| 0.7 | 0.7 | 0.906883 | 0.609756 | 0.917396 |
| 0.8 | 0.8 | 0.941415 | 0.459930 | 0.958451 |
| 0.9 | 0.9 | 0.957728 | 0.174216 | 0.985452 |
test_perf_2.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
# Choosing cutoff to maximise Sensitivity
test_cutoff_2 = 0.5
test_perf_2.loc[[test_cutoff_2]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.5 | 0.5 | 0.766254 | 0.808362 | 0.764764 |
test_perf_3 = model_performance(model_3, X_test_pca[X_train_3.columns], y_test, lib='sklearn')
test_perf_3
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.034175 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.438080 | 0.986063 | 0.418691 |
| 0.2 | 0.2 | 0.580853 | 0.958188 | 0.567501 |
| 0.3 | 0.3 | 0.675994 | 0.895470 | 0.668228 |
| 0.4 | 0.4 | 0.749345 | 0.829268 | 0.746517 |
| 0.5 | 0.5 | 0.809121 | 0.801394 | 0.809395 |
| 0.6 | 0.6 | 0.866992 | 0.763066 | 0.870669 |
| 0.7 | 0.7 | 0.911646 | 0.682927 | 0.919739 |
| 0.8 | 0.8 | 0.946654 | 0.547038 | 0.960794 |
| 0.9 | 0.9 | 0.962134 | 0.247387 | 0.987424 |
test_perf_3.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
test_cutoff_3 = 0.5
test_perf_3.loc[[test_cutoff_3]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.5 | 0.5 | 0.809121 | 0.801394 | 0.809395 |
test_perf_4 = model_performance(model_4, X_test_pca[X_train_4.columns], y_test, lib='sklearn')
test_perf_4
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.532627 | 0.926829 | 0.518678 |
| 0.1 | 0.1 | 0.652774 | 0.867596 | 0.645173 |
| 0.2 | 0.2 | 0.725292 | 0.832753 | 0.721489 |
| 0.3 | 0.3 | 0.790307 | 0.763066 | 0.791271 |
| 0.4 | 0.4 | 0.828650 | 0.714286 | 0.832696 |
| 0.5 | 0.5 | 0.851869 | 0.675958 | 0.858094 |
| 0.6 | 0.6 | 0.870326 | 0.665505 | 0.877574 |
| 0.7 | 0.7 | 0.887950 | 0.609756 | 0.897793 |
| 0.8 | 0.8 | 0.924506 | 0.505226 | 0.939342 |
| 0.9 | 0.9 | 0.944034 | 0.435540 | 0.962027 |
test_perf_4.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
test_cutoff_4 = 0.3
test_perf_4.loc[[test_cutoff_4]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.3 | 0.3 | 0.790307 | 0.763066 | 0.791271 |
test_perf_5 = model_performance(model_5, X_test_pca[X_train_5.columns], y_test, lib='sklearn')
test_perf_5
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.0 | 0.0 | 0.034175 | 1.000000 | 0.000000 |
| 0.1 | 0.1 | 0.425101 | 0.982578 | 0.405375 |
| 0.2 | 0.2 | 0.572160 | 0.947735 | 0.558871 |
| 0.3 | 0.3 | 0.689926 | 0.874564 | 0.683393 |
| 0.4 | 0.4 | 0.772089 | 0.832753 | 0.769942 |
| 0.5 | 0.5 | 0.845797 | 0.766551 | 0.848601 |
| 0.6 | 0.6 | 0.887950 | 0.689895 | 0.894957 |
| 0.7 | 0.7 | 0.921529 | 0.571429 | 0.933917 |
| 0.8 | 0.8 | 0.941534 | 0.491289 | 0.957465 |
| 0.9 | 0.9 | 0.959157 | 0.407666 | 0.978671 |
test_perf_5.plot(x='probability_cutoff', y=['accuracy','sensitivity','specificity'], figsize=(10,8))
plt.show()
test_cutoff_5 = 0.4
test_perf_5.loc[[test_cutoff_5]]
| probability_cutoff | accuracy | sensitivity | specificity | |
|---|---|---|---|---|
| 0.4 | 0.4 | 0.772089 | 0.832753 | 0.769942 |
model_names = ['Model 1', 'Model 2', 'Model 3', 'Model 4', 'Model 5']
models = [model_1, model_2, model_3, model_4, model_5]
libs = ['sm', 'sm', 'sklearn', 'sklearn', 'sklearn']
used_pca = ['No', 'No', 'Yes', 'Yes', 'Yes']
train_perfs = [train_perf_1, train_perf_2, train_perf_3, train_perf_4, train_perf_5]
test_perfs = [test_perf_1, test_perf_2, test_perf_3, test_perf_4, test_perf_5]
train_cutoffs = [train_cutoff_1, train_cutoff_2, train_cutoff_3, train_cutoff_4, train_cutoff_5]
test_cutoffs = [test_cutoff_1, test_cutoff_2, test_cutoff_3, test_cutoff_4, test_cutoff_5]
num_predictors = []
train_accuracies = []
test_accuracies = []
train_sensitivities = []
test_sensitivities = []
for model, lib, train_perf, test_perf, train_cutoff, test_cutoff in zip(models, libs, train_perfs, test_perfs, train_cutoffs, test_cutoffs):
    if lib == 'sm':
        p = len(model.params) - 1  # Subtracting 1 so the intercept is not counted
    else:
        p = 25
    train_accuracy = train_perf.loc[train_cutoff, 'accuracy']
    test_accuracy = test_perf.loc[test_cutoff, 'accuracy']
    train_sensitivity = train_perf.loc[train_cutoff, 'sensitivity']
    test_sensitivity = test_perf.loc[test_cutoff, 'sensitivity']
    num_predictors.append(p)
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)
    train_sensitivities.append(train_sensitivity)
    test_sensitivities.append(test_sensitivity)
result_df = pd.DataFrame({'Model': model_names, 'Used PCA': used_pca, 'No. of Predictors': num_predictors, 'Accuracy (Train)': train_accuracies, 'Accuracy (Test)': test_accuracies, 'Sensitivity (Train)': train_sensitivities, 'Sensitivity (Test)': test_sensitivities})
result_df
| Model | Used PCA | No. of Predictors | Accuracy (Train) | Accuracy (Test) | Sensitivity (Train) | Sensitivity (Test) | |
|---|---|---|---|---|---|---|---|
| 0 | Model 1 | No | 8 | 0.794702 | 0.775542 | 0.894195 | 0.787456 |
| 1 | Model 2 | No | 6 | 0.798030 | 0.766254 | 0.907718 | 0.808362 |
| 2 | Model 3 | Yes | 25 | 0.824864 | 0.809121 | 0.906344 | 0.801394 |
| 3 | Model 4 | Yes | 25 | 0.829248 | 0.790307 | 0.994242 | 0.763066 |
| 4 | Model 5 | Yes | 25 | 0.819608 | 0.772089 | 0.948444 | 0.832753 |
Assessment: Model 2 gives strong test sensitivity (0.81) with only 6 interpretable predictors; among the PCA models, Model 3 balances test accuracy (0.81) and sensitivity (0.80) best, while Model 4 overfits (train sensitivity 0.99 vs test 0.76). We therefore carry Model 2 and Model 3 forward as the final models.
# Final Models
final_model_1 = model_2
final_model_2 = model_3
final_model_1.summary()
| Dep. Variable: | churn | No. Observations: | 37862 |
|---|---|---|---|
| Model: | GLM | Df Residuals: | 37855 |
| Model Family: | Binomial | Df Model: | 6 |
| Link Function: | Logit | Scale: | 1.0000 |
| Method: | IRLS | Log-Likelihood: | -16313. |
| Date: | Thu, 08 Dec 2022 | Deviance: | 32626. |
| Time: | 22:27:21 | Pearson chi2: | 5.92e+04 |
| No. Iterations: | 7 | Pseudo R-squ. (CS): | 0.4082 |
| Covariance Type: | nonrobust |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.4722 | 0.026 | 17.950 | 0.000 | 0.421 | 0.524 |
| std_og_t2m_mou_good | 2.4009 | 0.065 | 37.161 | 0.000 | 2.274 | 2.528 |
| std_og_t2m_mou_action | -2.1431 | 0.064 | -33.306 | 0.000 | -2.269 | -2.017 |
| loc_ic_t2t_mou_action | -2.1579 | 0.099 | -21.753 | 0.000 | -2.352 | -1.964 |
| std_og_t2t_mou_good | 1.4393 | 0.050 | 28.955 | 0.000 | 1.342 | 1.537 |
| loc_ic_t2m_mou_action | -5.7799 | 0.133 | -43.428 | 0.000 | -6.041 | -5.519 |
| onnet_mou_action | -0.8666 | 0.060 | -14.437 | 0.000 | -0.984 | -0.749 |
| Variables | Coefficients |
|---|---|
| onnet_mou_action | -0.8666 |
| loc_ic_t2t_mou_action | -2.1579 |
| std_og_t2t_mou_good | 1.4393 |
| loc_ic_t2m_mou_action | -5.7799 |
| std_og_t2m_mou_good | 2.4009 |
| std_og_t2m_mou_action | -2.1431 |
The predictor with the largest negative coefficient is loc_ic_t2m_mou_action: the higher the local incoming call minutes from other networks in the action phase (month of August), the less likely the customer is to churn.
The predictor with the largest positive coefficient is std_og_t2m_mou_good: the higher the STD outgoing call minutes to other networks in the good phase (months of June and July), the more likely the customer is to churn.
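Since this is a logit model, exponentiating the coefficients turns them into odds ratios, which makes these readings more concrete (a small sketch using final_model_1 from above; note the predictors are min-max scaled):
# Odds ratio associated with a one-unit increase in each scaled predictor
print(np.exp(final_model_1.params).round(3))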
num_uni_box_seg_analysis('onnet_mou_action', 'Same network minutes of usage in action phase')
plt.show()
num_uni_box_seg_analysis('loc_ic_t2t_mou_action', 'Local incoming from same network minutes of usage in action phase')
plt.show()
num_uni_box_seg_analysis('std_og_t2t_mou_good', 'STD outgoing to same network minutes of usage in good phase')
plt.show()
num_uni_box_seg_analysis('loc_ic_t2m_mou_action', 'Local incoming from other network minutes of usage in action phase')
plt.show()
num_uni_box_seg_analysis('std_og_t2m_mou_good', 'STD outgoing to other network minutes of usage in good phase')
plt.show()
num_uni_box_seg_analysis('std_og_t2m_mou_action', 'STD outgoing to other network minutes of usage in action phase')
plt.show()
Recommendations:
- Total minutes of usage decreases from the good phase (months 6 & 7) to the action phase (month 8).
- Total minutes of usage is very low in the action phase (month 8).
- ARPU decreases from the good phase (months 6 & 7) to the action phase (month 8).
- Recharge amount decreases from the good phase (months 6 & 7) to the action phase (month 8).
- Number of recharges decreases from the good phase (months 6 & 7) to the action phase (month 8).