gilangw committed on
Commit
b30fead
·
1 Parent(s): 92584f3

Upload 14 files

Browse files
adaboost_logreg_10_features.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5ee7277a0e7dd0c1cecabad45bc134c4835123ce2a0f4263628efda382de489
3
+ size 4000
home.py CHANGED
@@ -2,10 +2,10 @@ import streamlit as st
2
 
3
  def run():
4
  st.write('## Our Team :')
5
- st.write('##### :adult: [Gilang Wiradhyaksa](https://github.com/gilangwd)')
6
- st.write('##### :adult: [Stephanus Adinata Susanto](https://github.com/StephanusAdinata)')
7
- st.write('##### :adult: [Samuel Christian Soendjojo](https://github.com/samchriss94)')
8
- st.write('##### :adult: [Joshua Osaze Kurniawan](https://github.com/JoshuaOsazeKurniawan)')
9
 
10
  st.write('## Background :')
11
  st.markdown('''
 
2
 
3
  def run():
4
  st.write('## Our Team :')
5
+ st.write('##### :adult: [Gilang Wiradhyaksa](https://www.linkedin.com/in/gilangwiradhyaksa/) | [GitHub](https://github.com/gilangwd)')
6
+ st.write('##### :adult: [Stephanus Adinata Susanto](https://www.linkedin.com/in/stephanus-adinata-susanto-1b115b170/) | [GitHub](https://github.com/StephanusAdinata)')
7
+ st.write('##### :adult: [Samuel Christian Soendjojo](https://www.linkedin.com/in/samchriss94/) | [GitHub](https://github.com/samchriss94)')
8
+ st.write('##### :adult: [Joshua Osaze Kurniawan](https://www.linkedin.com/in/joshua-osaze-kurniawan-45560228a/) | [GitHub](https://github.com/JoshuaOsazeKurniawan)')
9
 
10
  st.write('## Background :')
11
  st.markdown('''
prediction.py CHANGED
@@ -3,11 +3,14 @@ import pandas as pd
3
  import numpy as np
4
  import pickle
5
  import os
 
 
6
  import json
7
 
8
  def run():
9
  # Load Model Classification
10
  with open('adaboost_logreg_best.pkl', 'rb') as file_1:
 
11
  classification_model = pickle.load(file_1)
12
 
13
  # Load Model Clustering
@@ -32,108 +35,129 @@ def run():
32
 
33
  # Create Function for Prediction
34
  def predictData(df):
35
- # Classification prediction
36
- y_pred_uploaded = classification_model.predict(df)
37
- df['churn'] = y_pred_uploaded
38
- # st.dataframe(df)
39
 
40
- # Filter the DataFrame for Predicted Churn (1)
41
- df_churn = df[df['churn'] == 1]
42
-
43
- churnCustomer = len(df_churn)
44
-
45
- if churnCustomer == 0:
46
- st.write('## There is no Customer predicted as Churn from this Data!')
47
  else:
48
- # Clustering prediction for Predicted Churn (1)
49
- ## Split Numerical and Categorical for K-Prototype
50
- data_cluster_num = df_churn[num_col]
51
- data_cluster_cat = df_churn[cat_col]
52
-
53
- ## Scale Numerical column
54
- num_scaled = scaler.transform(data_cluster_num)
55
-
56
- ## Merge Scaled Numerical + Categorical
57
- data_cluster_final = np.concatenate([num_scaled, data_cluster_cat], axis=1)
58
- data_cluster_final = pd.DataFrame(data_cluster_final, columns=['tenure', 'monthly_charges'] + cat_col)
59
- data_cluster_final = data_cluster_final.infer_objects()
60
-
61
- ## Mark Categorical Column
62
- index_cat_columns = [data_cluster_final.columns.get_loc(col) for col in cat_col]
63
-
64
- ## Predict Cluster
65
- y_cluster = clustering_model.predict(data_cluster_final, categorical=index_cat_columns)
66
- # y_cluster = []
67
- #for rd in range(0, len(df_churn)): y_cluster.append(random.randint(0, 2)) # Random Generator for testing
68
- df_churn['cluster'] = y_cluster
69
-
70
- # Split Data into 3 Cluster DataFrames
71
- df_cluster_0 = df_churn[df_churn['cluster'] == 0]
72
- df_cluster_1 = df_churn[df_churn['cluster'] == 1]
73
- df_cluster_2 = df_churn[df_churn['cluster'] == 2]
74
-
75
- st.write(f'## Result : `{churnCustomer} customer` are predicted as churn!')
76
- st.write('##### Here are some suggestion to minimalize churn potential for each customer depend on their cluster')
77
- c0, c1, c2 = '', '', ''
78
- for x in df_cluster_0['name']: c0 += str(x) + ', '
79
- for y in df_cluster_1['name']: c1 += str(y) + ', '
80
- for z in df_cluster_2['name']: c2 += str(z) + ', '
81
-
82
- cluster_0 = '''
83
- - Most of them are senior citizen
84
- - Having partner and dependents
85
- - High monthly charges
86
- '''
87
-
88
- suggestion_0 = '''
89
- - Offers packages with additional speed for 3 months for those who have subscribed for more than 3 years
90
- - Open all TV channels during big holiday events such as Eid, Christmas and others
91
- - Provide special offers to increase internet speed to them
92
- '''
93
-
94
- cluster_1 = '''
95
- - Mix of senior citizan and youngster
96
- - Having partner and dependents
97
- - Low monthly charges
98
- '''
99
-
100
- suggestion_1 = '''
101
- - Provides offers with many benefits if they subscribe for the long term
102
- - Offers annual DSL internet packages at affordable prices
103
- '''
104
-
105
- cluster_2 = '''
106
- - Most of them are young people
107
- - Most of them have no partner and dependents
108
- - Moderate monthly charges
109
- '''
110
-
111
- suggestion_2 = '''
112
- Providing special packages with the following criteria:
113
- - High speed internet but lower bandwidth at a cheaper price than normal packages
114
- - Low speed internet but large bandwidth so the connection is much more stable at a cheaper price than normal packages
115
- '''
116
-
117
- if c0 != '':
118
- st.write('##### Cluster 1')
119
- st.write(cluster_0)
120
- st.write('Suggestion for `', c0[0:-2], '` is')
121
- st.write(suggestion_0)
122
- st.markdown('---')
123
-
124
- if c1 != '':
125
- st.write('##### Cluster 2')
126
- st.write(cluster_1)
127
- st.write('Suggestion for `', c1[0:-2], '` is')
128
- st.write(suggestion_1)
129
- st.markdown('---')
130
 
131
- if c2 != '':
132
- st.write('##### Cluster 3')
133
- st.write(cluster_2)
134
- st.write('Suggestion for `', c2[0:-2], '` is')
135
- st.write(suggestion_2)
136
- st.markdown('---')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  def tenureMonthToYear():
139
  year = st.session_state.tenurem % 12
@@ -155,13 +179,22 @@ def run():
155
 
156
  # A. For CSV
157
  if inputType == "Upload Excel or CSV File":
 
158
  with open('telco_data_test.xlsx', 'rb') as file:
159
- st.download_button(
160
- label='💾 Download Template Excel',
161
  data=file,
162
  file_name='telco_example.xlsx',
163
  mime='application/vnd.ms-excel'
164
  )
 
 
 
 
 
 
 
 
165
 
166
  uploaded_file = st.file_uploader("Choose Excel or CSV file", type=["csv", "xlsx"], accept_multiple_files=False)
167
  if uploaded_file is not None:
@@ -177,7 +210,7 @@ def run():
177
  predictData(df)
178
  # B. For Manual
179
  else:
180
- # Create Form
181
  # with st.form(key='Form Parameters'):
182
  name = st.text_input('Name', value='', help='Customer Name')
183
 
 
3
  import numpy as np
4
  import pickle
5
  import os
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
  import json
9
 
10
  def run():
11
  # Load Model Classification
12
  with open('adaboost_logreg_best.pkl', 'rb') as file_1:
13
+ # with open('adaboost_logreg_10_features.pkl', 'rb') as file_1:
14
  classification_model = pickle.load(file_1)
15
 
16
  # Load Model Clustering
 
35
 
36
  # Create Function for Prediction
37
  def predictData(df):
38
+ totalCustomer = len(df)
 
 
 
39
 
40
+ if totalCustomer < 1:
41
+ st.write('## There is no Customer on this data, please check again.')
 
 
 
 
 
42
  else:
43
+ # Classification prediction
44
+ y_pred_uploaded = classification_model.predict(df)
45
+ df['churn'] = y_pred_uploaded
46
+ # st.dataframe(df)
47
+
48
+ # Filter the DataFrame for Predicted Churn (1)
49
+ df_churn = df[df['churn'] == 1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ churnCustomer = len(df_churn)
52
+
53
+ if churnCustomer == 0:
54
+ st.write('## There is no Customer predicted as Churn from this Data!')
55
+ else:
56
+ # Clustering prediction for Predicted Churn (1)
57
+ ## Split Numerical and Categorical for K-Prototype
58
+ data_cluster_num = df_churn[num_col]
59
+ data_cluster_cat = df_churn[cat_col]
60
+
61
+ ## Scale Numerical column
62
+ num_scaled = scaler.transform(data_cluster_num)
63
+
64
+ ## Merge Scaled Numerical + Categorical
65
+ data_cluster_final = np.concatenate([num_scaled, data_cluster_cat], axis=1)
66
+ data_cluster_final = pd.DataFrame(data_cluster_final, columns=['tenure', 'monthly_charges'] + cat_col)
67
+ data_cluster_final = data_cluster_final.infer_objects()
68
+
69
+ ## Mark Categorical Column
70
+ index_cat_columns = [data_cluster_final.columns.get_loc(col) for col in cat_col]
71
+
72
+ ## Predict Cluster
73
+ y_cluster = clustering_model.predict(data_cluster_final, categorical=index_cat_columns)
74
+ # y_cluster = []
75
+ #for rd in range(0, len(df_churn)): y_cluster.append(random.randint(0, 2)) # Random Generator for testing
76
+ df_churn['cluster'] = y_cluster
77
+
78
+ # Split Data into 3 Cluster DataFrames
79
+ df_cluster_0 = df_churn[df_churn['cluster'] == 0]
80
+ df_cluster_1 = df_churn[df_churn['cluster'] == 1]
81
+ df_cluster_2 = df_churn[df_churn['cluster'] == 2]
82
+
83
+ st.write(f'## Result : `{churnCustomer} customer` from total {totalCustomer} customer ({int((churnCustomer/totalCustomer)*100)}%) are predicted as churn!')
84
+ st.write('##### Here are some suggestion to minimalize churn potential for each customer depend on their cluster')
85
+ c0, c1, c2 = '', '', ''
86
+ for x in df_cluster_0['name']: c0 += str(x) + ', '
87
+ for y in df_cluster_1['name']: c1 += str(y) + ', '
88
+ for z in df_cluster_2['name']: c2 += str(z) + ', '
89
+
90
+ cluster_0 = '''
91
+ - Most of them are senior citizen
92
+ - Having partner and dependents
93
+ - High monthly charges
94
+ '''
95
+
96
+ suggestion_0 = '''
97
+ - Offers packages with additional speed for 3 months for those who have subscribed for more than 3 years
98
+ - Open all TV channels during big holiday events such as Eid, Christmas and others
99
+ - Provide special offers to increase internet speed to them
100
+ '''
101
+
102
+ cluster_1 = '''
103
+ - Mix of senior citizan and youngster
104
+ - Having partner and dependents
105
+ - Low monthly charges
106
+ '''
107
+
108
+ suggestion_1 = '''
109
+ - Provides offers with many benefits if they subscribe for the long term
110
+ - Offers annual DSL internet packages at affordable prices
111
+ '''
112
+
113
+ cluster_2 = '''
114
+ - Most of them are young people
115
+ - Most of them have no partner and dependents
116
+ - Moderate monthly charges
117
+ '''
118
+
119
+ suggestion_2 = '''
120
+ Providing special packages with the following criteria:
121
+ - High speed internet but lower bandwidth at a cheaper price than normal packages
122
+ - Low speed internet but large bandwidth so the connection is much more stable at a cheaper price than normal packages
123
+ '''
124
+
125
+ if c0 != '':
126
+ st.write(f'##### Cluster 1 - Elder Group - {len(df_cluster_0)} customer ({((len(df_cluster_0)/churnCustomer)*100):.1f}%)')
127
+ st.write(cluster_0)
128
+ st.write('Suggestion for `', c0[0:-2], '` is')
129
+ st.write(suggestion_0)
130
+ st.markdown('---')
131
+
132
+ if c1 != '':
133
+ st.write(f'##### Cluster 2 - Mixuage - {len(df_cluster_1)} customer ({((len(df_cluster_1)/churnCustomer)*100):.1f}%)')
134
+ st.write(cluster_1)
135
+ st.write('Suggestion for `', c1[0:-2], '` is')
136
+ st.write(suggestion_1)
137
+ st.markdown('---')
138
+
139
+ if c2 != '':
140
+ st.write(f'##### Cluster 3 - Young Blood - {len(df_cluster_2)} customer ({((len(df_cluster_2)/churnCustomer)*100):.1f}%)')
141
+ st.write(cluster_2)
142
+ st.write('Suggestion for `', c2[0:-2], '` is')
143
+ st.write(suggestion_2)
144
+ st.markdown('---')
145
+
146
+ # Create bar plots to analyze the clusters
147
+ num_agg_df = df_churn.groupby(['cluster']).agg({'tenure': 'mean', 'monthly_charges': 'mean'})
148
+ num_agg_df = np.round(num_agg_df, decimals=2)
149
+ fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(9, 5))
150
+
151
+ # Loop through each subplot to populate it
152
+ for ax, column in zip(axes, num_agg_df.columns):
153
+ sns.barplot(ax=ax, data=num_agg_df, x=num_agg_df.index, y=column, orient='v')
154
+ ax.set_title(f'Average {column} by Cluster')
155
+ ax.set_xlabel('Cluster')
156
+ ax.set_ylabel(f'Average {column}')
157
+ ax.bar_label(ax.containers[0])
158
+
159
+ plt.tight_layout()
160
+ st.pyplot(fig)
161
 
162
  def tenureMonthToYear():
163
  year = st.session_state.tenurem % 12
 
179
 
180
  # A. For CSV
181
  if inputType == "Upload Excel or CSV File":
182
+ dl_1, dl_2, dl_3 = st.columns([3, 3, 3])
183
  with open('telco_data_test.xlsx', 'rb') as file:
184
+ dl_1.download_button(
185
+ label='💾 Download Data Example',
186
  data=file,
187
  file_name='telco_example.xlsx',
188
  mime='application/vnd.ms-excel'
189
  )
190
+
191
+ with open('telco_data_template.xlsx', 'rb') as file:
192
+ dl_2.download_button(
193
+ label='💾 Download Template Excel',
194
+ data=file,
195
+ file_name='telco_template.xlsx',
196
+ mime='application/vnd.ms-excel'
197
+ )
198
 
199
  uploaded_file = st.file_uploader("Choose Excel or CSV file", type=["csv", "xlsx"], accept_multiple_files=False)
200
  if uploaded_file is not None:
 
210
  predictData(df)
211
  # B. For Manual
212
  else:
213
+ # Create Form
214
  # with st.form(key='Form Parameters'):
215
  name = st.text_input('Name', value='', help='Customer Name')
216
 
telco_data_template.xlsx ADDED
Binary file (8.89 kB). View file