File size: 15,049 Bytes
b000592
 
 
 
8d84223
b30fead
 
b000592
 
 
8d84223
b000592
b30fead
b000592
 
8d84223
 
 
b000592
8d84223
 
 
b000592
8d84223
 
 
b000592
8d84223
 
 
b000592
8d84223
 
d6b6b64
8d84223
 
 
b30fead
8d84223
b30fead
 
8d84223
b30fead
 
 
 
 
 
 
b000592
b30fead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d09ce4f
 
 
 
 
 
 
 
b30fead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384467e
 
 
 
 
b30fead
384467e
 
 
b30fead
 
 
 
 
 
 
 
 
 
 
384467e
b30fead
 
 
 
 
 
 
 
384467e
 
 
 
 
b30fead
384467e
734fbbb
b30fead
 
 
 
 
 
 
 
 
 
384467e
b30fead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d09ce4f
b30fead
 
d09ce4f
 
 
 
 
 
 
 
 
8d84223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b30fead
d6b6b64
b30fead
 
d6b6b64
 
 
 
b30fead
 
 
 
 
 
 
 
d6b6b64
 
8d84223
 
 
 
 
 
 
 
 
 
 
 
b000592
b30fead
8d84223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6b6b64
8d84223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6b6b64
8d84223
 
 
 
 
 
 
 
d6b6b64
8d84223
 
 
 
 
 
 
 
 
 
 
 
 
d6b6b64
8d84223
 
 
 
 
 
 
 
b000592
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import os
import matplotlib.pyplot as plt
import seaborn as sns
import json

def run():
    # Load Model Classification
    with open('adaboost_logreg_best.pkl', 'rb') as file_1:
    # with open('adaboost_logreg_10_features.pkl', 'rb') as file_1:
        classification_model = pickle.load(file_1)
    
    # Load Model Clustering
    with open('kp.pkl','rb') as file_2:
        clustering_model = pickle.load(file_2)

    # Load Clustering Scaler
    with open('scaler.pkl','rb') as file_3:
        scaler = pickle.load(file_3)
    
    # Load Clustering Numerical
    with open('num_col.txt','rb') as file_4:
        num_col = pickle.load(file_4)

    # Load Clustering Categorical
    with open('cat_col.txt','rb') as file_5:
        cat_col = pickle.load(file_5)

    # Choice of input: Upload or Manual Input
    inputType = st.selectbox("How would you like to input data ?", ["Upload Excel or CSV File", "Manual Input"])
    st.markdown('---')

    # Create Function for Prediction
    def predictData(df):
        totalCustomer = len(df)

        if totalCustomer < 1:
            st.write('## There is no Customer on this data, please check again.')
        else:
            # Classification prediction
            y_pred_uploaded = classification_model.predict(df)
            df['churn'] = y_pred_uploaded
            # st.dataframe(df)

            # Filter the DataFrame for Predicted Churn (1) 
            df_churn = df[df['churn'] == 1]
            
            churnCustomer = len(df_churn)

            if churnCustomer == 0:
                st.write('## There is no Customer predicted as Churn from this Data!')
            else:
                # Clustering prediction for Predicted Churn (1)
                ## Split Numerical and Categorical for K-Prototype
                data_cluster_num = df_churn[num_col]
                data_cluster_cat = df_churn[cat_col]

                ## Scale Numerical column
                num_scaled = scaler.transform(data_cluster_num)

                ## Merge Scaled Numerical + Categorical
                data_cluster_final = np.concatenate([num_scaled, data_cluster_cat], axis=1)
                data_cluster_final = pd.DataFrame(data_cluster_final, columns=['tenure', 'monthly_charges'] + cat_col)
                data_cluster_final = data_cluster_final.infer_objects()

                ## Mark Categorical Column
                index_cat_columns = [data_cluster_final.columns.get_loc(col) for col in cat_col] 

                ## Predict Cluster
                y_cluster = clustering_model.predict(data_cluster_final, categorical=index_cat_columns)
                # y_cluster = []
                #for rd in range(0, len(df_churn)): y_cluster.append(random.randint(0, 2)) # Random Generator for testing
                df_churn['cluster'] = y_cluster

                temp_cols = df_churn.columns.tolist()
                new_cols = temp_cols[0:1] + temp_cols[-2:] + temp_cols[1:-2]
                df_churn = df_churn[new_cols]
                df_churn = df_churn.sort_values(by=['cluster'], ascending=True)

                # Saving Result to Excel
                df_churn.to_excel('model_result.xlsx', index=False)
                
                # Split Data into 3 Cluster DataFrames
                df_cluster_0 = df_churn[df_churn['cluster'] == 0]
                df_cluster_1 = df_churn[df_churn['cluster'] == 1]
                df_cluster_2 = df_churn[df_churn['cluster'] == 2]

                st.write(f'## Result : `{churnCustomer} customer` from total {totalCustomer} customer ({int((churnCustomer/totalCustomer)*100)}%) are predicted as churn!')
                st.write('##### Here are some suggestion to minimalize churn potential for each customer depend on their cluster')
                c0, c1, c2 = '', '', ''
                for x in df_cluster_0['name']: c0 += str(x) + ', '
                for y in df_cluster_1['name']: c1 += str(y) + ', '
                for z in df_cluster_2['name']: c2 += str(z) + ', '
                
                cluster_0 = '''
                    - Most of them are senior citizen
                    - Having partner and dependents
                    - High monthly charges
                '''

                # suggestion_0 = '''
                #     - Offers packages with additional speed for 3 months for those who have subscribed for more than 3 years
                #     - Open all TV channels during big holiday events such as Eid, Christmas and others
                #     - Provide special offers to increase internet speed to them
                # '''
                suggestion_0 = '''
                    - Offers long term packages
                    - Give limited time offer
                    - Maintain good communication with this customer
                '''

                cluster_1 = '''
                    - Mix of senior citizan and youngster
                    - Having partner and dependents
                    - Low monthly charges
                '''

                suggestion_1 = '''
                    - Provides offers with many benefits if they subscribe for the long term
                    - Offers annual DSL internet packages at affordable prices
                    - New customer onboarding and orientation
                '''

                cluster_2 = '''
                    - Most of them are young people
                    - Most of them have no partner and dependents
                    - Moderate monthly charges
                '''

                # suggestion_2 = '''
                #     Providing special packages with the following criteria:
                #     - High speed internet but lower bandwidth at a cheaper price than normal packages
                #     - Low speed internet but large bandwidth so the connection is much more stable at a cheaper price than normal packages
                # '''
                suggestion_2 = '''
                    - Make an affordable internet package prices for this cluster
                    - Provides variation in Payment Method
                '''

                if c0 != '':
                    st.write(f'##### Cluster 1 - Elder Group - {len(df_cluster_0)} customer ({((len(df_cluster_0)/churnCustomer)*100):.1f}%)')
                    st.write(cluster_0)
                    st.write('Suggestion for `', c0[0:-2], '` is')
                    st.write(suggestion_0)
                    st.markdown('---')
                
                if c1 != '':
                    st.write(f'##### Cluster 2 - Mixage - {len(df_cluster_1)} customer ({((len(df_cluster_1)/churnCustomer)*100):.1f}%)')
                    st.write(cluster_1)
                    st.write('Suggestion for `', c1[0:-2], '` is')
                    st.write(suggestion_1)
                    st.markdown('---')
                
                if c2 != '':
                    st.write(f'##### Cluster 3 - Young Blood - {len(df_cluster_2)} customer ({((len(df_cluster_2)/churnCustomer)*100):.1f}%)')
                    st.write(cluster_2)
                    st.write('Suggestion for `', c2[0:-2], '` is')
                    st.write(suggestion_2)
                    st.markdown('---')
                
                # Create Bar Plot for Analyze Cluster
                num_agg_df = df_churn.groupby(['cluster']).agg({'tenure': 'mean', 'monthly_charges': 'mean'})
                num_agg_df = np.round(num_agg_df, decimals=2)
                fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(9, 5))

                # Loop through each subplot to populate it
                for ax, column in zip(axes, num_agg_df.columns):
                    sns.barplot(ax=ax, data=num_agg_df, x=num_agg_df.index, y=column, orient='v')
                    ax.set_title(f'Average {column} by Cluster')
                    ax.set_xlabel('Cluster')
                    ax.set_ylabel(f'Average {column}')
                    ax.bar_label(ax.containers[0])
                
                # plt.style.use('dark_background')
                plt.tight_layout()
                st.pyplot(fig)
                # plt.style.use('default')

                with open('model_result.xlsx', 'rb') as file:
                    st.download_button(
                        label='💾 Download Prediction Result',
                        data=file,
                        file_name='model_result.xlsx',
                        mime='application/vnd.ms-excel'
                )
    
    def tenureMonthToYear():
        year = st.session_state.tenurem % 12
        if year == 0:
            st.session_state.tenurey = int((st.session_state.tenurem / 12))
        else:
            st.session_state.tenurey = int((st.session_state.tenurem / 12) + 1)
    
    def calculateChargesAndCategory():
        st.session_state.tcharges = int((st.session_state.mcharges * st.session_state.tenurem))
        if st.session_state.mcharges <= 30:
            st.session_state.catcharges = 'Low Expense'
        elif st.session_state.mcharges <= 60:
            st.session_state.catcharges = 'Medium Expense'
        elif st.session_state.mcharges <= 90:
            st.session_state.catcharges = 'Medium High Expense'
        else:
            st.session_state.catcharges = 'High Expense'

    # A. For CSV
    if inputType == "Upload Excel or CSV File":
        dl_1, dl_2, dl_3 = st.columns([3, 3, 3])
        with open('telco_data_test.xlsx', 'rb') as file:
            dl_1.download_button(
                label='💾 Download Data Example',
                data=file,
                file_name='telco_example.xlsx',
                mime='application/vnd.ms-excel'
            )
        
        with open('telco_data_template.xlsx', 'rb') as file:
            dl_2.download_button(
                label='💾 Download Template Excel',
                data=file,
                file_name='telco_template.xlsx',
                mime='application/vnd.ms-excel'
            )

        uploaded_file = st.file_uploader("Choose Excel or CSV file", type=["csv", "xlsx"], accept_multiple_files=False)
        if uploaded_file is not None:
            split_file_name = os.path.splitext(uploaded_file.name)
            # file_name = split_file_name[0]
            file_extension = split_file_name[1]

            if file_extension == '.csv':
                df = pd.read_csv(uploaded_file)
            else:    
                df = pd.read_excel(uploaded_file)
            # st.dataframe(df.head())
            predictData(df)
    # B. For Manual        
    else:
        # Create Form
        # with st.form(key='Form Parameters'):
        name = st.text_input('Name', value='', help='Customer Name')

        col_left, col_mid, col_right = st.columns([3, 2, 2])
        gender =  col_left.selectbox('Gender', ('Male', 'Female'), index=0)
        with col_mid:
            tenure =  st.number_input('Tenure (Month)', min_value=1, max_value=999, step=1, help='Month', key='tenurem', on_change=tenureMonthToYear)
        with col_right:
            tenure_year = st.number_input('Tenure (Year)', min_value=1, max_value=999, step=1, disabled=True, key='tenurey')
        
        col1, col2, col3 = st.columns([1, 1, 1])
        senior_citizen = col1.radio(label='Senior Citizen?', options=['Yes', 'No'], help='Choose \'Yes\' for 61 years old above')
        partner = col2.radio(label='Having a partner?', options=['Yes', 'No'])
        dependents = col3.radio(label='Having a dependents?', options=['Yes', 'No'], help='For example : children')
        
        # col4, col5 = st.columns([1, 1])
        # internet_service =  col4.selectbox('Internet Service', ('DSL', 'Fiber optic', 'No'), index=0)

        col4, col5, col6 = st.columns([1, 1, 1])
        internet_service =  col4.radio(label='Subs for Internet service?', options=['DSL', 'Fiber optic', 'No'])
        phone_service = col5.radio(label='Subs for Phone service?', options=['Yes', 'No'])
        multiple_lines = col6.radio(label='Subs for Multiple Lines?', options=['Yes', 'No', 'No Phone Services'])

        col7, col8, col9 = st.columns([1, 1, 1])
        online_security = col7.radio(label='Subs for Online Security?', options=['Yes', 'No', 'No Internet Services'])
        online_backup = col8.radio(label='Subs for Online Backup?', options=['Yes', 'No', 'No Internet Services'])
        device_protection = col9.radio(label='Having Device Protections?', options=['Yes', 'No', 'No Internet Services'])
        tech_support = col7.radio(label='Having Tech Support service?', options=['Yes', 'No', 'No Internet Services'])
        streaming_tv = col8.radio(label='Subs for TV Streaming?', options=['Yes', 'No', 'No Internet Services'])
        streaming_movies = col9.radio(label='Subs for Movie Streaming?', options=['Yes', 'No', 'No Internet Services'])

        col_pm1, col_pm2, col_pm3 = st.columns([3, 3, 2])
        contract = col_pm1.selectbox('Contract', ('Month-to-month', 'One year', 'Two year'), index=0)
        payment_method = col_pm2.selectbox('Payment Method', ('Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'), index=0)
        paperless_billing = col_pm3.selectbox('Paperless billing?', ('Yes', 'No'), index=0)

        col_charges1, col_charges2, col_charges3 = st.columns([1, 1, 2])
        monthly_charges =  col_charges1.number_input('Monthly Charges', min_value=1, max_value=999, step=1, help='Amount to paid per month', key='mcharges', on_change=calculateChargesAndCategory)
        total_charges = col_charges2.number_input('Total Charges', min_value=1, max_value=999999, step=1, disabled=True, key='tcharges')
        charges_cat = col_charges3.text_input('Charges Category', disabled=True, key='catcharges')

        # st.button('Predict', on_click=predict)
        data_inf = {
            'name': name, 
            'gender': gender, 
            'senior_citizen': senior_citizen, 
            'partner': partner, 
            'dependents': dependents, 
            'tenure': tenure, 
            'phone_service': phone_service, 
            'multiple_lines': multiple_lines, 
            'internet_service': internet_service, 
            'online_security': online_security, 
            'online_backup': online_backup, 
            'device_protection': device_protection, 
            'tech_support': tech_support, 
            'streaming_tv': streaming_tv, 
            'streaming_movies': streaming_movies, 
            'contract': contract, 
            'paperless_billing': paperless_billing, 
            'payment_method': payment_method, 
            'monthly_charges': monthly_charges, 
            'total_charges': total_charges, 
            'monthly_charges_cat': charges_cat, 
            'tenure_year': tenure_year
            }

        if st.button('Predict'):
            data_inf = pd.DataFrame([data_inf])
            # st.dataframe(data_inf.head())
            predictData(data_inf)

if __name__ == '__main__':
    run()