Abubakari committed on
Commit
9581807
·
1 Parent(s): 2537c41

Upload 11 files

Browse files
Churn.png ADDED
NotChurn.jpg ADDED
banner.png ADDED
cat_imputer.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30ccb53964d4b14131ef8b413a068a1ca399d1668c6d6a53f155b9ebf7b6270
3
+ size 1033
encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce49eceb7e11a32bc8babcc7046b0a5395d11588e2d3abc63a891c6aa441ae0a
3
+ size 1668
gb_model_vif_smote.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311c897ee6652506247576d174760a9dab012e66814115ca1d0a2831a6426ae3
3
+ size 181004
lr_model_vif_smote.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99ce78fc5406fe3850259aa8750391dfc4d998092396291a3c4ca744e35c8dee
3
+ size 2271
main.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import plotly.graph_objects as go
6
+ from PIL import Image
7
+ import time
8
+ import matplotlib.pyplot as plt
9
+ import qrcode
10
+ from io import BytesIO
11
+
12
# Load the persisted preprocessing transformers and the two classifiers.
# The files are read in the order listed, which matches the order of the
# names on the left-hand side of the assignment.
(
    num_imputer,
    cat_imputer,
    encoder,
    scaler,
    model1,
    model2,
) = [
    joblib.load(artifact_path)
    for artifact_path in (
        'numerical_imputer.joblib',
        'cat_imputer.joblib',
        'encoder.joblib',
        'scaler.joblib',
        'lr_model_vif_smote.joblib',
        'gb_model_vif_smote.joblib',
    )
]
19
+
20
+
21
def preprocess_input(input_data):
    """Convert one customer's raw inputs into the feature frame passed to the models.

    Args:
        input_data: Mapping of column name to a single value; it is wrapped
            into a one-row DataFrame with index 0.

    Returns:
        A one-row DataFrame whose columns are exactly
        ``original_feature_names`` (in that order). Columns that the
        transformers did not produce are filled with 0.
    """
    frame = pd.DataFrame(input_data, index=[0])

    # Split columns by dtype: object-dtype columns go down the categorical
    # path, everything else down the numeric path.
    cat_columns, num_columns = [], []
    for name in frame.columns:
        bucket = cat_columns if frame[name].dtype == 'object' else num_columns
        bucket.append(name)

    # Impute each group with its own imputer.
    imputed_cat = cat_imputer.transform(frame[cat_columns])
    imputed_num = num_imputer.transform(frame[num_columns])

    # Encode the categorical part; .toarray() is called on the encoder output
    # before it is wrapped in a DataFrame.
    encoded_values = encoder.transform(imputed_cat).toarray()
    encoded_part = pd.DataFrame(
        encoded_values,
        columns=encoder.get_feature_names_out(cat_columns),
    )

    # Scale the numeric part and restore its column labels.
    scaled_part = pd.DataFrame(scaler.transform(imputed_num), columns=num_columns)

    # Join both parts side by side, then force the expected column set/order.
    combined = pd.concat([encoded_part, scaled_part], axis=1)
    return combined.reindex(columns=original_feature_names, fill_value=0)
39
+
40
# Column names, in order, that preprocess_input() reindexes its output to.
original_feature_names = [
    # Numeric inputs
    'MONTANT',
    'FREQUENCE_RECH',
    'REVENUE',
    'ARPU_SEGMENT',
    'FREQUENCE',
    'DATA_VOLUME',
    'ON_NET',
    'ORANGE',
    'TIGO',
    'ZONE1',
    'ZONE2',
    'REGULARITY',
    'FREQ_TOP_PACK',
    # REGION indicator columns
    'REGION_DAKAR',
    'REGION_DIOURBEL',
    'REGION_FATICK',
    'REGION_KAFFRINE',
    'REGION_KAOLACK',
    'REGION_KEDOUGOU',
    'REGION_KOLDA',
    'REGION_LOUGA',
    'REGION_MATAM',
    'REGION_SAINT-LOUIS',
    'REGION_SEDHIOU',
    'REGION_TAMBACOUNDA',
    'REGION_THIES',
    'REGION_ZIGUINCHOR',
    # TENURE indicator columns
    'TENURE_Long-term',
    'TENURE_Medium-term',
    'TENURE_Mid-term',
    'TENURE_Short-term',
    'TENURE_Very short-term',
    # TOP_PACK indicator columns
    'TOP_PACK_data',
    'TOP_PACK_international',
    'TOP_PACK_messaging',
    'TOP_PACK_other_services',
    'TOP_PACK_social_media',
    'TOP_PACK_value_added_services',
    'TOP_PACK_voice',
]
49
+
50
# Page-level configuration: request the wide layout.
st.set_page_config(layout="wide")

# Main page header: title followed by the banner image.
st.title('📞 EXPRESSO TELECOM CUSTOMER CHURN PREDICTION APP 📞')
st.image("banner.png", use_column_width=True)

# Sidebar usage instructions, rendered as one markdown entry per step.
st.sidebar.title('How to Use')
for instruction in (
    '1. Select your model of choice on the left sidebar.',
    '2. Adjust the input parameters based on customer details',
    '3. Click the "Predict" button to initiate the prediction.',
    '4. The app will simulate a prediction process with a progress bar.',
    '5. Once the prediction is complete, the results will be displayed below.',
):
    st.sidebar.markdown(instruction)

# Short description shown on the main page under the banner.
st.markdown("This app predicts whether a customer will leave your company ❌ or not 🎉. Enter the details of the customer on the left sidebar to see the result")
68
+
69
# Registry of selectable classifiers: display name -> estimator plus a type
# tag that predict_churn() reads to decide the label order.
models = {
    'Logistic Regression': dict(model=model1, type='logistic_regression'),
    'Gradient Boosting': dict(model=model2, type='gradient_boosting'),
}

# Sidebar dropdown listing the registry keys in insertion order.
model_name = st.sidebar.selectbox('Select Model', list(models))

# Unpack the chosen entry into the names the rest of the script uses.
selected_entry = models[model_name]
model = selected_entry['model']
model_type = selected_entry['type']
81
+
82
+
83
# Collect input from the user
st.sidebar.title('Enter Customer Details')

# Choices for the categorical fields. The feature list used by the models
# contains indicator columns for 'DAKAR', 'Long-term' and 'other_services'
# as well, so those values are offered too. They are appended at the end so
# the default (first) choice of each select box stays the same.
region_options = [
    'SAINT-LOUIS', 'THIES', 'LOUGA', 'MATAM', 'FATICK', 'KAOLACK',
    'DIOURBEL', 'TAMBACOUNDA', 'ZIGUINCHOR', 'KOLDA', 'KAFFRINE', 'SEDHIOU',
    'KEDOUGOU', 'DAKAR',
]
tenure_options = ['Short-term', 'Mid-term', 'Medium-term', 'Very short-term', 'Long-term']
top_pack_options = [
    'data', 'international', 'messaging', 'social_media',
    'value_added_services', 'voice', 'other_services',
]

# One sidebar widget per model input. The dict keys are the raw column names
# that preprocess_input() receives; widgets appear in the order written here.
input_features = {
    'MONTANT': st.sidebar.number_input('Top-up Amount (MONTANT)'),
    'FREQUENCE_RECH': st.sidebar.number_input('Number of Times the Customer Refilled (FREQUENCE_RECH)'),
    'REVENUE': st.sidebar.number_input('Monthly income of the client (REVENUE)'),
    'ARPU_SEGMENT': st.sidebar.number_input('Income over 90 days / 3 (ARPU_SEGMENT)'),
    'FREQUENCE': st.sidebar.number_input('Number of times the client has made an income (FREQUENCE)'),
    'DATA_VOLUME': st.sidebar.number_input('Number of Connections (DATA_VOLUME)'),
    'ON_NET': st.sidebar.number_input('Inter Expresso Call (ON_NET)'),
    'ORANGE': st.sidebar.number_input('Call to Orange (ORANGE)'),
    'TIGO': st.sidebar.number_input('Call to Tigo (TIGO)'),
    'ZONE1': st.sidebar.number_input('Call to Zone 1 (ZONE1)'),
    'ZONE2': st.sidebar.number_input('Call to Zone 2 (ZONE2)'),
    'REGULARITY': st.sidebar.number_input('Number of Times the Client is Active for 90 Days (REGULARITY)'),
    'FREQ_TOP_PACK': st.sidebar.number_input('Number of Times the Client has Activated the Top Packs (FREQ_TOP_PACK)'),
    'REGION': st.sidebar.selectbox('Location of Each Client (REGION)', region_options),
    'TENURE': st.sidebar.selectbox('Duration in the Network (TENURE)', tenure_options),
    'TOP_PACK': st.sidebar.selectbox('Most Active Pack (TOP_PACK)', top_pack_options),
}
106
+
107
# Input validation
error_messages = []

# Inclusive [minimum, maximum] bounds for each numeric field. Fields that are
# not listed here (the categorical ones) are not range-checked.
numeric_ranges = {
    'MONTANT': [0, 1000000],
    'FREQUENCE_RECH': [0, 100],
    'REVENUE': [0, 1000000],
    'ARPU_SEGMENT': [0, 100000],
    'FREQUENCE': [0, 100],
    'DATA_VOLUME': [0, 100000],
    'ON_NET': [0, 100000],
    'ORANGE': [0, 100000],
    'TIGO': [0, 100000],
    'ZONE1': [0, 100000],
    'ZONE2': [0, 100000],
    'REGULARITY': [0, 100],
    'FREQ_TOP_PACK': [0, 100],
}

# Walk the inputs in their own order so messages follow the sidebar layout.
for field_name, field_value in input_features.items():
    if field_name not in numeric_ranges:
        continue
    lower, upper = numeric_ranges[field_name]
    if not (lower <= field_value <= upper):
        error_messages.append(f"{field_name} should be between {lower} and {upper}.")

# The input is valid exactly when no range violation was recorded.
valid_input = not error_messages
134
+
135
#Churn Prediction

# Label assigned to each predict_proba column, keyed by the model type tag.
# NOTE(review): the two model types use opposite label orders. This is kept
# exactly as it was; confirm it against each estimator's `classes_`.
_CHURN_LABELS_BY_MODEL_TYPE = {
    "logistic_regression": ["No Churn", "Churn"],
    "gradient_boosting": ["Churn", "No Churn"],
}


def predict_churn(input_data, model):
    """Run the selected model on one customer's inputs.

    The label order is chosen from the module-level ``model_type`` tag, which
    is read when the function is called.

    Args:
        input_data: Mapping of raw column name to value, as accepted by
            ``preprocess_input``.
        model: Estimator exposing ``predict_proba``.

    Returns:
        A 4-tuple ``(churn_labels, churn_probability, churn_indices,
        churn_index)``:
        the label list, the probability row for the single input sample, a
        ``{label: column index}`` dict, and the index of the largest
        probability.

    Raises:
        ValueError: If ``model_type`` is not a known model type. Previously
            this surfaced as an ``UnboundLocalError`` after the model had
            already been evaluated.
    """
    # Resolve the label order first so an unsupported type fails fast.
    try:
        churn_labels = list(_CHURN_LABELS_BY_MODEL_TYPE[model_type])
    except KeyError:
        raise ValueError(f"Unsupported model type: {model_type!r}") from None

    # Preprocess the input data
    preprocessed_data = preprocess_input(input_data)

    # Calculate class probabilities using the model
    probabilities = model.predict_proba(preprocessed_data)

    # Only one sample is submitted, so keep just its probability row
    churn_probability = probabilities[0]

    # Map each label to its column index in the probability row
    churn_indices = {label: idx for idx, label in enumerate(churn_labels)}

    # Index of the most probable class
    churn_index = np.argmax(churn_probability)

    return churn_labels, churn_probability, churn_indices, churn_index
160
+
161
# Predict churn based on user input
if st.sidebar.button('Predict Churn'):
    if not valid_input:
        # The range checks earlier in the script flagged at least one field:
        # report every problem and skip the prediction instead of scoring
        # out-of-range values.
        for message in error_messages:
            st.error(message)
    else:
        try:
            with st.spinner("Predicting..."):
                # Simulate a long-running process
                progress_bar = st.progress(0)
                step = 20  # A big step will reduce the execution time
                for i in range(0, 100, step):
                    time.sleep(0.1)
                    progress_bar.progress(i + step)

                churn_labels, churn_probability, churn_indices, churn_index = predict_churn(input_features, model)

            # Probability of the winning class; used in the headline message.
            top_class_prob = churn_probability[churn_index]
            # Probability of the "Churn" class specifically; used for the
            # comparison against the average churn rate. Using the winning
            # class here would plot the *not-churn* probability whenever the
            # prediction is "No Churn".
            predicted_churn_prob = churn_probability[churn_indices["Churn"]]

            col1, col2 = st.columns(2)

            if churn_labels[churn_index] == "Churn":
                with col1:
                    st.error(f"Beware!!! This customer is likely to churn with a probability of {top_class_prob * 100:.2f}% 😢")
                    # The context manager closes the file handle; resize()
                    # returns a separate image that stays usable afterwards.
                    with Image.open('Churn.png') as churn_image:
                        resized_churn_image = churn_image.resize((350, 300))  # Adjust the width and height as desired
                    st.image(resized_churn_image)
                # Suggestions for customers predicted to churn
                with col2:
                    st.info("Suggestions for retaining churned customers in this customer group:\n"
                            "- Offer personalized discounts or promotions\n"
                            "- Provide exceptional customer service\n"
                            "- Introduce loyalty programs\n"
                            "- Send targeted re-engagement emails\n"
                            "- Provide a dedicated account manager\n"
                            "- Offer extended trial periods\n"
                            "- Conduct exit surveys to understand reasons for churn\n"
                            "- Implement a customer win-back campaign\n"
                            "- Provide incentives for referrals\n"
                            "- Improve product or service offerings based on customer feedback")
            else:
                with col1:
                    st.success(f"This customer is not likely to churn with a probability of {top_class_prob * 100:.2f}% 😀")
                    with Image.open('NotChurn.jpg') as not_churn_image:
                        resized_not_churn_image = not_churn_image.resize((350, 300))  # Adjust the width and height as desired
                    st.image(resized_not_churn_image)
                # Suggestions for customers predicted to stay
                with col2:
                    st.info("Suggestions for retaining non-churned customers in this customer group:\n"
                            "- Provide personalized product recommendations\n"
                            "- Offer exclusive features or upgrades\n"
                            "- Implement proactive customer support\n"
                            "- Conduct customer satisfaction surveys\n"
                            "- Recognize and reward loyal customers\n"
                            "- Organize customer appreciation events\n"
                            "- Offer early access to new features or products\n"
                            "- Provide educational resources or tutorials\n"
                            "- Implement a customer loyalty program\n"
                            "- Offer flexible billing or pricing options")

            # Create a donut chart to display probabilities
            fig = go.Figure(data=[go.Pie(
                labels=churn_labels,
                values=churn_probability,
                hole=0.5,
                textinfo='label+percent',
                marker=dict(colors=['#FFA07A', '#6495ED', '#FFD700', '#32CD32', '#FF69B4', '#8B008B']))])

            fig.update_traces(
                hoverinfo='label+percent',
                textfont_size=12,
                textposition='inside',
                # `percent` is a 0-1 fraction; the `.2%` format scales it by
                # 100 and appends the percent sign.
                texttemplate='%{label}: %{percent:.2%}'
            )

            fig.update_layout(
                title='Churn Probability',
                title_x=0.5,
                showlegend=False,
                width=500,
                height=500
            )

            st.plotly_chart(fig, use_container_width=True)

            # Average churn rate in percent (replace with your actual value)
            average_churn_rate = 19

            # Convert the overall churn rate to a probability
            main_data_churn_probability = average_churn_rate / 100

            # Create a bar chart comparing the predicted churn probability with the average churn rate
            labels = ['Predicted Churn Probability', 'Average Churn Probability']
            values = [predicted_churn_prob, main_data_churn_probability]

            fig = go.Figure(data=[go.Bar(x=labels, y=values)])
            fig.update_layout(
                xaxis_title='Churn Probability',
                yaxis_title='Probability',
                title='Comparison with Average Churn Rate',
                yaxis=dict(range=[0, 1])  # Set the y-axis limits between 0 and 1
            )

            # Phrase the comparison so the sentence is grammatical in all
            # three cases ("equal to" rather than "equal than").
            if predicted_churn_prob > main_data_churn_probability:
                churn_comparison = "higher than"
            elif predicted_churn_prob < main_data_churn_probability:
                churn_comparison = "lower than"
            else:
                churn_comparison = "equal to"

            explanation = f"This bar chart compares the predicted churn probability of the selected customer " \
                          f"with the average churn rate of all customers. It provides insights into how the " \
                          f"individual customer's churn likelihood ({predicted_churn_prob:.2f}) compares to the " \
                          f"overall trend. The 'Predicted Churn Probability' represents the likelihood of churn " \
                          f"for the selected customer, while the 'Average Churn Rate' represents the average " \
                          f"churn rate across all customers ({main_data_churn_probability:.2f}).\n\n" \
                          f"The customer's churn rate is {churn_comparison} the average churn rate."

            st.plotly_chart(fig)
            st.write(explanation)

            # Visualize Feature Importance
            # Linear models expose signed coefficients; tree ensembles expose
            # non-negative importances. Anything else has nothing to plot.
            if hasattr(model, 'coef_'):
                feature_importances = model.coef_[0]
                importance_type = 'Coef'
            elif hasattr(model, 'feature_importances_'):
                feature_importances = model.feature_importances_
                importance_type = 'Importance'
            else:
                feature_importances = None
                importance_type = None

            if feature_importances is None:
                # Reported once (the message used to be written twice).
                st.write('Feature importance is not available for this model.')
            else:
                importance_df = pd.DataFrame({'Feature': original_feature_names, importance_type: feature_importances})
                importance_df = importance_df.sort_values(importance_type, ascending=False)

                st.subheader('Feature Importance')

                # Determine color for each bar based on positive or negative importance
                colors = ['green' if importance > 0 else 'red' for importance in importance_df[importance_type]]

                # Create a horizontal bar chart using Plotly
                fig = go.Figure(go.Bar(
                    x=importance_df[importance_type],
                    y=importance_df['Feature'],
                    orientation='h',
                    marker=dict(color=colors),
                    text=importance_df[importance_type].apply(lambda x: f'{x:.2f}'),
                    textposition='inside'))

                # Configure the layout of the bar chart
                fig.update_layout(
                    title='Feature Importance',
                    xaxis_title='Importance',
                    yaxis_title='Feature',
                    bargap=0.1,
                    width=600,
                    height=800)

                # Display the bar chart using Plotly chart in Streamlit
                st.plotly_chart(fig)

                # Explanation of feature importance
                importance_explanation = f"The feature importance plot shows the relative importance of each feature " \
                                         f"for predicting churn. The importance is calculated based on the " \
                                         f"{importance_type} value of each feature in the model. " \
                                         f"A higher {importance_type} value indicates a stronger influence " \
                                         f"of the corresponding feature on the prediction of churn.\n\n" \
                                         f"For logistic regression, positive {importance_type} values indicate " \
                                         f"features that positively contribute to predicting churn, " \
                                         f"while negative {importance_type} values indicate features that " \
                                         f"negatively contribute to predicting churn.\n\n" \
                                         f"For gradient boosting, higher {importance_type} values " \
                                         f"indicate features that have a greater importance in predicting churn.\n\n" \
                                         f"Note: The feature importance values may change depending on the model " \
                                         f"and the data used for training."

                st.write(importance_explanation)

            # NOTE: a disabled (commented-out) QR-code export of the results
            # used to live here; restore it from version control if needed.

        except Exception as e:
            # Top-level UI boundary: show the failure in the page rather than
            # letting the script crash.
            st.error(f"An error occurred: {str(e)}")
numerical_imputer.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cc3b03cdbe7e804e10073e8a5c1718c078f15e0fe0d87ac0c74a5640d44f05
3
+ size 1103
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Packages imported by main.py (plus scikit-learn, presumably needed to load the .joblib artifacts)
joblib==1.2.0
numpy==1.22.4
pandas==1.5.3
streamlit==1.22.0
scikit-learn==1.2.2
matplotlib==3.7.1
# plotly and Pillow are imported by main.py but were not declared
plotly==5.14.1
Pillow==9.5.0
qrcode==7.4.2
# Not imported by main.py; kept in case other entry points need them
# (shap was pinned twice; the duplicate line is removed)
shap==0.41.0
fastapi==0.95.1
uvicorn==0.22.0
pydantic==1.10.7
jinja2==3.0.2
python-multipart==0.0.6
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a342ef9d36377bbc748a53205340fd5e5d386e43a8d3d2497a117db766e23764
3
+ size 1199