hhhar committed on
Commit
1a947f0
·
verified ·
1 Parent(s): 356d400

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +312 -0
app.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import numpy as np
4
+ import os
5
+ import pandas as pd
6
+
7
# Folder that holds every exported artefact: the fitted preprocessor and
# one pickle per trained model.
model_folder = 'modelExports'

# NOTE: joblib.load unpickles arbitrary Python objects, so this folder must
# only ever contain files produced by a trusted training run.

# Load the standalone preprocessor (used for models that do not embed one).
preprocessor_path = os.path.join(model_folder, 'preprocessor.pkl')
preprocessor = joblib.load(preprocessor_path)

# Load models and record whether they include the preprocessor.
#   models                   -> display name -> loaded estimator
#   models_with_preprocessor -> display name -> True when the estimator
#                               exposes a 'preprocessor' step of its own
models = {}
models_with_preprocessor = {}

# os.listdir returns entries in arbitrary order; sorting keeps the sidebar
# listing and the default selection stable across machines and restarts.
for file_name in sorted(os.listdir(model_folder)):
    stem, extension = os.path.splitext(file_name)
    if extension != '.pkl' or file_name == 'preprocessor.pkl':
        continue

    # "random_forest_with_smote_model.pkl" -> "RANDOM FOREST WITH SMOTE MODEL".
    # Only the trailing extension is dropped, never a ".pkl" inside the name.
    model_name = stem.replace('_', ' ').upper()
    model = joblib.load(os.path.join(model_folder, file_name))
    models[model_name] = model

    # Objects with a `named_steps` mapping that contains 'preprocessor' are
    # fed raw data later on; everything else receives preprocessed data.
    includes_preprocessor = (
        hasattr(model, 'named_steps')
        and 'preprocessor' in model.named_steps
    )
    models_with_preprocessor[model_name] = includes_preprocessor
25
+
26
# Model accuracies, in percent. They are used as vote weights and shown in
# the sidebar. Each algorithm has one model trained with SMOTE and one
# without, so the table lists
# (algorithm, accuracy with SMOTE, accuracy without SMOTE).
_ACCURACY_TABLE = (
    ("GAUSSIAN NAIVE BAYES", 86, 85),
    ("GRADIENT BOOSTING", 95, 94),
    ("LINEAR DISCRIMINANT ANALYSIS", 88, 87),
    ("LOGISTIC REGRESSION", 90, 89),
    ("RANDOM FOREST", 95, 93),
    ("SUPPORT VECTOR MACHINE", 91, 90),
)

# Keys follow the "<ALGORITHM> WITH|WITHOUT SMOTE MODEL" pattern so they
# line up with the display names derived from the model file names.
model_accuracies = {}
for _algorithm, _with_smote, _without_smote in _ACCURACY_TABLE:
    model_accuracies[_algorithm + " WITH SMOTE MODEL"] = _with_smote
    model_accuracies[_algorithm + " WITHOUT SMOTE MODEL"] = _without_smote
41
+
42
# Page title shown at the top of the app
st.title('Customer Churn Prediction')

# Sidebar: pick between scoring one customer and scoring an uploaded file
st.sidebar.header('Interface Selection')
interface = st.sidebar.radio(
    label="Choose an interface",
    options=("Single Prediction", "Batch Prediction"),
)

# Sidebar: pick which loaded models take part in the vote.
# Every loaded model is pre-selected.
st.sidebar.header('Model Selection')
available_model_names = list(models)
selected_models = st.sidebar.multiselect(
    label='Select models for prediction',
    options=available_model_names,
    default=available_model_names,
)

# Choices offered by the categorical select boxes of the single-prediction form.
# NOTE(review): 'Sliver' is kept verbatim; presumably it mirrors a category
# present in the training data - confirm before "fixing" the spelling.
crm_pid_value_segment_options = [
    'Bronze',
    'Iron',
    'Gold',
    'Silver',
    'Lead',
    'Platinum',
    'SME',
    'SE',
    'Sliver',
    'Unknown',
]
effective_segment_options = [
    'SOHO',
    'VSE',
    'Other',
    'SME',
    'LE',
    'SE',
]
ka_name_options = [
    'Vladimir Manahilov',
    'Desislava Ivanova',
    'Martin Tilev',
    'Anna Dimitrova',
    'Rumiana Jordanova',
    'Anna Dimova',
    'Vania Uzunova',
    'Varta Torosian',
    'Daniela Stefanova',
    'Ginka Vachkova',
    'Tatiana Trifonova',
    'Jenia Gogova',
    'Unknown',
]
68
+
69
def _row_slice(features, idx):
    """Return row *idx* of *features* as a one-row, two-dimensional object.

    DataFrames are sliced by position with ``iloc``; anything else (dense or
    sparse matrices) is sliced with ``features[idx:idx + 1]``, which keeps
    the row dimension.
    """
    if hasattr(features, 'iloc'):
        return features.iloc[[idx]]
    return features[idx:idx + 1]


def _transform_if_needed(frame):
    """Run the standalone preprocessor on *frame* only when it is required.

    Returns ``None`` when every selected model embeds its own preprocessor,
    otherwise the output of ``preprocessor.transform(frame)``. A failing
    transform is reported with ``st.error`` and the script run is stopped.
    """
    needs_transform = any(
        not models_with_preprocessor[name] for name in selected_models)
    if not needs_transform:
        return None
    try:
        return preprocessor.transform(frame)
    except Exception as e:
        st.error(f"Error during data preprocessing: {e}")
        st.stop()


def _batch_votes(model_name, raw_df, transformed):
    """Predict churn for every row of a batch with one model.

    Returns ``(is_churn, predicted, first_error)``:

    * ``is_churn``  - boolean array, True where the model predicted label 1
    * ``predicted`` - boolean array, True where a prediction was obtained
    * ``first_error`` - the exception raised by the whole-batch call, or
      ``None`` when that call succeeded

    The model is called once for the whole batch. Only if that call raises
    is it retried row by row, so rows the model can handle still get a vote.
    """
    model = models[model_name]
    # Models that embed a preprocessor get raw data, the others get the
    # already preprocessed matrix.
    features = raw_df if models_with_preprocessor[model_name] else transformed
    n_rows = raw_df.shape[0]

    try:
        is_churn = np.asarray(model.predict(features)).ravel() == 1
    except Exception as e:
        # Keep a reference: the `as` name is cleared when the handler exits.
        first_error = e
    else:
        return is_churn, np.ones(n_rows, dtype=bool), None

    is_churn = np.zeros(n_rows, dtype=bool)
    predicted = np.zeros(n_rows, dtype=bool)
    for idx in range(n_rows):
        try:
            row_prediction = model.predict(_row_slice(features, idx))
        except Exception:
            # Failed rows stay marked as not predicted; the caller reports
            # how many there were.
            continue
        is_churn[idx] = row_prediction[0] == 1
        predicted[idx] = True
    return is_churn, predicted, first_error


if interface == "Single Prediction":
    # Input fields for new customer data
    st.header('Enter New Customer Data')

    # Collect input data; insertion order defines the DataFrame column order
    input_data = {}

    # Categorical inputs
    input_data['CRM_PID_VALUE_SEGMENT'] = st.selectbox(
        'CRM_PID_VALUE_SEGMENT', crm_pid_value_segment_options)
    input_data['EFFECTIVESEGMENT'] = st.selectbox(
        'EFFECTIVESEGMENT', effective_segment_options)
    input_data['KA_NAME'] = st.selectbox('KA_NAME', ka_name_options)

    # Whole-number inputs
    for field in ('BILLING_ZIP', 'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS',
                  'SUSPENDED_SUBSCRIBERS', 'TOTAL_SUBS'):
        input_data[field] = st.number_input(field, min_value=0, format="%d")

    # Decimal inputs
    for field in ('AVGMOBILEREVENUE', 'AVGFIXREVENUE', 'TOTALREVENUE', 'ARPU'):
        input_data[field] = st.number_input(
            field, min_value=0.0, format="%.2f")

    # Predict churn
    if st.button('Predict Churn'):
        # Same guard as the batch interface: nothing to do without models
        if not selected_models:
            st.error(
                "No models selected for prediction. Please select at least one model in the sidebar.")
            st.stop()

        # Convert input data to a one-row DataFrame
        input_df = pd.DataFrame([input_data])

        # Preprocess the data only if needed
        input_data_transformed = _transform_if_needed(input_df)

        st.write("### Model Predictions")

        # Each model votes with its accuracy as weight
        weighted_votes = {'Churn': 0, 'No Churn': 0}

        for model_name in selected_models:
            model = models[model_name]
            if models_with_preprocessor[model_name]:
                # Model includes preprocessor; use raw data
                features = input_df
            else:
                # Model does not include preprocessor; use preprocessed data
                features = input_data_transformed

            try:
                prediction = model.predict(features)
            except Exception as e:
                st.error(f"Error predicting with model {model_name}: {e}")
                continue

            churn_prediction = 'Churn' if prediction[0] == 1 else 'No Churn'

            # Add weighted vote (models without a listed accuracy weigh 1)
            weight = model_accuracies.get(model_name, 1)
            weighted_votes[churn_prediction] += weight

            # Display individual model predictions
            st.write(
                f"**{model_name}:** {churn_prediction} (Accuracy: {weight}%)")

        # Calculate and display the overall prediction
        total_weight = sum(weighted_votes.values())
        if total_weight == 0:
            st.error(
                "No valid predictions were made. Cannot compute churn probability.")
        else:
            # Share of the total weight that voted 'Churn'; a tie is 'No Churn'
            churn_probability = weighted_votes['Churn'] / total_weight
            overall_prediction = 'Churn' if churn_probability > 0.5 else 'No Churn'

            st.write("### Overall Prediction")
            st.write(f"**Final Prediction:** {overall_prediction}")
            st.write(f"**Churn Probability:** {churn_probability:.2%}")
            st.write(f"**No Churn Probability:** {1 - churn_probability:.2%}")

            # Visualize the predictions
            st.write("### Prediction Visualization")
            chart_data = pd.DataFrame(
                {
                    'Prediction': ['Churn', 'No Churn'],
                    'Weighted Vote': [
                        weighted_votes['Churn'],
                        weighted_votes['No Churn'],
                    ],
                }
            )
            st.bar_chart(chart_data.set_index('Prediction'))

elif interface == "Batch Prediction":
    # Batch Prediction Interface
    st.header('Batch Prediction')
    st.write('Upload a CSV file containing customer data.')

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if uploaded_file is not None:
        # Check if models are selected
        if not selected_models:
            st.error(
                "No models selected for prediction. Please select at least one model in the sidebar.")
            st.stop()

        # Read the uploaded CSV file
        try:
            df = pd.read_csv(uploaded_file)
            st.write(
                f"Uploaded data has {df.shape[0]} rows and {df.shape[1]} columns.")
        except Exception as e:
            st.error(f"Error reading the CSV file: {e}")
            st.stop()

        # Check for required columns
        required_columns = [
            'CRM_PID_VALUE_SEGMENT', 'EFFECTIVESEGMENT', 'BILLING_ZIP', 'KA_NAME',
            'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS', 'SUSPENDED_SUBSCRIBERS',
            'TOTAL_SUBS', 'AVGMOBILEREVENUE', 'AVGFIXREVENUE', 'TOTALREVENUE', 'ARPU'
        ]

        missing_columns = [
            col for col in required_columns if col not in df.columns]
        if missing_columns:
            st.error(
                f"The following required columns are missing from the uploaded file: {missing_columns}")
            st.stop()

        # Fill missing values if any: 'Unknown' for categories, zero for numbers
        df.fillna({
            'CRM_PID_VALUE_SEGMENT': 'Unknown',
            'EFFECTIVESEGMENT': 'Unknown',
            'KA_NAME': 'Unknown',
            'BILLING_ZIP': 0,
            'ACTIVE_SUBSCRIBERS': 0,
            'NOT_ACTIVE_SUBSCRIBERS': 0,
            'SUSPENDED_SUBSCRIBERS': 0,
            'TOTAL_SUBS': 0,
            'AVGMOBILEREVENUE': 0.0,
            'AVGFIXREVENUE': 0.0,
            'TOTALREVENUE': 0.0,
            'ARPU': 0.0
        }, inplace=True)

        # Preprocess the data only if needed
        data_transformed = _transform_if_needed(df)

        st.write("### Processing Batch Predictions...")

        # Per-row accumulators: weight that voted 'Churn' and weight that
        # voted at all. Each model is evaluated once for the whole batch.
        n_rows = df.shape[0]
        churn_weight = np.zeros(n_rows)
        total_weight = np.zeros(n_rows)

        for model_name in selected_models:
            is_churn, predicted, first_error = _batch_votes(
                model_name, df, data_transformed)

            failed_rows = n_rows - int(predicted.sum())
            if failed_rows:
                # One summary message per model instead of one per row
                st.error(
                    f"Error predicting with model {model_name} on "
                    f"{failed_rows} of {n_rows} samples: {first_error}")

            # Add weighted vote (models without a listed accuracy weigh 1)
            weight = model_accuracies.get(model_name, 1)
            churn_weight += weight * (is_churn & predicted)
            total_weight += weight * predicted

        # Rows for which no model produced a prediction cannot be scored
        has_votes = total_weight > 0
        unscored_rows = n_rows - int(has_votes.sum())
        if unscored_rows:
            st.error(
                f"No models could make predictions for {unscored_rows} sample(s). "
                "Cannot compute churn probability for them.")

        # Calculate overall prediction per row; unscored rows stay NaN/'Unknown'
        churn_probability = np.full(n_rows, np.nan)
        churn_probability[has_votes] = (
            churn_weight[has_votes] / total_weight[has_votes])
        final_prediction = np.where(
            churn_probability > 0.5, 'Churn', 'No Churn')
        final_prediction = np.where(has_votes, final_prediction, 'Unknown')

        # Store results next to the (NaN-filled) uploaded data
        prediction_results = df.copy()
        prediction_results['Final Prediction'] = final_prediction
        prediction_results['Churn Probability'] = churn_probability

        st.success('Batch predictions completed.')

        # Display a sample of the results
        st.write("### Prediction Results")
        st.dataframe(prediction_results.head())

        # Allow user to download the results
        csv = prediction_results.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Prediction Results as CSV",
            data=csv,
            file_name='batch_predictions.csv',
            mime='text/csv',
        )

    else:
        st.info('Awaiting CSV file to be uploaded.')
305
+
306
# Sidebar footer: how many models were loaded and selected, followed by the
# accuracy table used for weighting the votes.
sidebar = st.sidebar
sidebar.write("### Model Information")

total_available = len(models)
total_selected = len(selected_models)
sidebar.write(f"Total models available: {total_available}")
sidebar.write(f"Models selected for prediction: {total_selected}")

sidebar.write("### Model Accuracies")
for listed_name in model_accuracies:
    listed_accuracy = model_accuracies[listed_name]
    sidebar.write(f"{listed_name}: {listed_accuracy}%")