Spaces:

hhhar
/

ChurnPredUpdated

Sleeping

App Files Files Community

hhhar committed on Sep 14, 2024

Commit

1a947f0

verified ·

1 Parent(s): 356d400

Create app.py

Browse files

Files changed (1) hide show

app.py +312 -0

app.py ADDED Viewed

	@@ -0,0 +1,312 @@

+import streamlit as st
+import joblib
+import numpy as np
+import os
+import pandas as pd
+# Load the preprocessor
+preprocessor_path = 'modelExports/preprocessor.pkl'
+preprocessor = joblib.load(preprocessor_path)
+# Load models and record whether they include the preprocessor
+model_folder = 'modelExports'
+models = {}
+models_with_preprocessor = {}
+for file_name in os.listdir(model_folder):
+    if file_name.endswith('.pkl') and file_name != 'preprocessor.pkl':
+        model_name = file_name.replace('.pkl', '').replace('_', ' ').upper()
+        model = joblib.load(os.path.join(model_folder, file_name))
+        models[model_name] = model
+        # Check if model includes preprocessor
+        includes_preprocessor = hasattr(
+            model, 'named_steps') and 'preprocessor' in model.named_steps
+        models_with_preprocessor[model_name] = includes_preprocessor
+# Model accuracies
+model_accuracies = {
+    "GAUSSIAN NAIVE BAYES WITH SMOTE MODEL": 86,
+    "GAUSSIAN NAIVE BAYES WITHOUT SMOTE MODEL": 85,
+    "GRADIENT BOOSTING WITH SMOTE MODEL": 95,
+    "GRADIENT BOOSTING WITHOUT SMOTE MODEL": 94,
+    "LINEAR DISCRIMINANT ANALYSIS WITH SMOTE MODEL": 88,
+    "LINEAR DISCRIMINANT ANALYSIS WITHOUT SMOTE MODEL": 87,
+    "LOGISTIC REGRESSION WITH SMOTE MODEL": 90,
+    "LOGISTIC REGRESSION WITHOUT SMOTE MODEL": 89,
+    "RANDOM FOREST WITH SMOTE MODEL": 95,
+    "RANDOM FOREST WITHOUT SMOTE MODEL": 93,
+    "SUPPORT VECTOR MACHINE WITH SMOTE MODEL": 91,
+    "SUPPORT VECTOR MACHINE WITHOUT SMOTE MODEL": 90
+}
+# Define the Streamlit app
+st.title('Customer Churn Prediction')
+# Sidebar for interface selection
+st.sidebar.header('Interface Selection')
+interface = st.sidebar.radio(
+    "Choose an interface",
+    ("Single Prediction", "Batch Prediction")
+)
+# Sidebar for model selection
+st.sidebar.header('Model Selection')
+selected_models = st.sidebar.multiselect(
+    'Select models for prediction',
+    list(models.keys()),
+    default=list(models.keys())
+)
+# Define categorical options
+crm_pid_value_segment_options = ['Bronze', 'Iron', 'Gold', 'Silver', 'Lead',
+                                 'Platinum', 'SME', 'SE', 'Sliver', 'Unknown']
+effective_segment_options = ['SOHO', 'VSE', 'Other', 'SME', 'LE', 'SE']
+ka_name_options = ['Vladimir Manahilov', 'Desislava Ivanova', 'Martin Tilev',
+                   'Anna Dimitrova', 'Rumiana Jordanova', 'Anna Dimova',
+                   'Vania Uzunova', 'Varta Torosian', 'Daniela Stefanova',
+                   'Ginka Vachkova', 'Tatiana Trifonova', 'Jenia Gogova', 'Unknown']
+if interface == "Single Prediction":
+    # Input fields for new customer data
+    st.header('Enter New Customer Data')
+    # Collect input data
+    input_data = {}
+    # Categorical inputs
+    input_data['CRM_PID_VALUE_SEGMENT'] = st.selectbox(
+        'CRM_PID_VALUE_SEGMENT', crm_pid_value_segment_options)
+    input_data['EFFECTIVESEGMENT'] = st.selectbox(
+        'EFFECTIVESEGMENT', effective_segment_options)
+    input_data['KA_NAME'] = st.selectbox('KA_NAME', ka_name_options)
+    # Numerical inputs
+    input_data['BILLING_ZIP'] = st.number_input(
+        'BILLING_ZIP', min_value=0, format="%d")
+    input_data['ACTIVE_SUBSCRIBERS'] = st.number_input(
+        'ACTIVE_SUBSCRIBERS', min_value=0, format="%d")
+    input_data['NOT_ACTIVE_SUBSCRIBERS'] = st.number_input(
+        'NOT_ACTIVE_SUBSCRIBERS', min_value=0, format="%d")
+    input_data['SUSPENDED_SUBSCRIBERS'] = st.number_input(
+        'SUSPENDED_SUBSCRIBERS', min_value=0, format="%d")
+    input_data['TOTAL_SUBS'] = st.number_input(
+        'TOTAL_SUBS', min_value=0, format="%d")
+    input_data['AVGMOBILEREVENUE'] = st.number_input(
+        'AVGMOBILEREVENUE', min_value=0.0, format="%.2f")
+    input_data['AVGFIXREVENUE'] = st.number_input(
+        'AVGFIXREVENUE', min_value=0.0, format="%.2f")
+    input_data['TOTALREVENUE'] = st.number_input(
+        'TOTALREVENUE', min_value=0.0, format="%.2f")
+    input_data['ARPU'] = st.number_input('ARPU', min_value=0.0, format="%.2f")
+    # Predict churn
+    if st.button('Predict Churn'):
+        # Convert input data to DataFrame
+        input_df = pd.DataFrame([input_data])
+        # Preprocess the data only if needed
+        input_data_transformed = preprocessor.transform(input_df)
+        st.write("### Model Predictions")
+        predictions = {}
+        weighted_votes = {'Churn': 0, 'No Churn': 0}
+        for model_name in selected_models:
+            model = models[model_name]
+            includes_preprocessor = models_with_preprocessor[model_name]
+            try:
+                if includes_preprocessor:
+                    # Model includes preprocessor; use raw data
+                    prediction = model.predict(input_df)
+                else:
+                    # Model does not include preprocessor; use preprocessed data
+                    prediction = model.predict(input_data_transformed)
+            except Exception as e:
+                st.error(f"Error predicting with model {model_name}: {e}")
+                continue
+            churn_prediction = 'Churn' if prediction[0] == 1 else 'No Churn'
+            predictions[model_name] = churn_prediction
+            # Add weighted vote
+            weight = model_accuracies.get(model_name, 1)
+            weighted_votes[churn_prediction] += weight
+            # Display individual model predictions
+            st.write(
+                f"**{model_name}:** {churn_prediction} (Accuracy: {weight}%)")
+        # Calculate and display the overall prediction
+        total_weight = sum(weighted_votes.values())
+        if total_weight == 0:
+            st.error(
+                "No valid predictions were made. Cannot compute churn probability.")
+        else:
+            churn_probability = weighted_votes['Churn'] / total_weight
+            overall_prediction = 'Churn' if churn_probability > 0.5 else 'No Churn'
+            st.write("### Overall Prediction")
+            st.write(f"**Final Prediction:** {overall_prediction}")
+            st.write(f"**Churn Probability:** {churn_probability:.2%}")
+            st.write(f"**No Churn Probability:** {1 - churn_probability:.2%}")
+            # Visualize the predictions
+            st.write("### Prediction Visualization")
+            chart_data = pd.DataFrame(
+                {
+                    'Prediction': ['Churn', 'No Churn'],
+                    'Weighted Vote': [
+                        weighted_votes['Churn'],
+                        weighted_votes['No Churn']
+                    ]
+                }
+            )
+            st.bar_chart(chart_data.set_index('Prediction'))
+elif interface == "Batch Prediction":
+    # Batch Prediction Interface
+    st.header('Batch Prediction')
+    st.write('Upload a CSV file containing customer data.')
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    if uploaded_file is not None:
+        # Check if models are selected
+        if not selected_models:
+            st.error(
+                "No models selected for prediction. Please select at least one model in the sidebar.")
+            st.stop()
+        # Read the uploaded CSV file
+        try:
+            df = pd.read_csv(uploaded_file)
+            st.write(
+                f"Uploaded data has {df.shape[0]} rows and {df.shape[1]} columns.")
+        except Exception as e:
+            st.error(f"Error reading the CSV file: {e}")
+            st.stop()
+        # Check for required columns
+        required_columns = [
+            'CRM_PID_VALUE_SEGMENT', 'EFFECTIVESEGMENT', 'BILLING_ZIP', 'KA_NAME',
+            'ACTIVE_SUBSCRIBERS', 'NOT_ACTIVE_SUBSCRIBERS', 'SUSPENDED_SUBSCRIBERS',
+            'TOTAL_SUBS', 'AVGMOBILEREVENUE', 'AVGFIXREVENUE', 'TOTALREVENUE', 'ARPU'
+        ]
+        missing_columns = [
+            col for col in required_columns if col not in df.columns]
+        if missing_columns:
+            st.error(
+                f"The following required columns are missing from the uploaded file: {missing_columns}")
+            st.stop()
+        # Fill missing values if any
+        df.fillna({
+            'CRM_PID_VALUE_SEGMENT': 'Unknown',
+            'EFFECTIVESEGMENT': 'Unknown',
+            'KA_NAME': 'Unknown',
+            'BILLING_ZIP': 0,
+            'ACTIVE_SUBSCRIBERS': 0,
+            'NOT_ACTIVE_SUBSCRIBERS': 0,
+            'SUSPENDED_SUBSCRIBERS': 0,
+            'TOTAL_SUBS': 0,
+            'AVGMOBILEREVENUE': 0.0,
+            'AVGFIXREVENUE': 0.0,
+            'TOTALREVENUE': 0.0,
+            'ARPU': 0.0
+        }, inplace=True)
+        # Preprocess the data only if needed
+        try:
+            data_transformed = preprocessor.transform(df)
+        except Exception as e:
+            st.error(f"Error during data preprocessing: {e}")
+            st.stop()
+        # Initialize a DataFrame to store predictions
+        prediction_results = df.copy()
+        prediction_results['Final Prediction'] = ''
+        prediction_results['Churn Probability'] = 0.0
+        st.write("### Processing Batch Predictions...")
+        for idx in range(df.shape[0]):
+            sample_raw = df.iloc[[idx]]  # Raw data as DataFrame
+            sample_preprocessed = data_transformed[idx].reshape(
+                1, -1)  # Preprocessed data
+            weighted_votes = {'Churn': 0, 'No Churn': 0}
+            models_used = 0
+            for model_name in selected_models:
+                model = models[model_name]
+                includes_preprocessor = models_with_preprocessor[model_name]
+                try:
+                    if includes_preprocessor:
+                        # Model includes preprocessor; use raw data
+                        prediction = model.predict(sample_raw)
+                    else:
+                        # Model does not include preprocessor; use preprocessed data
+                        prediction = model.predict(sample_preprocessed)
+                    models_used += 1
+                except Exception as e:
+                    st.error(
+                        f"Error predicting with model {model_name} on sample {idx}: {e}")
+                    continue
+                churn_prediction = 'Churn' if prediction[0] == 1 else 'No Churn'
+                # Add weighted vote
+                weight = model_accuracies.get(model_name, 1)
+                weighted_votes[churn_prediction] += weight
+            # Check if any models made predictions
+            if models_used == 0:
+                st.error(f"No models could make predictions for sample {idx}.")
+                prediction_results.at[idx, 'Final Prediction'] = 'Unknown'
+                prediction_results.at[idx, 'Churn Probability'] = None
+                continue  # Skip to the next sample
+            # Calculate overall prediction for the sample
+            total_weight = sum(weighted_votes.values())
+            if total_weight == 0:
+                st.error(
+                    f"No valid predictions were made for sample {idx}. Cannot compute churn probability.")
+                prediction_results.at[idx, 'Final Prediction'] = 'Unknown'
+                prediction_results.at[idx, 'Churn Probability'] = None
+                continue  # Skip to the next sample
+            churn_probability = weighted_votes['Churn'] / total_weight
+            overall_prediction = 'Churn' if churn_probability > 0.5 else 'No Churn'
+            # Store results
+            prediction_results.at[idx, 'Final Prediction'] = overall_prediction
+            prediction_results.at[idx, 'Churn Probability'] = churn_probability
+        st.success('Batch predictions completed.')
+        # Display a sample of the results
+        st.write("### Prediction Results")
+        st.dataframe(prediction_results.head())
+        # Allow user to download the results
+        csv = prediction_results.to_csv(index=False).encode('utf-8')
+        st.download_button(
+            label="Download Prediction Results as CSV",
+            data=csv,
+            file_name='batch_predictions.csv',
+            mime='text/csv',
+        )
+    else:
+        st.info('Awaiting CSV file to be uploaded.')
+# Sidebar information
+st.sidebar.write("### Model Information")
+st.sidebar.write(f"Total models available: {len(models)}")
+st.sidebar.write(f"Models selected for prediction: {len(selected_models)}")
+st.sidebar.write("### Model Accuracies")
+for model, accuracy in model_accuracies.items():
+    st.sidebar.write(f"{model}: {accuracy}%")