Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_recall_curve | |
| import xgboost as xgb | |
| import shap | |
| import joblib | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
# --- Page setup --------------------------------------------------------------
# Browser-tab title/icon, wide layout, sidebar expanded on first load.
_PAGE_SETTINGS = {
    "page_title": "E-commerce Churn Prediction",
    "page_icon": "🛒",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Stylesheet injected once per run. Classes such as main-header,
# prediction-box, churn-risk-high/low and recommendation-box are referenced by
# the HTML fragments rendered elsewhere in this file.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 3rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .prediction-box {
        padding: 20px;
        border-radius: 10px;
        margin: 10px 0px;
        background-color: #f0f2f6;
    }
    .churn-risk-high {
        background-color: #ffe6e6;
        border-left: 5px solid #ff4d4d;
        color: #cc0000;
    }
    .churn-risk-low {
        background-color: #e6ffe6;
        border-left: 5px solid #00cc66;
        color: #006600;
    }
    .feature-importance {
        background-color: #ffffff;
        padding: 15px;
        border-radius: 10px;
        border: 1px solid #ddd;
    }
    .recommendation-box {
        padding: 15px;
        border-radius: 8px;
        margin: 10px 0px;
        background-color: #f0f8ff;
        border-left: 5px solid #4682b4;
        color: #2c3e50;
    }
    .shap-explanation {
        background-color: #f8f9fa;
        padding: 15px;
        border-radius: 8px;
        border: 1px solid #dee2e6;
        margin: 10px 0px;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
class ChurnPredictor:
    """Load the e-commerce data, fit an XGBoost churn model and query it.

    Typical flow: ``load_data`` -> ``preprocess_data`` -> ``train_model``,
    after which ``predict_churn``, ``what_if_analysis`` and
    ``explain_prediction`` can be called with raw (untransformed) rows.
    """

    def __init__(self):
        self.model = None          # set by train_model
        self.preprocessor = None   # fitted ColumnTransformer, set by preprocess_data
        self.feature_names = None  # column names of the transformed matrix
        self.target_name = 'Churn'

    def _require_trained(self):
        """Raise ``ValueError`` unless both preprocessor and model are set."""
        if self.model is None or self.preprocessor is None:
            raise ValueError("Model not trained yet!")

    @staticmethod
    def _positive_class_row(shap_values):
        """Return the SHAP vector of the first sample for the churn class.

        Accepts the layouts SHAP may hand back: a list with one array per
        class, a 2-D ``(samples, features)`` array, or a 3-D
        ``(samples, features, outputs)`` array. The last class/output is
        used, which is the positive class for a binary model.
        """
        if isinstance(shap_values, list):
            shap_values = shap_values[-1]
        shap_values = np.asarray(shap_values)
        if shap_values.ndim == 3:
            shap_values = shap_values[..., -1]
        return shap_values[0]

    @staticmethod
    def _positive_class_base(expected_value):
        """Collapse a scalar, list or array base value to one float."""
        # ravel() turns scalars, lists and arrays into a 1-D array; the last
        # entry is the positive class when one value per class is reported.
        return float(np.ravel(expected_value)[-1])

    def load_data(self):
        """Download the dataset and derive the ``RecentOrder`` flag.

        Returns:
            DataFrame without duplicate rows and with an extra 0/1 column
            ``RecentOrder`` that is 1 when ``DaySinceLastOrder`` <= 30.
        """
        url = "https://raw.githubusercontent.com/Ricendfish/M1-Assignment/main/data_ecommerce(in).csv"
        df = pd.read_csv(url).drop_duplicates()
        # NOTE: a missing DaySinceLastOrder compares False against 30, so such
        # rows get RecentOrder = 0 rather than a missing value.
        df['RecentOrder'] = np.where(df['DaySinceLastOrder'] <= 30, 1, 0)
        return df

    def preprocess_data(self, df):
        """Fit the preprocessing pipeline on ``df`` and transform it.

        Numerical columns are median-imputed and standardised, categorical
        columns are mode-imputed and one-hot encoded (first level dropped),
        binary columns are mode-imputed only. All other columns are ignored.

        Args:
            df: Frame containing the feature columns and the target column.

        Returns:
            ``(X_processed, y, feature_names)``. The fitted transformer and
            the feature names are also stored on the instance.
        """
        X = df.drop(columns=self.target_name)
        y = df[self.target_name]

        numerical_features = ['Tenure', 'WarehouseToHome', 'NumberOfDeviceRegistered',
                              'SatisfactionScore', 'NumberOfAddress', 'CashbackAmount']
        categorical_features = ['PreferedOrderCat', 'MaritalStatus']
        binary_features = ['Complain', 'RecentOrder']

        numerical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ])
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(drop='first', sparse_output=False)),
        ])
        binary_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
        ])

        self.preprocessor = ColumnTransformer(
            transformers=[
                ('num', numerical_transformer, numerical_features),
                ('cat', categorical_transformer, categorical_features),
                ('bin', binary_transformer, binary_features),
            ])

        # NOTE(review): the transformer is fitted on every row passed in, i.e.
        # before train_model splits off its test set, so imputation/scaling
        # statistics include the evaluation rows.
        X_processed = self.preprocessor.fit_transform(X)

        # Rebuild column names in the transformer's output order: num, cat, bin.
        onehot = self.preprocessor.named_transformers_['cat'].named_steps['onehot']
        feature_names = [
            *numerical_features,
            *onehot.get_feature_names_out(categorical_features),
            *binary_features,
        ]
        self.feature_names = feature_names
        return X_processed, y, feature_names

    def train_model(self, X, y):
        """Fit the XGBoost classifier on an 80/20 stratified split.

        Args:
            X: Transformed feature matrix (output of ``preprocess_data``).
            y: Target labels aligned with ``X``.

        Returns:
            ``(X_test, y_test, y_pred, y_pred_proba, accuracy, auc_score)``
            computed on the held-out 20 %.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
        self.model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=6,
            learning_rate=0.1,
            random_state=42,
        )
        self.model.fit(X_train, y_train)

        y_pred = self.model.predict(X_test)
        y_pred_proba = self.model.predict_proba(X_test)[:, 1]
        accuracy = self.model.score(X_test, y_test)
        auc_score = roc_auc_score(y_test, y_pred_proba)
        return X_test, y_test, y_pred, y_pred_proba, accuracy, auc_score

    def predict_churn(self, input_data):
        """Predict churn for the first row of ``input_data``.

        Args:
            input_data: Raw feature frame with the columns the fitted
                preprocessor expects.

        Returns:
            ``(predicted_label, churn_probability)`` for the first row.

        Raises:
            ValueError: If the model or preprocessor has not been fitted.
        """
        self._require_trained()
        input_processed = self.preprocessor.transform(input_data)
        prediction = self.model.predict(input_processed)
        probability = self.model.predict_proba(input_processed)[:, 1]
        return prediction[0], probability[0]

    def what_if_analysis(self, base_data, feature_to_change, values_range):
        """Sweep one feature over ``values_range`` and collect probabilities.

        ``base_data`` itself is never modified; each scenario works on a copy.

        Returns:
            List of churn probabilities, one per value, in input order.
        """
        probabilities = []
        for value in values_range:
            scenario = base_data.copy()
            scenario[feature_to_change] = value
            probabilities.append(self.predict_churn(scenario)[1])
        return probabilities

    def explain_prediction(self, input_data):
        """Compute SHAP contributions for the first row of ``input_data``.

        Returns:
            ``(shap_values, expected_value, feature_names)`` where
            ``shap_values`` is the 1-D contribution vector for the churn
            class and ``expected_value`` is the explainer's base value as a
            float.

        Raises:
            ValueError: If the model or preprocessor has not been fitted.
        """
        self._require_trained()
        input_processed = self.preprocessor.transform(input_data)
        explainer = shap.TreeExplainer(self.model)
        shap_values = explainer.shap_values(input_processed)
        return (
            self._positive_class_row(shap_values),
            self._positive_class_base(explainer.expected_value),
            self.feature_names,
        )
def main():
    """Render the page header and sidebar, then dispatch to the chosen page.

    Streamlit re-executes this function on every widget interaction, so the
    downloaded DataFrame is kept in ``st.session_state`` and fetched only once
    per browser session. A fresh ``ChurnPredictor`` is still created on every
    run; the page functions train it on demand.
    """
    st.markdown(
        '<h1 class="main-header">🛒 E-commerce Customer Churn Prediction</h1>',
        unsafe_allow_html=True,
    )

    predictor = ChurnPredictor()

    # Pages in sidebar order. The lambdas defer the lookup of `df`, which is
    # assigned further down, until the selected page is actually rendered.
    pages = {
        "Data Overview": lambda: show_data_overview(df),
        "EDA": lambda: show_eda(df),
        "Churn Prediction": lambda: show_churn_prediction(predictor, df),
        "What-If Analysis": lambda: show_what_if_analysis(predictor, df),
        "Model Insights": lambda: show_model_insights(predictor, df),
    }

    st.sidebar.title("Navigation")
    app_mode = st.sidebar.selectbox("Choose App Mode", list(pages))

    # Download once per session instead of on every rerun. The page functions
    # in this file only read from the frame (or copy it), so sharing is safe.
    if "_churn_df" not in st.session_state:
        with st.spinner('Loading data...'):
            st.session_state["_churn_df"] = predictor.load_data()
    df = st.session_state["_churn_df"]

    render_page = pages.get(app_mode)
    if render_page is not None:
        render_page()
def show_data_overview(df):
    """Show a preview, basic shape/missing-value facts and summary statistics.

    Args:
        df: Dataset containing a ``Churn`` column whose values are 0 / 1.
    """
    st.header("📊 Data Overview")

    preview_col, info_col = st.columns([2, 1])
    with preview_col:
        st.subheader("Dataset Preview")
        st.dataframe(df.head(10), use_container_width=True)

    with info_col:
        st.subheader("Dataset Info")
        st.write(f"**Shape:** {df.shape}")
        st.write(f"**Columns:** {len(df.columns)}")
        st.write(f"**Missing Values:** {df.isnull().sum().sum()}")

        # Absolute row counts per class (not rates). `.get` keeps the page
        # working when one of the two classes is absent from the data.
        churn_counts = df['Churn'].value_counts()
        churned_count = churn_counts.get(1, 0)
        retained_count = churn_counts.get(0, 0)
        st.metric("Customers Likely to Churn", f"{churned_count}")
        st.metric("Customers Not Likely to Churn", f"{retained_count}")

    st.subheader("Data Description")
    st.dataframe(df.describe(), use_container_width=True)
def show_eda(df):
    """Render the exploratory charts: churn split and churn-rate trends.

    ``df`` is only read; the derived columns live in small temporary frames.

    Args:
        df: Dataset with ``Churn``, ``MaritalStatus``, ``SatisfactionScore``,
            ``Tenure`` and ``CashbackAmount`` columns.
    """
    st.header("📈 Exploratory Data Analysis")

    pie_col, marital_col = st.columns(2)
    with pie_col:
        # One label per row is all the pie chart needs to count the classes.
        churn_labels = pd.DataFrame({
            'Churn_Label': df['Churn'].map({0: 'Not Likely to Churn', 1: 'Likely to Churn'})
        })
        fig = px.pie(churn_labels, names='Churn_Label', title='Churn Distribution',
                     color='Churn_Label',
                     color_discrete_map={'Not Likely to Churn': 'lightblue',
                                         'Likely to Churn': 'lightcoral'})
        st.plotly_chart(fig, use_container_width=True)

    with marital_col:
        churn_by_marital = (
            df.groupby('MaritalStatus')['Churn'].mean()
            .rename('Churn_Rate')
            .reset_index()
        )
        fig = px.bar(churn_by_marital, x='MaritalStatus', y='Churn_Rate',
                     title='Churn Rate by Marital Status', color='MaritalStatus')
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("Trend Analysis")

    # Mean of the 0/1 Churn column per group = churn rate of that group.
    satisfaction_churn = df.groupby('SatisfactionScore')['Churn'].mean().reset_index()
    fig1 = px.line(satisfaction_churn, x='SatisfactionScore', y='Churn',
                   title='Churn Rate vs Satisfaction Score',
                   markers=True)
    fig1.update_layout(xaxis_title="Satisfaction Score", yaxis_title="Churn Rate")
    st.plotly_chart(fig1, use_container_width=True)

    tenure_col, cashback_col = st.columns(2)
    with tenure_col:
        tenure_churn = df.groupby('Tenure')['Churn'].mean().reset_index()
        fig2 = px.line(tenure_churn, x='Tenure', y='Churn',
                       title='Churn Rate vs Customer Tenure',
                       markers=True)
        fig2.update_layout(xaxis_title="Tenure (months)", yaxis_title="Churn Rate")
        st.plotly_chart(fig2, use_container_width=True)

    with cashback_col:
        # Ten equal-width cashback bins. observed=False is passed explicitly
        # so empty bins stay in the result regardless of the pandas default.
        cashback_bins = pd.cut(df['CashbackAmount'], bins=10).rename('Cashback_Bin')
        cashback_churn = (
            df.groupby(cashback_bins, observed=False)['Churn'].mean().reset_index()
        )
        # Plain float midpoints so the x axis is numeric rather than categorical.
        cashback_churn['Cashback_Mid'] = [
            interval.mid for interval in cashback_churn['Cashback_Bin']
        ]
        fig3 = px.line(cashback_churn, x='Cashback_Mid', y='Churn',
                       title='Churn Rate vs Cashback Amount',
                       markers=True)
        fig3.update_layout(xaxis_title="Cashback Amount", yaxis_title="Churn Rate")
        st.plotly_chart(fig3, use_container_width=True)
# Friendlier labels for transformed feature columns in the SHAP summary;
# names not listed here are shown unchanged.
_READABLE_FEATURE_NAMES = {
    'Tenure': 'Customer Tenure',
    'SatisfactionScore': 'Satisfaction Score',
    'CashbackAmount': 'Cashback Amount',
    'Complain': 'Complaint Status',
    'WarehouseToHome': 'Distance to Warehouse',
    'NumberOfDeviceRegistered': 'Number of Devices',
    'NumberOfAddress': 'Number of Addresses',
    'RecentOrder': 'Recent Order Activity',
    'PreferedOrderCat_Mobile': 'Preferred Category: Mobile',
    'PreferedOrderCat_Laptop & Accessory': 'Preferred Category: Electronics',
    'PreferedOrderCat_Fashion': 'Preferred Category: Fashion',
    'PreferedOrderCat_Grocery': 'Preferred Category: Grocery',
    'MaritalStatus_Married': 'Marital Status: Married',
    'MaritalStatus_Single': 'Marital Status: Single',
}

# Simplified form categories -> value sent to the model as PreferedOrderCat
# (presumably the labels present in the training data; verify against the CSV).
_FORM_CATEGORY_TO_ORDER_CAT = {
    'Electronics': 'Laptop & Accessory',
    'Fashion': 'Fashion',
    'Grocery': 'Grocery',
    'Home & Kitchen': 'Others',
    'Others': 'Others',
}


def _collect_customer_inputs():
    """Render the customer form and return the answers as a one-row frame."""
    st.subheader("Enter Customer Details")
    col1, col2, col3 = st.columns(3)

    with col1:
        tenure = st.slider("Tenure (months)", 0, 60, 12)
        warehouse_to_home = st.slider("Distance to Warehouse (km)", 5, 50, 15)
        num_devices = st.slider("Number of Devices Registered", 1, 6, 3)
        satisfaction = st.slider("Satisfaction Score", 1, 5, 3)

    with col2:
        num_addresses = st.slider("Number of Addresses", 1, 20, 4)
        cashback = st.slider("Cashback Amount", 0.0, 300.0, 150.0)
        days_since_order = st.slider("Days Since Last Order", 0, 60, 7)
        complain_option = st.selectbox("Complaint Status", ["No Complaint", "Complaint Filed"])
        complain = 1 if complain_option == "Complaint Filed" else 0

    with col3:
        preferred_category = st.selectbox(
            "Preferred Category",
            ['Electronics', 'Fashion', 'Grocery', 'Home & Kitchen', 'Others'])
        marital_status = st.selectbox("Marital Status", ['Single', 'Married', 'Divorced'])

    return pd.DataFrame({
        'Tenure': [tenure],
        'WarehouseToHome': [warehouse_to_home],
        'NumberOfDeviceRegistered': [num_devices],
        'PreferedOrderCat': [_FORM_CATEGORY_TO_ORDER_CAT[preferred_category]],
        'SatisfactionScore': [satisfaction],
        'MaritalStatus': [marital_status],
        'NumberOfAddress': [num_addresses],
        'Complain': [complain],
        'DaySinceLastOrder': [days_since_order],
        'CashbackAmount': [cashback],
        # Same <= 30 day rule used when the training data is loaded.
        'RecentOrder': [1 if days_since_order <= 30 else 0],
    })


def _render_prediction_summary(prediction, probability):
    """Show the coloured verdict box with the label and churn probability."""
    st.subheader("Prediction Results")
    if prediction == 1:
        risk_class = "churn-risk-high"
        risk_text = "LIKELY TO CHURN"
        emoji = "🔴"
        prediction_label = "Likely to Churn"
    else:
        risk_class = "churn-risk-low"
        risk_text = "NOT LIKELY TO CHURN"
        emoji = "🟢"
        prediction_label = "Not Likely to Churn"

    st.markdown(f"""
    <div class="prediction-box {risk_class}">
        <h3>{emoji} Churn Prediction: {risk_text}</h3>
        <p><strong>Prediction:</strong> {prediction_label}</p>
        <p><strong>Probability:</strong> {probability:.2f}</p>
    </div>
    """, unsafe_allow_html=True)


def _render_shap_factors(predictor, input_data):
    """Chart the ten largest SHAP contributions and list the top five."""
    with st.spinner('Analyzing factors...'):
        shap_values, _base_value, feature_names = predictor.explain_prediction(input_data)
        shap_df = pd.DataFrame({
            'Feature': feature_names,
            'SHAP Value': shap_values,
        })
        shap_df['Abs_SHAP'] = np.abs(shap_df['SHAP Value'])
        shap_df = shap_df.sort_values('Abs_SHAP', ascending=False).head(10)

    st.subheader("📊 Factors Influencing Prediction")
    # Symmetric colour range so that zero sits at the middle of the scale.
    color_limit = shap_df['Abs_SHAP'].max()
    fig = px.bar(shap_df,
                 x='SHAP Value',
                 y='Feature',
                 orientation='h',
                 title='Top Factors Influencing Prediction',
                 color='SHAP Value',
                 color_continuous_scale='RdBu_r',
                 range_color=[-color_limit, color_limit])
    fig.update_layout(yaxis={'categoryorder': 'total ascending'})
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("🔑 Key Factors")
    for _, row in shap_df.head(5).iterrows():
        factor_name = row['Feature']
        display_name = _READABLE_FEATURE_NAMES.get(factor_name, factor_name)
        if row['SHAP Value'] > 0:
            st.write(f"🔴 **{display_name}** increased churn risk")
        else:
            st.write(f"🟢 **{display_name}** decreased churn risk")


def _render_recommendations(prediction):
    """Show the retention advice matching the predicted class."""
    st.subheader("📋 Recommendations")
    if prediction == 1:
        st.markdown("""
        <div class="recommendation-box">
            <h4>🛑 Customer Retention Actions Recommended:</h4>
            <ul>
                <li><strong>Improve the service:</strong> Identify the causes of recent complaints</li>
                <li><strong>Collect feedback:</strong> Carry out surveys in order to identify service issues</li>
                <li><strong>Cashback:</strong> Increase cashback for loyal customers</li>
                <li><strong>Loyalty programs:</strong> Special benefits and discounts for longterm customers</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)
    else:
        st.markdown("""
        <div class="recommendation-box">
            <h4>✅ Customer Retention Actions:</h4>
            <ul>
                <li><strong>Maintening current customers:</strong> Use loyalty programs, coupons</li>
                <li><strong>Constant checkins:</strong> Send short surveys to prevent complaints</li>
                <li><strong>Keep engagement:</strong> through special offers, bundles, time-limited offers</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)


def show_churn_prediction(predictor, df):
    """Single-customer prediction page: form, verdict, SHAP factors, advice.

    Args:
        predictor: ``ChurnPredictor``; it is trained on ``df`` here when it
            has no model yet.
        df: Training data as returned by ``ChurnPredictor.load_data``.
    """
    st.header("🔮 Churn Prediction")

    if predictor.model is None:
        with st.spinner('Training model...'):
            X_processed, y, _ = predictor.preprocess_data(df)
            predictor.train_model(X_processed, y)

    input_data = _collect_customer_inputs()

    if not st.button("Predict Churn", type="primary"):
        return

    # UI boundary: any failure while predicting or explaining is reported in
    # the page instead of surfacing as a stack trace.
    try:
        prediction, probability = predictor.predict_churn(input_data)
        _render_prediction_summary(prediction, probability)
        _render_shap_factors(predictor, input_data)
        _render_recommendations(prediction)
    except Exception as e:
        st.error(f"Error making prediction: {str(e)}")
def show_what_if_analysis(predictor, df):
    """What-if page: sweep one feature of a base customer and plot the result.

    Args:
        predictor: ``ChurnPredictor``; it is trained on ``df`` here when it
            has no model yet.
        df: Training data as returned by ``ChurnPredictor.load_data``.
    """
    st.header("🔍 What-If Analysis")
    st.markdown("Explore how changing different factors affects churn likelihood")

    if predictor.model is None:
        with st.spinner('Training model...'):
            X_processed, y, _ = predictor.preprocess_data(df)
            predictor.train_model(X_processed, y)

    st.subheader("Base Customer Profile")
    col1, col2 = st.columns(2)
    with col1:
        base_tenure = st.slider("Base Tenure (months)", 0, 60, 12, key="base_tenure")
        base_satisfaction = st.slider("Base Satisfaction Score", 1, 5, 3, key="base_satisfaction")
    with col2:
        base_cashback = st.slider("Base Cashback Amount", 0.0, 300.0, 150.0, key="base_cashback")
        base_warehouse_dist = st.slider("Base Warehouse Distance", 5, 50, 15, key="base_dist")
        base_complain = st.selectbox("Base Complaint Status", ["No Complaint", "Complaint Filed"], key="base_complain")
        base_complain_val = 1 if base_complain == "Complaint Filed" else 0

    # Fields without a widget are held at fixed values.
    base_data = pd.DataFrame({
        'Tenure': [base_tenure],
        'WarehouseToHome': [base_warehouse_dist],
        'NumberOfDeviceRegistered': [3],
        'PreferedOrderCat': ['Laptop & Accessory'],
        'SatisfactionScore': [base_satisfaction],
        'MaritalStatus': ['Single'],
        'NumberOfAddress': [4],
        'Complain': [base_complain_val],
        'DaySinceLastOrder': [7],
        'CashbackAmount': [base_cashback],
        'RecentOrder': [1],
    })

    st.subheader("What-If Scenario")
    # feature -> (slider minimum, slider maximum, sweep step)
    sweep_specs = {
        'SatisfactionScore': (1, 5, 1),
        'CashbackAmount': (0, 300, 30),
        'Tenure': (0, 60, 6),
        'WarehouseToHome': (5, 50, 5),
    }
    col1, col2 = st.columns(2)
    with col1:
        feature_to_test = st.selectbox("Feature to Analyze", list(sweep_specs))
    with col2:
        low, high, step = sweep_specs[feature_to_test]
        test_range = st.slider("Test Range", low, high, (low, high))
        # Always contains at least the lower bound of the selected range.
        values_range = list(range(test_range[0], test_range[1] + 1, step))

    if not st.button("Run What-If Analysis"):
        return

    with st.spinner('Analyzing scenarios...'):
        probabilities = predictor.what_if_analysis(base_data, feature_to_test, values_range)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=values_range,
        y=probabilities,
        mode='lines+markers',
        name='Churn Probability',
        line=dict(color='red', width=3),
        marker=dict(size=8),
    ))
    fig.add_hline(y=0.5, line_dash="dash", line_color="orange",
                  annotation_text="Decision Threshold",
                  annotation_position="bottom right")
    fig.update_layout(
        title=f'What-If Analysis: Churn Probability vs {feature_to_test}',
        xaxis_title=feature_to_test,
        yaxis_title='Churn Probability',
        hovermode='x unified',
        height=500,
    )
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("📊 Analysis Insights")
    current_prob = predictor.predict_churn(base_data)[1]
    min_prob = min(probabilities)
    max_prob = max(probabilities)

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Current Probability", f"{current_prob:.2f}")
    with col2:
        st.metric("Minimum Probability", f"{min_prob:.2f}")
    with col3:
        st.metric("Maximum Probability", f"{max_prob:.2f}")

    # Insights are derived from the sweep that was just run, so they follow
    # the selected range and the direction the model actually predicts.
    if feature_to_test == 'SatisfactionScore':
        change = probabilities[-1] - probabilities[0]
        st.info("**💡 Insight:** Moving satisfaction score from "
                f"{values_range[0]} to {values_range[-1]} changes churn probability by "
                f"{change:+.2%}")
    elif feature_to_test == 'CashbackAmount':
        best_value = values_range[probabilities.index(min_prob)]
        st.info("**💡 Insight:** Within the tested range, churn probability is lowest "
                f"({min_prob:.2%}) at a cashback amount of {best_value}.")
def show_model_insights(predictor, df):
    """Model page: hold-out metrics, confusion-matrix image, feature importance.

    Args:
        predictor: ``ChurnPredictor`` to (re)train on ``df``.
        df: Training data as returned by ``ChurnPredictor.load_data``.
    """
    st.header("🤖 Model Insights")

    # Accuracy and AUC are only available as return values of train_model, so
    # training runs here even when the predictor already holds a model;
    # otherwise the metrics below would be undefined for a pre-trained one.
    with st.spinner('Training model and generating insights...'):
        X_processed, y, _ = predictor.preprocess_data(df)
        *_, accuracy, auc_score = predictor.train_model(X_processed, y)

    metrics_col, matrix_col = st.columns(2)
    with metrics_col:
        st.metric("Model Accuracy", f"{accuracy:.1%}")
        st.metric("AUC Score", f"{auc_score:.3f}")

    with matrix_col:
        # NOTE(review): this is a static image fetched from the repository,
        # not a matrix computed from the model trained above.
        st.subheader("Confusion Matrix")
        IMAGE_URL = "https://raw.githubusercontent.com/Ricendfish/M1-Assignment/main/image.png"
        st.image(IMAGE_URL, caption="Final Confusion Matrix (Threshold = 0.4150)")

    st.subheader("Feature Importance")
    if hasattr(predictor.model, 'feature_importances_'):
        feature_importance = pd.DataFrame({
            'feature': predictor.feature_names,
            'importance': predictor.model.feature_importances_,
        }).sort_values('importance', ascending=True)

        # Ascending sort + tail(10) keeps the ten largest values, with the
        # largest in the last row.
        fig = px.bar(feature_importance.tail(10),
                     x='importance', y='feature',
                     title='Top 10 Most Important Features',
                     orientation='h',
                     color='importance',
                     color_continuous_scale='Viridis')
        st.plotly_chart(fig, use_container_width=True)
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()