| """ |
| ╔══════════════════════════════════════════════════════════════════════════════╗ |
| ║ PHASE 9: INTERACTIVE STREAMLIT DASHBOARD ║ |
| ║ Customer Churn Prediction & Sales Intelligence Dashboard ║ |
| ╚══════════════════════════════════════════════════════════════════════════════╝ |
| |
| ═══════════════════════════════════════════════════════════════════════════════ |
| HOW TO RUN THIS DASHBOARD: |
| ═══════════════════════════════════════════════════════════════════════════════ |
| 1. Install dependencies: pip install streamlit plotly |
| 2. Run: streamlit run dashboard.py |
| 3. Open browser at http://localhost:8501 |
| |
| ═══════════════════════════════════════════════════════════════════════════════ |
| DASHBOARD ARCHITECTURE: |
| ═══════════════════════════════════════════════════════════════════════════════ |
| This dashboard connects ALL phases into a single business-facing product: |
| |
| • Overview Tab: KPIs, churn rate, revenue, at-risk alerts |
| • Predict Tab: Interactive churn prediction (input → risk score + reasons) |
| • Segments Tab: Customer segments (VIP/Loyal/At Risk/Lost) with drill-down |
| • Sales Tab: Time-series trends, top products, growth rates |
| • Recommendations: Priority queue of customers needing action |
| • Model Health: Model performance, feature importance, data drift |
| """ |
|
|
| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| import plotly.express as px |
| import plotly.graph_objects as go |
| from plotly.subplots import make_subplots |
| import json |
| import joblib |
| from pathlib import Path |
| import warnings |
| warnings.filterwarnings("ignore") |
|
|
| |
| |
| |
| st.set_page_config( |
| page_title="Customer Churn Intelligence Dashboard", |
| page_icon="📊", |
| layout="wide", |
| initial_sidebar_state="expanded" |
| ) |
|
|
| |
| |
| |
| st.markdown(""" |
| <style> |
| .main-header { |
| font-size: 2.5rem; |
| font-weight: 700; |
| color: #1f2937; |
| margin-bottom: 0.5rem; |
| } |
| .sub-header { |
| font-size: 1.2rem; |
| color: #6b7280; |
| margin-bottom: 2rem; |
| } |
| .kpi-card { |
| background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); |
| border-radius: 12px; |
| padding: 1.5rem; |
| color: white; |
| text-align: center; |
| box-shadow: 0 4px 6px rgba(0,0,0,0.1); |
| } |
| .kpi-card-green { |
| background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%); |
| } |
| .kpi-card-red { |
| background: linear-gradient(135deg, #cb2d3e 0%, #ef473a 100%); |
| } |
| .kpi-card-orange { |
| background: linear-gradient(135deg, #f7971e 0%, #ffd200 100%); |
| } |
| .kpi-value { |
| font-size: 2.2rem; |
| font-weight: 700; |
| margin: 0; |
| } |
| .kpi-label { |
| font-size: 0.9rem; |
| opacity: 0.9; |
| margin-top: 0.3rem; |
| } |
| .alert-critical { |
| background-color: #fee2e2; |
| border-left: 4px solid #dc2626; |
| padding: 1rem; |
| border-radius: 4px; |
| margin: 0.5rem 0; |
| } |
| .alert-warning { |
| background-color: #fef3c7; |
| border-left: 4px solid #f59e0b; |
| padding: 1rem; |
| border-radius: 4px; |
| margin: 0.5rem 0; |
| } |
| .stTabs [data-baseweb="tab-list"] { |
| gap: 8px; |
| } |
| .stTabs [data-baseweb="tab"] { |
| padding: 10px 20px; |
| border-radius: 8px 8px 0 0; |
| } |
| </style> |
| """, unsafe_allow_html=True) |
|
|
| |
| |
| |
| @st.cache_data |
| def load_data(): |
| """Load all pre-computed datasets from previous phases.""" |
| data = {} |
| |
| |
| data["customers"] = pd.read_csv("/app/deploy/data/customer_data.csv") |
| data["transactions"] = pd.read_csv("/app/deploy/data/transaction_data.csv", parse_dates=["transaction_date"]) |
| data["products"] = pd.read_csv("/app/deploy/data/product_data.csv") |
| |
| |
| data["model_preds"] = pd.read_csv("/app/deploy/data/all_model_predictions.csv") |
| data["test_risk"] = pd.read_csv("/app/outputs/phase4/test_set_risk_scores.csv") |
| |
| |
| data["explanations"] = pd.read_csv("/app/deploy/data/customer_churn_explanations.csv") |
| |
| |
| data["segmented"] = pd.read_csv("/app/deploy/data/customers_segmented.csv") |
| data["segment_summary"] = pd.read_csv("/app/deploy/data/segment_summary.csv") |
| |
| |
| data["monthly_sales"] = pd.read_csv("/app/deploy/data/monthly_sales.csv", parse_dates=["period"]) |
| data["product_revenue"] = pd.read_csv("/app/deploy/data/product_revenue.csv") |
| data["category_revenue"] = pd.read_csv("/app/deploy/data/category_revenue.csv") |
| |
| with open("/app/deploy/data/sales_kpis.json", "r") as f: |
| data["sales_kpis"] = json.load(f) |
| |
| |
| data["recommendations"] = pd.read_csv("/app/deploy/data/customer_recommendations.csv") |
| data["priority_queue"] = pd.read_csv("/app/deploy/data/priority_queue_top100.csv") |
| |
| with open("/app/deploy/data/campaign_roi_summary.json", "r") as f: |
| data["campaign_roi"] = json.load(f) |
| |
| |
| try: |
| data["model"] = joblib.load("/app/deploy/models/best_churn_model.joblib") |
| data["scaler"] = joblib.load("/app/deploy/models/scaler.joblib") |
| data["feature_names"] = joblib.load("/app/deploy/models/feature_names.joblib") |
| except Exception as e: |
| st.warning(f"Model loading issue: {e}") |
| data["model"] = None |
| |
| return data |
|
|
| try: |
| data = load_data() |
| DATA_LOADED = True |
| except Exception as e: |
| st.error(f"Failed to load data: {e}") |
| DATA_LOADED = False |
| data = {} |
|
|
| |
| |
| |
| def get_churn_risk_color(score): |
| if score < 25: return "#2ecc71", "Low" |
| elif score < 50: return "#f1c40f", "Medium" |
| elif score < 75: return "#e67e22", "High" |
| else: return "#e74c3c", "Critical" |
|
|
| def format_currency(val): |
| return f"${val:,.0f}" if val >= 1000 else f"${val:,.2f}" |
|
|
| |
| |
| |
| with st.sidebar: |
| st.image("https://img.icons8.com/color/96/analytics.png", width=60) |
| st.title("Churn Intelligence") |
| st.markdown("---") |
| st.markdown("**Navigation**") |
| |
| page = st.radio("Go to:", [ |
| "📊 Overview & KPIs", |
| "🔮 Churn Predictor", |
| "👥 Customer Segments", |
| "📈 Sales Analytics", |
| "🎯 Action Recommendations", |
| "🧠 Model Performance" |
| ]) |
| |
| st.markdown("---") |
| st.markdown("**About**") |
| st.info(""" |
| This dashboard combines churn prediction, customer segmentation, |
| sales analytics, and automated recommendations into a single |
| business intelligence platform. |
| |
| **Data**: 5,000 customers | 33,037 transactions | 150 products |
| """) |
| |
| if st.button("🔄 Refresh Data"): |
| st.cache_data.clear() |
| st.rerun() |
|
|
| |
| |
| |
|
|
| |
| |
| |
| if page == "📊 Overview & KPIs": |
| st.markdown('<div class="main-header">Customer Churn Intelligence Dashboard</div>', unsafe_allow_html=True) |
| st.markdown('<div class="sub-header">Real-time insights into customer health, revenue, and retention</div>', unsafe_allow_html=True) |
| |
| if not DATA_LOADED: |
| st.error("Data not loaded. Please run all phases first.") |
| st.stop() |
| |
| |
| col1, col2, col3, col4, col5 = st.columns(5) |
| |
| with col1: |
| churn_rate = data["customers"]["churned"].mean() * 100 |
| st.markdown(f""" |
| <div class="kpi-card kpi-card-red"> |
| <div class="kpi-value">{churn_rate:.1f}%</div> |
| <div class="kpi-label">Churn Rate</div> |
| </div> |
| """, unsafe_allow_html=True) |
| |
| with col2: |
| total_revenue = data["transactions"]["total_amount"].sum() |
| st.markdown(f""" |
| <div class="kpi-card"> |
| <div class="kpi-value">{format_currency(total_revenue)}</div> |
| <div class="kpi-label">Total Revenue</div> |
| </div> |
| """, unsafe_allow_html=True) |
| |
| with col3: |
| total_customers = len(data["customers"]) |
| st.markdown(f""" |
| <div class="kpi-card kpi-card-green"> |
| <div class="kpi-value">{total_customers:,}</div> |
| <div class="kpi-label">Total Customers</div> |
| </div> |
| """, unsafe_allow_html=True) |
| |
| with col4: |
| at_risk = len(data["recommendations"][data["recommendations"]["priority"] >= 7]) |
| st.markdown(f""" |
| <div class="kpi-card kpi-card-orange"> |
| <div class="kpi-value">{at_risk:,}</div> |
| <div class="kpi-label">High Priority Actions</div> |
| </div> |
| """, unsafe_allow_html=True) |
| |
| with col5: |
| revenue_at_stake = data["recommendations"]["revenue_at_stake"].sum() |
| st.markdown(f""" |
| <div class="kpi-card kpi-card-red"> |
| <div class="kpi-value">{format_currency(revenue_at_stake)}</div> |
| <div class="kpi-label">Revenue at Stake</div> |
| </div> |
| """, unsafe_allow_html=True) |
| |
| st.markdown("---") |
| |
| |
| st.subheader("🚨 Live Alerts") |
| |
| |
| critical_count = len(data["recommendations"][data["recommendations"]["risk_level"] == "Critical"]) |
| vip_critical = len(data["recommendations"][ |
| (data["recommendations"]["segment_label"] == "VIP") & |
| (data["recommendations"]["risk_level"] == "Critical") |
| ]) |
| |
| col_alert1, col_alert2 = st.columns(2) |
| |
| with col_alert1: |
| st.markdown(f""" |
| <div class="alert-critical"> |
| <strong>⚠️ {critical_count:,} customers at CRITICAL churn risk</strong><br> |
| Immediate action required. These customers have >75% churn probability. |
| </div> |
| """, unsafe_allow_html=True) |
| |
| with col_alert2: |
| st.markdown(f""" |
| <div class="alert-warning"> |
| <strong>💎 {vip_critical:,} VIP customers need urgent attention</strong><br> |
| High-value customers at risk. Executive intervention recommended. |
| </div> |
| """, unsafe_allow_html=True) |
| |
| |
| st.markdown("---") |
| st.subheader("📋 Quick Insights") |
| |
| col_q1, col_q2, col_q3 = st.columns(3) |
| |
| with col_q1: |
| seg_dist = data["segmented"]["segment_label"].value_counts() |
| fig = px.pie( |
| values=seg_dist.values, names=seg_dist.index, |
| color=seg_dist.index, |
| color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71", |
| "At Risk": "#e67e22", "Lost": "#e74c3c"}, |
| hole=0.4 |
| ) |
| fig.update_layout(showlegend=True, margin=dict(t=0, b=0, l=0, r=0), |
| height=300, title="Customer Segments") |
| st.plotly_chart(fig, use_container_width=True) |
| |
| with col_q2: |
| monthly = data["monthly_sales"].tail(12) |
| fig = px.area( |
| monthly, x="period", y="revenue", |
| color_discrete_sequence=["#3498db"], |
| title="Revenue Trend (Last 12 Months)" |
| ) |
| fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0)) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| with col_q3: |
| risk_dist = data["recommendations"]["risk_level"].value_counts() |
| risk_order = ["Low", "Medium", "High", "Critical"] |
| risk_dist = risk_dist.reindex(risk_order).fillna(0) |
| fig = px.bar( |
| x=risk_dist.index, y=risk_dist.values, |
| color=risk_dist.index, |
| color_discrete_map={"Low": "#2ecc71", "Medium": "#f1c40f", |
| "High": "#e67e22", "Critical": "#e74c3c"}, |
| title="Risk Distribution" |
| ) |
| fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0), showlegend=False) |
| st.plotly_chart(fig, use_container_width=True) |
|
|
| |
| |
| |
| elif page == "🔮 Churn Predictor": |
| st.markdown('<div class="main-header">Churn Risk Predictor</div>', unsafe_allow_html=True) |
| st.markdown('<div class="sub-header">Input customer data to predict churn probability and get actionable insights</div>', unsafe_allow_html=True) |
| |
| if not DATA_LOADED or data.get("model") is None: |
| st.warning("⚠️ Model not available. Please run Phase 4 first.") |
| st.stop() |
| |
| |
| pred_mode = st.radio("Prediction Mode:", ["Single Customer", "Batch Upload (CSV)"], horizontal=True) |
| |
| if pred_mode == "Single Customer": |
| |
| sample_customers = data["customers"]["customer_id"].sample(5, random_state=42).tolist() |
| selected_customer = st.selectbox("Select a customer (or enter ID manually):", |
| ["Manual Entry"] + sample_customers) |
| |
| if selected_customer != "Manual Entry": |
| |
| cust_row = data["customers"][data["customers"]["customer_id"] == selected_customer].iloc[0] |
| default_age = int(cust_row["age"]) |
| default_tickets = int(cust_row["support_tickets_6m"]) |
| default_email = float(cust_row["email_open_rate"]) |
| default_web = int(cust_row["web_sessions_30d"]) |
| else: |
| default_age, default_tickets, default_email, default_web = 35, 1, 0.3, 5 |
| |
| st.markdown("---") |
| |
| |
| col1, col2 = st.columns(2) |
| |
| with col1: |
| st.subheader("📋 Customer Profile") |
| age = st.slider("Age", 18, 80, default_age) |
| gender = st.selectbox("Gender", ["M", "F", "Other"]) |
| region = st.selectbox("Region", ["North", "South", "East", "West", "Central"]) |
| segment = st.selectbox("Business Segment", ["Retail", "SMB", "Enterprise"]) |
| channel = st.selectbox("Acquisition Channel", ["Online", "In-Store", "Mobile App", "Phone"]) |
| ltv_baseline = st.number_input("Baseline LTV ($)", 0.0, 15000.0, 500.0) |
| |
| with col2: |
| st.subheader("📊 Behavioral Data") |
| support_tickets = st.slider("Support Tickets (6m)", 0, 10, default_tickets) |
| email_rate = st.slider("Email Open Rate", 0.0, 1.0, default_email) |
| web_sessions = st.slider("Web Sessions (30d)", 0, 30, default_web) |
| recency = st.slider("Days Since Last Purchase", 0, 500, 120) |
| total_spent = st.number_input("Total Spent ($)", 0.0, 20000.0, 800.0) |
| txn_count = st.slider("Transaction Count", 0, 50, 8) |
| |
| st.markdown("---") |
| |
| if st.button("🔮 Predict Churn Risk", type="primary", use_container_width=True): |
| |
| |
| feature_names = data["feature_names"] |
| |
| |
| tenure_days = 365 |
| purchase_frequency = txn_count / (tenure_days / 30 + 1) |
| avg_txn_value = total_spent / (txn_count + 1) |
| rfm_score = 8 |
| engagement = (email_rate * 0.35 + min(web_sessions/20, 1) * 0.4 + |
| (1 - min(support_tickets/5, 1)) * 0.25) |
| |
| input_dict = { |
| "age": age, |
| "lifetime_value_baseline": ltv_baseline, |
| "support_tickets_6m": support_tickets, |
| "email_open_rate": email_rate, |
| "web_sessions_30d": web_sessions, |
| "total_spent": total_spent, |
| "txn_count": txn_count, |
| "avg_order_value": avg_txn_value, |
| "max_order_value": avg_txn_value * 1.5, |
| "min_order_value": avg_txn_value * 0.5, |
| "std_order_value": avg_txn_value * 0.3, |
| "total_quantity": txn_count * 2, |
| "total_discount": txn_count * 0.02, |
| "unique_products": min(txn_count, 20), |
| "recency_days": recency, |
| "R_score": max(1, 6 - recency // 60), |
| "F_score": min(5, max(1, txn_count // 3)), |
| "M_score": min(5, max(1, int(total_spent // 500))), |
| "RFM_score": 10, |
| "tenure_days": tenure_days, |
| "days_between_first_last": tenure_days, |
| "purchase_frequency": purchase_frequency, |
| "avg_transaction_value": avg_txn_value, |
| "engagement_score": engagement, |
| "spending_trend_ratio": 1.0, |
| "purchase_regularity": 1.0, |
| "category_diversity": min(txn_count, 10), |
| "discount_sensitivity": 0.02, |
| } |
| |
| |
| for g in ["F", "M", "Other"]: |
| input_dict[f"gender_{g}"] = 1 if gender == g else 0 |
| |
| |
| for r in ["Central", "East", "North", "South", "West"]: |
| input_dict[f"region_{r}"] = 1 if region == r else 0 |
| |
| |
| for s in ["Enterprise", "Retail", "SMB"]: |
| input_dict[f"segment_{s}"] = 1 if segment == s else 0 |
| |
| |
| for c in ["In-Store", "Mobile App", "Online", "Phone"]: |
| input_dict[f"acquisition_channel_{c}"] = 1 if channel == c else 0 |
| |
| |
| X_input = pd.DataFrame([{k: input_dict.get(k, 0) for k in feature_names}]) |
| X_scaled = data["scaler"].transform(X_input) |
| |
| |
| proba = data["model"].predict_proba(X_scaled)[0][1] |
| risk_score = proba * 100 |
| color, risk_label = get_churn_risk_color(risk_score) |
| prediction = 1 if risk_score >= 50 else 0 |
| |
| |
| st.markdown("---") |
| |
| res_col1, res_col2, res_col3 = st.columns([1, 2, 1]) |
| |
| with res_col2: |
| st.markdown(f""" |
| <div style="text-align: center; padding: 2rem; background: {color}20; border-radius: 16px; border: 3px solid {color};"> |
| <div style="font-size: 1.2rem; color: {color}; font-weight: 600;">Churn Risk: {risk_label}</div> |
| <div style="font-size: 4rem; font-weight: 800; color: {color}; margin: 0.5rem 0;">{risk_score:.1f}%</div> |
| <div style="font-size: 1rem; color: #666;">Predicted Outcome: {'🔴 CHURN' if prediction == 1 else '🟢 RETAIN'}</div> |
| </div> |
| """, unsafe_allow_html=True) |
| |
| |
| fig = go.Figure(go.Indicator( |
| mode="gauge+number", |
| value=risk_score, |
| domain={'x': [0, 1], 'y': [0, 1]}, |
| gauge={ |
| 'axis': {'range': [0, 100]}, |
| 'bar': {'color': color}, |
| 'steps': [ |
| {'range': [0, 25], 'color': '#d5f5e3'}, |
| {'range': [25, 50], 'color': '#fcf3cf'}, |
| {'range': [50, 75], 'color': '#f5cba7'}, |
| {'range': [75, 100], 'color': '#f5b7b1'} |
| ], |
| 'threshold': { |
| 'line': {'color': 'black', 'width': 4}, |
| 'thickness': 0.75, |
| 'value': risk_score |
| } |
| } |
| )) |
| fig.update_layout(height=300, margin=dict(t=0, b=0, l=0, r=0)) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| |
| st.subheader("📊 Key Risk Factors") |
| |
| reasons = [] |
| if recency > 90: |
| reasons.append(("No purchase in 90+ days", "HIGH", f"{recency} days since last purchase")) |
| if txn_count < 3: |
| reasons.append(("Very few transactions", "HIGH", f"Only {txn_count} total purchases")) |
| if total_spent < 200: |
| reasons.append(("Low lifetime spend", "MEDIUM", f"${total_spent:.0f} total spent")) |
| if email_rate < 0.2: |
| reasons.append(("Low email engagement", "MEDIUM", f"{email_rate:.1%} open rate")) |
| if support_tickets > 3: |
| reasons.append(("High support activity", "MEDIUM", f"{support_tickets} recent tickets")) |
| |
| if not reasons: |
| reasons.append(("Overall profile stable", "LOW", "No major risk flags detected")) |
| |
| for reason, severity, detail in reasons: |
| sev_color = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}[severity] |
| st.markdown(f""" |
| <div style="padding: 0.75rem; background: #f8f9fa; border-radius: 8px; margin: 0.3rem 0;"> |
| <strong>{sev_color} {reason}</strong> — {detail} |
| </div> |
| """, unsafe_allow_html=True) |
| |
| |
| st.subheader("🎯 Recommended Action") |
| if risk_score >= 75: |
| st.error("🚨 **URGENT**: Executive-level intervention recommended. Personal call + exclusive offer.") |
| elif risk_score >= 50: |
| st.warning("⚠️ **HIGH PRIORITY**: Re-engagement campaign + targeted discount.") |
| elif risk_score >= 25: |
| st.info("ℹ️ **MONITOR**: Increase touchpoints. Watch for further decline.") |
| else: |
| st.success("✅ **HEALTHY**: Consider upsell/cross-sell opportunities.") |
| |
| else: |
| st.info("Upload a CSV with customer features to predict churn risk for multiple customers.") |
| uploaded = st.file_uploader("Upload CSV", type=["csv"]) |
| |
| if uploaded: |
| batch_df = pd.read_csv(uploaded) |
| st.write(f"📄 Loaded {len(batch_df)} customers") |
| st.dataframe(batch_df.head()) |
| st.info("Batch prediction coming soon — single customer mode is fully functional above.") |
|
|
| |
| |
| |
| elif page == "👥 Customer Segments": |
| st.markdown('<div class="main-header">Customer Segmentation</div>', unsafe_allow_html=True) |
| st.markdown('<div class="sub-header">Explore VIP, Loyal, At Risk, and Lost customer segments</div>', unsafe_allow_html=True) |
| |
| if not DATA_LOADED: |
| st.error("Data not loaded.") |
| st.stop() |
| |
| seg = data["segmented"] |
| |
| |
| selected_segments = st.multiselect( |
| "Filter Segments:", |
| ["VIP", "Loyal", "At Risk", "Lost"], |
| default=["VIP", "Loyal", "At Risk", "Lost"] |
| ) |
| |
| filtered = seg[seg["segment_label"].isin(selected_segments)] |
| |
| col1, col2 = st.columns(2) |
| |
| with col1: |
| |
| fig = px.scatter( |
| filtered, x="recency_days", y="total_spent", |
| color="segment_label", size="txn_count", |
| hover_data=["customer_id", "RFM_score", "engagement_score"], |
| color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71", |
| "At Risk": "#e67e22", "Lost": "#e74c3c"}, |
| title="Customers: Recency vs Lifetime Spend", |
| labels={"recency_days": "Days Since Last Purchase", "total_spent": "Total Spent ($)"} |
| ) |
| fig.update_layout(height=500) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| with col2: |
| |
| fig = px.box( |
| filtered, x="segment_label", y="RFM_score", |
| color="segment_label", |
| color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71", |
| "At Risk": "#e67e22", "Lost": "#e74c3c"}, |
| title="RFM Score Distribution by Segment", |
| category_orders={"segment_label": ["VIP", "Loyal", "At Risk", "Lost"]} |
| ) |
| fig.update_layout(height=500, showlegend=False) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| |
| st.subheader("📊 Segment Metrics") |
| |
| metrics = filtered.groupby("segment_label").agg( |
| Count=("customer_id", "count"), |
| Avg_Spend=("total_spent", "mean"), |
| Avg_Recency=("recency_days", "mean"), |
| Avg_Frequency=("txn_count", "mean"), |
| Avg_RFM=("RFM_score", "mean"), |
| Churn_Rate=("churned", "mean"), |
| ).round(2) |
| |
| metrics["Churn_Rate"] = (metrics["Churn_Rate"] * 100).round(1).astype(str) + "%" |
| metrics["Avg_Spend"] = "$" + metrics["Avg_Spend"].round(0).astype(int).astype(str) |
| |
| st.dataframe(metrics, use_container_width=True) |
| |
| |
| st.subheader("🔍 Customer Explorer") |
| |
| search_id = st.text_input("Search Customer ID (e.g., C00001):") |
| if search_id: |
| cust = seg[seg["customer_id"].str.contains(search_id, case=False, na=False)] |
| else: |
| cust = filtered.sample(min(20, len(filtered)), random_state=42) |
| |
| display_cols = ["customer_id", "segment_label", "recency_days", "txn_count", |
| "total_spent", "RFM_score", "engagement_score", "churned"] |
| st.dataframe(cust[display_cols].reset_index(drop=True), use_container_width=True) |
|
|
| |
| |
| |
| elif page == "📈 Sales Analytics": |
| st.markdown('<div class="main-header">Sales Analytics</div>', unsafe_allow_html=True) |
| st.markdown('<div class="sub-header">Revenue trends, product performance, and growth metrics</div>', unsafe_allow_html=True) |
| |
| if not DATA_LOADED: |
| st.error("Data not loaded.") |
| st.stop() |
| |
| tab1, tab2, tab3, tab4 = st.tabs(["📊 Revenue Trends", "🏆 Top Products", "📅 Seasonality", "📈 Growth"]) |
| |
| with tab1: |
| monthly = data["monthly_sales"] |
| |
| fig = make_subplots(rows=2, cols=1, shared_xaxes=True, |
| subplot_titles=("Monthly Revenue", "Monthly Orders"), |
| vertical_spacing=0.1) |
| |
| fig.add_trace( |
| go.Scatter(x=monthly["period"], y=monthly["revenue"], |
| mode="lines+markers", name="Revenue", line=dict(color="#3498db")), |
| row=1, col=1 |
| ) |
| fig.add_trace( |
| go.Scatter(x=monthly["period"], y=monthly["orders"], |
| mode="lines+markers", name="Orders", line=dict(color="#e67e22")), |
| row=2, col=1 |
| ) |
| |
| fig.update_layout(height=600, showlegend=False, |
| title_text="Revenue & Order Trends Over Time") |
| st.plotly_chart(fig, use_container_width=True) |
| |
| |
| kpi_col1, kpi_col2, kpi_col3 = st.columns(3) |
| with kpi_col1: |
| st.metric("Total Revenue", data["sales_kpis"]["Total Revenue (All Time)"]) |
| with kpi_col2: |
| st.metric("Avg Order Value", data["sales_kpis"]["Average Order Value"]) |
| with kpi_col3: |
| st.metric("Latest MoM Growth", data["sales_kpis"]["Revenue Growth (MoM, latest)"]) |
| |
| with tab2: |
| top_n = st.slider("Show Top N Products", 5, 50, 10) |
| top_products = data["product_revenue"].head(top_n) |
| |
| fig = px.bar( |
| top_products, x="revenue", y="product_name", |
| orientation="h", color="category", |
| title=f"Top {top_n} Products by Revenue", |
| labels={"revenue": "Revenue ($)", "product_name": ""} |
| ) |
| fig.update_layout(height=500, yaxis=dict(autorange="reversed")) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| |
| fig2 = px.pie( |
| data["category_revenue"], values="revenue", names="category", |
| title="Revenue by Category", hole=0.4 |
| ) |
| fig2.update_layout(height=400) |
| st.plotly_chart(fig2, use_container_width=True) |
| |
| with tab3: |
| |
| seasonal = data["transactions"].copy() |
| seasonal["month"] = seasonal["transaction_date"].dt.month |
| seasonal["dayofweek"] = seasonal["transaction_date"].dt.dayofweek |
| seasonal["quarter"] = seasonal["transaction_date"].dt.quarter |
| |
| month_pattern = seasonal.groupby("month")["total_amount"].sum().reset_index() |
| month_pattern["month_name"] = pd.to_datetime(month_pattern["month"], format="%m").dt.strftime("%b") |
| |
| dow_pattern = seasonal.groupby("dayofweek")["total_amount"].sum().reset_index() |
| dow_pattern["day_name"] = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] |
| |
| col_s1, col_s2 = st.columns(2) |
| |
| with col_s1: |
| fig = px.bar( |
| month_pattern, x="month_name", y="total_amount", |
| color_discrete_sequence=["#3498db"], |
| title="Revenue by Month (Seasonality)", |
| labels={"total_amount": "Revenue ($)", "month_name": "Month"} |
| ) |
| fig.update_layout(height=350) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| with col_s2: |
| fig = px.bar( |
| dow_pattern, x="day_name", y="total_amount", |
| color_discrete_sequence=["#e67e22"], |
| title="Revenue by Day of Week", |
| labels={"total_amount": "Revenue ($)", "day_name": "Day"} |
| ) |
| fig.update_layout(height=350) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| with tab4: |
| monthly = data["monthly_sales"].copy() |
| monthly["revenue_mom"] = monthly["revenue"].pct_change() * 100 |
| monthly["revenue_3ma"] = monthly["revenue"].rolling(3, min_periods=1).mean() |
| |
| fig = go.Figure() |
| fig.add_trace(go.Bar( |
| x=monthly["period"], y=monthly["revenue_mom"], |
| name="MoM Growth %", marker_color="#3498db" |
| )) |
| fig.add_trace(go.Scatter( |
| x=monthly["period"], y=monthly["revenue_3ma"], |
| name="3-Month MA ($)", yaxis="y2", line=dict(color="#e74c3c") |
| )) |
| |
| fig.update_layout( |
| title="Month-over-Month Growth Rate", |
| yaxis=dict(title="MoM Growth (%)"), |
| yaxis2=dict(title="Revenue ($)", overlaying="y", side="right"), |
| height=400, legend=dict(orientation="h", yanchor="bottom", y=1.02) |
| ) |
| st.plotly_chart(fig, use_container_width=True) |
|
|
| |
| |
| |
| elif page == "🎯 Action Recommendations": |
| st.markdown('<div class="main-header">Action Recommendations</div>', unsafe_allow_html=True) |
| st.markdown('<div class="sub-header">Priority queue of customers needing immediate business action</div>', unsafe_allow_html=True) |
| |
| if not DATA_LOADED: |
| st.error("Data not loaded.") |
| st.stop() |
| |
| recs = data["recommendations"] |
| |
| |
| col_f1, col_f2, col_f3 = st.columns(3) |
| with col_f1: |
| seg_filter = st.multiselect("Segment:", recs["segment_label"].unique(), default=recs["segment_label"].unique()) |
| with col_f2: |
| risk_filter = st.multiselect("Risk Level:", recs["risk_level"].unique(), default=recs["risk_level"].unique()) |
| with col_f3: |
| priority_min = st.slider("Min Priority Score:", 0, 130, 50) |
| |
| filtered_recs = recs[ |
| (recs["segment_label"].isin(seg_filter)) & |
| (recs["risk_level"].isin(risk_filter)) & |
| (recs["final_priority_score"] >= priority_min) |
| ].sort_values("final_priority_score", ascending=False) |
| |
| st.markdown(f"**{len(filtered_recs):,} customers match your filters**") |
| |
| |
| stat_col1, stat_col2, stat_col3, stat_col4 = st.columns(4) |
| with stat_col1: |
| st.metric("Total at Risk", len(filtered_recs)) |
| with stat_col2: |
| st.metric("Revenue at Stake", f"${filtered_recs['revenue_at_stake'].sum():,.0f}") |
| with stat_col3: |
| st.metric("Est. Campaign Cost", f"${filtered_recs['cost_per_customer'].sum():,.0f}") |
| with stat_col4: |
| st.metric("Est. ROI", f"{((filtered_recs['expected_revenue_saved'].sum() - filtered_recs['cost_per_customer'].sum()) / max(filtered_recs['cost_per_customer'].sum(), 1) * 100):.0f}%") |
| |
| st.markdown("---") |
| |
| |
| display_cols = [ |
| "customer_id", "segment_label", "risk_level", "churn_risk_score", |
| "total_spent", "revenue_at_stake", "final_priority_score", |
| "recommended_action", "recommended_channel", "recommended_offer", |
| "urgency", "behavioral_enhancements" |
| ] |
| |
| st.dataframe( |
| filtered_recs[display_cols].head(100), |
| use_container_width=True, |
| column_config={ |
| "churn_risk_score": st.column_config.ProgressColumn("Risk %", min_value=0, max_value=100, format="%.0f%%"), |
| "final_priority_score": st.column_config.NumberColumn("Priority", format="%.1f"), |
| "revenue_at_stake": st.column_config.NumberColumn("Revenue at Stake", format="$%.0f"), |
| } |
| ) |
| |
| |
| st.markdown("---") |
| st.subheader("💰 Campaign ROI Summary") |
| |
| roi = data["campaign_roi"] |
| col_roi1, col_roi2 = st.columns(2) |
| |
| with col_roi1: |
| st.markdown("**Full Campaign (All Customers)**") |
| fc = roi["full_campaign"] |
| st.write(f"- Customers: {fc['customers']:,}") |
| st.write(f"- Revenue at Stake: ${fc['revenue_at_stake']:,.0f}") |
| st.write(f"- Campaign Cost: ${fc['campaign_cost']:,.0f}") |
| st.write(f"- Expected Saved: ${fc['expected_revenue_saved']:,.0f}") |
| st.write(f"- ROI: **{fc['roi_percent']:.0f}%**") |
| |
| with col_roi2: |
| st.markdown("**High Priority Only (Priority ≥ 7)**") |
| hp = roi["high_priority_campaign"] |
| st.write(f"- Customers: {hp['customers']:,}") |
| st.write(f"- Revenue at Stake: ${hp['revenue_at_stake']:,.0f}") |
| st.write(f"- Campaign Cost: ${hp['campaign_cost']:,.0f}") |
| st.write(f"- Expected Saved: ${hp['expected_revenue_saved']:,.0f}") |
| st.write(f"- ROI: **{hp['roi_percent']:.0f}%**") |
|
|
| |
| |
| |
| elif page == "🧠 Model Performance": |
| st.markdown('<div class="main-header">Model Performance & Explainability</div>', unsafe_allow_html=True) |
| st.markdown('<div class="sub-header">Churn prediction accuracy, feature importance, and model health</div>', unsafe_allow_html=True) |
| |
| if not DATA_LOADED: |
| st.error("Data not loaded.") |
| st.stop() |
| |
| tab1, tab2, tab3 = st.tabs(["📊 Model Metrics", "🔍 Feature Importance", "📋 Explanations"]) |
| |
| with tab1: |
| |
| try: |
| model_comp = pd.read_csv("/app/outputs/phase4/model_comparison.csv") |
| |
| fig = px.bar( |
| model_comp, x="Model", y=["Accuracy", "Precision", "Recall", "F1-Score", "AUC-ROC"], |
| barmode="group", title="Model Comparison", |
| color_discrete_sequence=["#3498db", "#2ecc71", "#e74c3c", "#9b59b6", "#f1c40f"] |
| ) |
| fig.update_layout(height=500) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| st.dataframe(model_comp.round(3), use_container_width=True) |
| except: |
| st.info("Model comparison data not available.") |
| |
| |
| st.subheader("Confusion Matrix (Best Model)") |
| |
| preds = data["model_preds"] |
| from sklearn.metrics import confusion_matrix |
| cm = confusion_matrix(preds["actual_churn"], preds["rf_pred"]) |
| |
| fig = px.imshow( |
| cm, text_auto=True, |
| labels=dict(x="Predicted", y="Actual", color="Count"), |
| x=["Active", "Churned"], y=["Active", "Churned"], |
| color_continuous_scale="Blues" |
| ) |
| fig.update_layout(height=400) |
| st.plotly_chart(fig, use_container_width=True) |
| |
| with tab2: |
| st.subheader("Top Churn Drivers") |
| |
| try: |
| |
| if data.get("model") and hasattr(data["model"], "feature_importances_"): |
| importances = data["model"].feature_importances_ |
| feat_names = data["feature_names"] |
| |
| imp_df = pd.DataFrame({ |
| "Feature": [f.replace("_scaled", "") for f in feat_names], |
| "Importance": importances |
| }).sort_values("Importance", ascending=False).head(15) |
| |
| fig = px.bar( |
| imp_df, x="Importance", y="Feature", orientation="h", |
| color="Importance", color_continuous_scale="Viridis", |
| title="Top 15 Feature Importances (Random Forest)" |
| ) |
| fig.update_layout(height=500, yaxis=dict(autorange="reversed")) |
| st.plotly_chart(fig, use_container_width=True) |
| else: |
| st.info("Feature importance data not available.") |
| except Exception as e: |
| st.error(f"Could not load feature importance: {e}") |
| |
| with tab3: |
| st.subheader("Customer Explanations") |
| |
| try: |
| explanations = data["explanations"] |
| |
| cust_search = st.text_input("Search Customer ID:") |
| if cust_search: |
| exp = explanations[explanations["customer_id"].str.contains(cust_search, case=False, na=False)] |
| else: |
| exp = explanations.head(10) |
| |
| for _, row in exp.iterrows(): |
| with st.expander(f"Customer {row['customer_id']} | P(churn)={row['predicted_churn_prob']:.1%} | {'🔴 CHURNED' if row['actual_churn']==1 else '🟢 ACTIVE'}"): |
| st.markdown(f"**Top Risk Factors:** {row['top_reasons_for_churn']}") |
| st.markdown(f"**Loyalty Signs:** {row['top_reasons_against_churn']}") |
| except: |
| st.info("Explanation data not available.") |
|
|
| |
| |
| |
| st.markdown("---") |
| st.markdown(""" |
| <div style="text-align: center; color: #9ca3af; padding: 1rem;"> |
| <small>Customer Churn Intelligence Dashboard | Built with Streamlit | Data: Synthetic (5,000 customers, 33,037 transactions)</small> |
| </div> |
| """, unsafe_allow_html=True) |
|
|