"""
PHASE 9: INTERACTIVE STREAMLIT DASHBOARD
Customer Churn Prediction & Sales Intelligence Dashboard

═══════════════════════════════════════════════════════════════════════════════
HOW TO RUN THIS DASHBOARD:
═══════════════════════════════════════════════════════════════════════════════
1. Install dependencies:
       pip install streamlit plotly pandas numpy joblib scikit-learn
2. Run: streamlit run dashboard.py
3. Open browser at http://localhost:8501

═══════════════════════════════════════════════════════════════════════════════
DASHBOARD ARCHITECTURE:
═══════════════════════════════════════════════════════════════════════════════
This dashboard connects ALL phases into a single business-facing product:
  • Overview Tab: KPIs, churn rate, revenue, at-risk alerts
  • Predict Tab: Interactive churn prediction (input → risk score + reasons)
  • Segments Tab: Customer segments (VIP/Loyal/At Risk/Lost) with drill-down
  • Sales Tab: Time-series trends, top products, growth rates
  • Recommendations: Priority queue of customers needing action
  • Model Health: Model comparison, confusion matrix, feature importance,
    per-customer explanations

The script is executed top to bottom by Streamlit on every interaction; the
selected sidebar page decides which branch of the page chain is rendered.
"""

import json
import warnings
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from plotly.subplots import make_subplots

warnings.filterwarnings("ignore")

# ─────────────────────────────────────────────────────────────────────────────
# PAGE CONFIGURATION
# ─────────────────────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="Customer Churn Intelligence Dashboard",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)

# ─────────────────────────────────────────────────────────────────────────────
# CUSTOM CSS FOR POLISHED LOOK
# ─────────────────────────────────────────────────────────────────────────────
# NOTE(review): the stylesheet body is empty here, and the st.markdown calls
# below pass unsafe_allow_html=True without any HTML markup in their strings.
# Confirm whether the markup was lost before this file was committed.
st.markdown("""
""", unsafe_allow_html=True)

# ─────────────────────────────────────────────────────────────────────────────
# SHARED CONSTANTS
# ─────────────────────────────────────────────────────────────────────────────
DATA_DIR = Path("/app/deploy/data")
MODEL_DIR = Path("/app/deploy/models")
PHASE4_OUTPUT_DIR = Path("/app/outputs/phase4")

SEGMENT_ORDER = ["VIP", "Loyal", "At Risk", "Lost"]
SEGMENT_COLORS = {"VIP": "#FFD700", "Loyal": "#2ecc71", "At Risk": "#e67e22", "Lost": "#e74c3c"}
RISK_ORDER = ["Low", "Medium", "High", "Critical"]
RISK_COLORS = {"Low": "#2ecc71", "Medium": "#f1c40f", "High": "#e67e22", "Critical": "#e74c3c"}
DAY_NAMES = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]


# ─────────────────────────────────────────────────────────────────────────────
# DATA LOADING (cached)
# ─────────────────────────────────────────────────────────────────────────────
@st.cache_data
def load_data():
    """Load all pre-computed datasets and model artifacts from previous phases.

    Returns:
        dict: DataFrames / parsed JSON keyed by dataset name, plus the keys
        "model", "scaler" and "feature_names". The three model keys are all
        None when any model artifact fails to load; "test_risk" is None when
        its file is absent.

    Raises:
        Exception: any read error for a required CSV/JSON file propagates to
        the caller, which reports it and disables the pages.
    """
    data = {}

    # Core datasets
    data["customers"] = pd.read_csv(DATA_DIR / "customer_data.csv")
    data["transactions"] = pd.read_csv(DATA_DIR / "transaction_data.csv",
                                       parse_dates=["transaction_date"])
    data["products"] = pd.read_csv(DATA_DIR / "product_data.csv")

    # Phase 4: Model predictions
    data["model_preds"] = pd.read_csv(DATA_DIR / "all_model_predictions.csv")
    # This file lives outside the deploy directory and no page reads it, so a
    # missing copy must not take the whole dashboard down.
    try:
        data["test_risk"] = pd.read_csv(PHASE4_OUTPUT_DIR / "test_set_risk_scores.csv")
    except FileNotFoundError:
        data["test_risk"] = None

    # Phase 5: Explanations
    data["explanations"] = pd.read_csv(DATA_DIR / "customer_churn_explanations.csv")

    # Phase 6: Segmentation
    data["segmented"] = pd.read_csv(DATA_DIR / "customers_segmented.csv")
    data["segment_summary"] = pd.read_csv(DATA_DIR / "segment_summary.csv")

    # Phase 7: Sales
    data["monthly_sales"] = pd.read_csv(DATA_DIR / "monthly_sales.csv", parse_dates=["period"])
    data["product_revenue"] = pd.read_csv(DATA_DIR / "product_revenue.csv")
    data["category_revenue"] = pd.read_csv(DATA_DIR / "category_revenue.csv")
    with open(DATA_DIR / "sales_kpis.json", "r") as f:
        data["sales_kpis"] = json.load(f)

    # Phase 8: Recommendations
    data["recommendations"] = pd.read_csv(DATA_DIR / "customer_recommendations.csv")
    data["priority_queue"] = pd.read_csv(DATA_DIR / "priority_queue_top100.csv")
    with open(DATA_DIR / "campaign_roi_summary.json", "r") as f:
        data["campaign_roi"] = json.load(f)

    # Model: the three artifacts are only usable together, so a failure on any
    # of them resets all three rather than leaving a partial set behind.
    try:
        data["model"] = joblib.load(MODEL_DIR / "best_churn_model.joblib")
        data["scaler"] = joblib.load(MODEL_DIR / "scaler.joblib")
        data["feature_names"] = joblib.load(MODEL_DIR / "feature_names.joblib")
    except Exception as e:
        st.warning(f"Model loading issue: {e}")
        data["model"] = None
        data["scaler"] = None
        data["feature_names"] = None

    return data


try:
    data = load_data()
    DATA_LOADED = True
except Exception as e:
    st.error(f"Failed to load data: {e}")
    DATA_LOADED = False
    data = {}


# ─────────────────────────────────────────────────────────────────────────────
# HELPER FUNCTIONS
# ─────────────────────────────────────────────────────────────────────────────
def get_churn_risk_color(score):
    """Return ``(hex_color, label)`` for a churn risk score on a 0-100 scale.

    Bands: below 25 Low, below 50 Medium, below 75 High, otherwise Critical.
    """
    if score < 25:
        return "#2ecc71", "Low"
    elif score < 50:
        return "#f1c40f", "Medium"
    elif score < 75:
        return "#e67e22", "High"
    else:
        return "#e74c3c", "Critical"


def format_currency(val):
    """Format *val* as dollars: no decimals from $1,000 up, two decimals below.

    Negative amounts are rendered as ``-$1,500`` rather than ``$-1,500.00``.
    """
    sign = "-" if val < 0 else ""
    magnitude = abs(val)
    amount = f"{magnitude:,.0f}" if magnitude >= 1000 else f"{magnitude:,.2f}"
    return f"{sign}${amount}"


def _clamp_default(value, low, high, fallback):
    """Coerce *value* to the type of *fallback* and clamp it to [low, high].

    Returns *fallback* when the value is missing or not numeric. Streamlit
    sliders raise if their initial value lies outside the slider range.
    """
    try:
        coerced = type(fallback)(value)
    except (TypeError, ValueError):
        return fallback
    if coerced != coerced:  # NaN never equals itself
        return fallback
    return max(low, min(high, coerced))


def _render_page_header(title, subtitle):
    """Render the title and subtitle lines shown at the top of every page."""
    st.markdown(f"\n{title}\n", unsafe_allow_html=True)
    st.markdown(f"\n{subtitle}\n", unsafe_allow_html=True)


def _render_kpi_card(value, label):
    """Render one KPI card: the formatted value followed by its label."""
    st.markdown(f"\n{value}\n{label}\n", unsafe_allow_html=True)


# ─────────────────────────────────────────────────────────────────────────────
# SIDEBAR
# ─────────────────────────────────────────────────────────────────────────────
with st.sidebar:
    st.image("https://img.icons8.com/color/96/analytics.png", width=60)
    st.title("Churn Intelligence")
    st.markdown("---")

    st.markdown("**Navigation**")
    page = st.radio("Go to:", [
        "📊 Overview & KPIs",
        "🔮 Churn Predictor",
        "👥 Customer Segments",
        "📈 Sales Analytics",
        "🎯 Action Recommendations",
        "🧠 Model Performance"
    ])

    st.markdown("---")
    st.markdown("**About**")
    st.info("""
    This dashboard combines churn prediction, customer segmentation,
    sales analytics, and automated recommendations into a single
    business intelligence platform.

    **Data**: 5,000 customers | 33,037 transactions | 150 products
    """)

    if st.button("🔄 Refresh Data"):
        st.cache_data.clear()
        st.rerun()

# ─────────────────────────────────────────────────────────────────────────────
# MAIN CONTENT AREA
# ─────────────────────────────────────────────────────────────────────────────

# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 1: OVERVIEW & KPIs
# ═══════════════════════════════════════════════════════════════════════════════
if page == "📊 Overview & KPIs":
    _render_page_header(
        "Customer Churn Intelligence Dashboard",
        "Real-time insights into customer health, revenue, and retention",
    )

    if not DATA_LOADED:
        st.error("Data not loaded. Please run all phases first.")
        st.stop()

    recs = data["recommendations"]

    # KPI ROW
    col1, col2, col3, col4, col5 = st.columns(5)

    with col1:
        churn_rate = data["customers"]["churned"].mean() * 100
        _render_kpi_card(f"{churn_rate:.1f}%", "Churn Rate")

    with col2:
        total_revenue = data["transactions"]["total_amount"].sum()
        _render_kpi_card(format_currency(total_revenue), "Total Revenue")

    with col3:
        total_customers = len(data["customers"])
        _render_kpi_card(f"{total_customers:,}", "Total Customers")

    with col4:
        at_risk = len(recs[recs["priority"] >= 7])
        _render_kpi_card(f"{at_risk:,}", "High Priority Actions")

    with col5:
        revenue_at_stake = recs["revenue_at_stake"].sum()
        _render_kpi_card(format_currency(revenue_at_stake), "Revenue at Stake")

    st.markdown("---")

    # ALERTS SECTION
    st.subheader("🚨 Live Alerts")

    # Critical alerts
    is_critical = recs["risk_level"] == "Critical"
    critical_count = len(recs[is_critical])
    vip_critical = len(recs[(recs["segment_label"] == "VIP") & is_critical])

    col_alert1, col_alert2 = st.columns(2)
    with col_alert1:
        st.markdown(f"""
⚠️ {critical_count:,} customers at CRITICAL churn risk
Immediate action required. These customers have >75% churn probability.
""", unsafe_allow_html=True)

    with col_alert2:
        st.markdown(f"""
💎 {vip_critical:,} VIP customers need urgent attention
High-value customers at risk. Executive intervention recommended.
""", unsafe_allow_html=True)

    # Quick insight cards
    st.markdown("---")
    st.subheader("📋 Quick Insights")

    col_q1, col_q2, col_q3 = st.columns(3)

    with col_q1:
        seg_dist = data["segmented"]["segment_label"].value_counts()
        fig = px.pie(
            values=seg_dist.values,
            names=seg_dist.index,
            color=seg_dist.index,
            color_discrete_map=SEGMENT_COLORS,
            hole=0.4
        )
        fig.update_layout(showlegend=True, margin=dict(t=0, b=0, l=0, r=0), height=300,
                          title="Customer Segments")
        st.plotly_chart(fig, use_container_width=True)

    with col_q2:
        monthly = data["monthly_sales"].tail(12)
        fig = px.area(
            monthly, x="period", y="revenue",
            color_discrete_sequence=["#3498db"],
            title="Revenue Trend (Last 12 Months)"
        )
        fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0))
        st.plotly_chart(fig, use_container_width=True)

    with col_q3:
        # Reindex so every risk band appears, in severity order, even at zero.
        risk_dist = recs["risk_level"].value_counts().reindex(RISK_ORDER).fillna(0)
        fig = px.bar(
            x=risk_dist.index, y=risk_dist.values,
            color=risk_dist.index,
            color_discrete_map=RISK_COLORS,
            title="Risk Distribution"
        )
        fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0), showlegend=False)
        st.plotly_chart(fig, use_container_width=True)

# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 2: CHURN PREDICTOR
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "🔮 Churn Predictor":
    _render_page_header(
        "Churn Risk Predictor",
        "Input customer data to predict churn probability and get actionable insights",
    )

    if not DATA_LOADED or data.get("model") is None:
        st.warning("⚠️ Model not available. Please run Phase 4 first.")
        st.stop()

    # TWO MODES: Single Customer or Batch Upload
    pred_mode = st.radio("Prediction Mode:", ["Single Customer", "Batch Upload (CSV)"],
                         horizontal=True)

    if pred_mode == "Single Customer":
        customers = data["customers"]

        # Use an existing customer as template. The sample size is capped at
        # the number of rows so a small dataset cannot make sample() raise.
        sample_customers = customers["customer_id"].sample(
            min(5, len(customers)), random_state=42
        ).tolist()
        selected_customer = st.selectbox("Select a customer (or enter ID manually):",
                                         ["Manual Entry"] + sample_customers)

        if selected_customer != "Manual Entry":
            # Pre-fill with existing customer data, clamped to the slider
            # ranges used below (out-of-range defaults make st.slider raise).
            cust_row = customers[customers["customer_id"] == selected_customer].iloc[0]
            default_age = _clamp_default(cust_row["age"], 18, 80, 35)
            default_tickets = _clamp_default(cust_row["support_tickets_6m"], 0, 10, 1)
            default_email = _clamp_default(cust_row["email_open_rate"], 0.0, 1.0, 0.3)
            default_web = _clamp_default(cust_row["web_sessions_30d"], 0, 30, 5)
        else:
            default_age, default_tickets, default_email, default_web = 35, 1, 0.3, 5

        st.markdown("---")

        # Input form
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("📋 Customer Profile")
            age = st.slider("Age", 18, 80, default_age)
            gender = st.selectbox("Gender", ["M", "F", "Other"])
            region = st.selectbox("Region", ["North", "South", "East", "West", "Central"])
            segment = st.selectbox("Business Segment", ["Retail", "SMB", "Enterprise"])
            channel = st.selectbox("Acquisition Channel",
                                   ["Online", "In-Store", "Mobile App", "Phone"])
            ltv_baseline = st.number_input("Baseline LTV ($)", 0.0, 15000.0, 500.0)

        with col2:
            st.subheader("📊 Behavioral Data")
            support_tickets = st.slider("Support Tickets (6m)", 0, 10, default_tickets)
            email_rate = st.slider("Email Open Rate", 0.0, 1.0, default_email)
            web_sessions = st.slider("Web Sessions (30d)", 0, 30, default_web)
            recency = st.slider("Days Since Last Purchase", 0, 500, 120)
            total_spent = st.number_input("Total Spent ($)", 0.0, 20000.0, 800.0)
            txn_count = st.slider("Transaction Count", 0, 50, 8)

        st.markdown("---")

        if st.button("🔮 Predict Churn Risk", type="primary", use_container_width=True):
            # Build a one-row frame with the same columns the model was
            # trained on; anything not derived below defaults to 0.
            feature_names = data["feature_names"]

            # Compute derived features
            tenure_days = 365  # placeholder
            purchase_frequency = txn_count / (tenure_days / 30 + 1)
            avg_txn_value = total_spent / (txn_count + 1)  # +1 avoids division by zero
            engagement = (email_rate * 0.35
                          + min(web_sessions / 20, 1) * 0.4
                          + (1 - min(support_tickets / 5, 1)) * 0.25)

            input_dict = {
                "age": age,
                "lifetime_value_baseline": ltv_baseline,
                "support_tickets_6m": support_tickets,
                "email_open_rate": email_rate,
                "web_sessions_30d": web_sessions,
                "total_spent": total_spent,
                "txn_count": txn_count,
                "avg_order_value": avg_txn_value,
                "max_order_value": avg_txn_value * 1.5,
                "min_order_value": avg_txn_value * 0.5,
                "std_order_value": avg_txn_value * 0.3,
                "total_quantity": txn_count * 2,
                "total_discount": txn_count * 0.02,
                "unique_products": min(txn_count, 20),
                "recency_days": recency,
                # Capped at 5 like F_score and M_score; without the cap a
                # recency under 60 days produced 6.
                "R_score": min(5, max(1, 6 - recency // 60)),
                "F_score": min(5, max(1, txn_count // 3)),
                "M_score": min(5, max(1, int(total_spent // 500))),
                "RFM_score": 10,  # placeholder
                "tenure_days": tenure_days,
                "days_between_first_last": tenure_days,
                "purchase_frequency": purchase_frequency,
                "avg_transaction_value": avg_txn_value,
                "engagement_score": engagement,
                "spending_trend_ratio": 1.0,
                "purchase_regularity": 1.0,
                "category_diversity": min(txn_count, 10),
                "discount_sensitivity": 0.02,
            }

            # One-hot dummies for the categorical inputs
            for g in ["F", "M", "Other"]:
                input_dict[f"gender_{g}"] = 1 if gender == g else 0
            for r in ["Central", "East", "North", "South", "West"]:
                input_dict[f"region_{r}"] = 1 if region == r else 0
            for s in ["Enterprise", "Retail", "SMB"]:
                input_dict[f"segment_{s}"] = 1 if segment == s else 0
            for c in ["In-Store", "Mobile App", "Online", "Phone"]:
                input_dict[f"acquisition_channel_{c}"] = 1 if channel == c else 0

            # Create feature vector in training column order
            X_input = pd.DataFrame([{k: input_dict.get(k, 0) for k in feature_names}])
            X_scaled = data["scaler"].transform(X_input)

            # Predict
            proba = data["model"].predict_proba(X_scaled)[0][1]
            risk_score = proba * 100
            color, risk_label = get_churn_risk_color(risk_score)
            prediction = 1 if risk_score >= 50 else 0

            # DISPLAY RESULTS
            st.markdown("---")

            res_col1, res_col2, res_col3 = st.columns([1, 2, 1])
            with res_col2:
                st.markdown(f"""
Churn Risk: {risk_label}
{risk_score:.1f}%
Predicted Outcome: {'🔴 CHURN' if prediction == 1 else '🟢 RETAIN'}
""", unsafe_allow_html=True)

                # Risk meter
                fig = go.Figure(go.Indicator(
                    mode="gauge+number",
                    value=risk_score,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    gauge={
                        'axis': {'range': [0, 100]},
                        'bar': {'color': color},
                        'steps': [
                            {'range': [0, 25], 'color': '#d5f5e3'},
                            {'range': [25, 50], 'color': '#fcf3cf'},
                            {'range': [50, 75], 'color': '#f5cba7'},
                            {'range': [75, 100], 'color': '#f5b7b1'}
                        ],
                        'threshold': {
                            'line': {'color': 'black', 'width': 4},
                            'thickness': 0.75,
                            'value': risk_score
                        }
                    }
                ))
                fig.update_layout(height=300, margin=dict(t=0, b=0, l=0, r=0))
                st.plotly_chart(fig, use_container_width=True)

            # Reasons: rule-based flags on the raw inputs (not model output)
            st.subheader("📊 Key Risk Factors")

            reasons = []
            if recency > 90:
                reasons.append(("No purchase in 90+ days", "HIGH",
                                f"{recency} days since last purchase"))
            if txn_count < 3:
                reasons.append(("Very few transactions", "HIGH",
                                f"Only {txn_count} total purchases"))
            if total_spent < 200:
                reasons.append(("Low lifetime spend", "MEDIUM",
                                f"${total_spent:.0f} total spent"))
            if email_rate < 0.2:
                reasons.append(("Low email engagement", "MEDIUM",
                                f"{email_rate:.1%} open rate"))
            if support_tickets > 3:
                reasons.append(("High support activity", "MEDIUM",
                                f"{support_tickets} recent tickets"))

            if not reasons:
                reasons.append(("Overall profile stable", "LOW", "No major risk flags detected"))

            severity_icons = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}
            for reason, severity, detail in reasons:
                sev_color = severity_icons[severity]
                st.markdown(f"""
{sev_color} {reason} — {detail}
""", unsafe_allow_html=True)

            # Recommendation
            st.subheader("🎯 Recommended Action")
            if risk_score >= 75:
                st.error("🚨 **URGENT**: Executive-level intervention recommended. Personal call + exclusive offer.")
            elif risk_score >= 50:
                st.warning("⚠️ **HIGH PRIORITY**: Re-engagement campaign + targeted discount.")
            elif risk_score >= 25:
                st.info("ℹ️ **MONITOR**: Increase touchpoints. Watch for further decline.")
            else:
                st.success("✅ **HEALTHY**: Consider upsell/cross-sell opportunities.")

    else:
        # Batch mode: preview only, no prediction is run on the upload.
        st.info("Upload a CSV with customer features to predict churn risk for multiple customers.")
        uploaded = st.file_uploader("Upload CSV", type=["csv"])
        if uploaded:
            batch_df = pd.read_csv(uploaded)
            st.write(f"📄 Loaded {len(batch_df)} customers")
            st.dataframe(batch_df.head())
            st.info("Batch prediction coming soon — single customer mode is fully functional above.")

# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 3: CUSTOMER SEGMENTS
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "👥 Customer Segments":
    _render_page_header(
        "Customer Segmentation",
        "Explore VIP, Loyal, At Risk, and Lost customer segments",
    )

    if not DATA_LOADED:
        st.error("Data not loaded.")
        st.stop()

    seg = data["segmented"]

    # Segment selector
    selected_segments = st.multiselect(
        "Filter Segments:",
        ["VIP", "Loyal", "At Risk", "Lost"],
        default=["VIP", "Loyal", "At Risk", "Lost"]
    )

    filtered = seg[seg["segment_label"].isin(selected_segments)]

    col1, col2 = st.columns(2)

    with col1:
        # Scatter: Recency vs Spend, colored by segment, sized by order count
        fig = px.scatter(
            filtered, x="recency_days", y="total_spent",
            color="segment_label", size="txn_count",
            hover_data=["customer_id", "RFM_score", "engagement_score"],
            color_discrete_map=SEGMENT_COLORS,
            title="Customers: Recency vs Lifetime Spend",
            labels={"recency_days": "Days Since Last Purchase",
                    "total_spent": "Total Spent ($)"}
        )
        fig.update_layout(height=500)
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        # RFM distribution by segment
        fig = px.box(
            filtered, x="segment_label", y="RFM_score",
            color="segment_label",
            color_discrete_map=SEGMENT_COLORS,
            title="RFM Score Distribution by Segment",
            category_orders={"segment_label": SEGMENT_ORDER}
        )
        fig.update_layout(height=500, showlegend=False)
        st.plotly_chart(fig, use_container_width=True)

    # Segment metrics table
    st.subheader("📊 Segment Metrics")
    metrics = filtered.groupby("segment_label").agg(
        Count=("customer_id", "count"),
        Avg_Spend=("total_spent", "mean"),
        Avg_Recency=("recency_days", "mean"),
        Avg_Frequency=("txn_count", "mean"),
        Avg_RFM=("RFM_score", "mean"),
        Churn_Rate=("churned", "mean"),
    )
    # Format the display columns from the unrounded means; rounding the
    # fraction to 2 decimals first would truncate the rate to whole percents.
    metrics["Churn_Rate"] = (metrics["Churn_Rate"] * 100).round(1).astype(str) + "%"
    metrics["Avg_Spend"] = "$" + metrics["Avg_Spend"].round(0).astype(int).astype(str)
    metrics = metrics.round(2)
    st.dataframe(metrics, use_container_width=True)

    # Customer table with search
    st.subheader("🔍 Customer Explorer")
    search_id = st.text_input("Search Customer ID (e.g., C00001):")
    if search_id:
        # regex=False: the box takes literal text, and characters such as "("
        # would otherwise raise a regex compile error.
        cust = seg[seg["customer_id"].str.contains(search_id, case=False, na=False, regex=False)]
    else:
        cust = filtered.sample(min(20, len(filtered)), random_state=42)

    display_cols = ["customer_id", "segment_label", "recency_days", "txn_count",
                    "total_spent", "RFM_score", "engagement_score", "churned"]
    st.dataframe(cust[display_cols].reset_index(drop=True), use_container_width=True)

# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 4: SALES ANALYTICS
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "📈 Sales Analytics":
    _render_page_header(
        "Sales Analytics",
        "Revenue trends, product performance, and growth metrics",
    )

    if not DATA_LOADED:
        st.error("Data not loaded.")
        st.stop()

    tab1, tab2, tab3, tab4 = st.tabs(["📊 Revenue Trends", "🏆 Top Products",
                                      "📅 Seasonality", "📈 Growth"])

    with tab1:
        monthly = data["monthly_sales"]

        fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                            subplot_titles=("Monthly Revenue", "Monthly Orders"),
                            vertical_spacing=0.1)

        fig.add_trace(
            go.Scatter(x=monthly["period"], y=monthly["revenue"],
                       mode="lines+markers", name="Revenue",
                       line=dict(color="#3498db")),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(x=monthly["period"], y=monthly["orders"],
                       mode="lines+markers", name="Orders",
                       line=dict(color="#e67e22")),
            row=2, col=1
        )

        fig.update_layout(height=600, showlegend=False,
                          title_text="Revenue & Order Trends Over Time")
        st.plotly_chart(fig, use_container_width=True)

        # KPI row (values are shown exactly as stored in sales_kpis.json)
        sales_kpis = data["sales_kpis"]
        kpi_col1, kpi_col2, kpi_col3 = st.columns(3)
        with kpi_col1:
            st.metric("Total Revenue", sales_kpis["Total Revenue (All Time)"])
        with kpi_col2:
            st.metric("Avg Order Value", sales_kpis["Average Order Value"])
        with kpi_col3:
            st.metric("Latest MoM Growth", sales_kpis["Revenue Growth (MoM, latest)"])

    with tab2:
        top_n = st.slider("Show Top N Products", 5, 50, 10)
        # NOTE(review): head() is only "top N" if product_revenue.csv is
        # already sorted by revenue descending — confirm against Phase 7.
        top_products = data["product_revenue"].head(top_n)

        fig = px.bar(
            top_products, x="revenue", y="product_name",
            orientation="h",
            color="category",
            title=f"Top {top_n} Products by Revenue",
            labels={"revenue": "Revenue ($)", "product_name": ""}
        )
        fig.update_layout(height=500, yaxis=dict(autorange="reversed"))
        st.plotly_chart(fig, use_container_width=True)

        # Category breakdown
        fig2 = px.pie(
            data["category_revenue"], values="revenue", names="category",
            title="Revenue by Category",
            hole=0.4
        )
        fig2.update_layout(height=400)
        st.plotly_chart(fig2, use_container_width=True)

    with tab3:
        # Seasonal patterns
        seasonal = data["transactions"].copy()
        seasonal["month"] = seasonal["transaction_date"].dt.month
        seasonal["dayofweek"] = seasonal["transaction_date"].dt.dayofweek
        seasonal["quarter"] = seasonal["transaction_date"].dt.quarter

        month_pattern = seasonal.groupby("month")["total_amount"].sum().reset_index()
        month_pattern["month_name"] = pd.to_datetime(
            month_pattern["month"], format="%m"
        ).dt.strftime("%b")

        dow_pattern = seasonal.groupby("dayofweek")["total_amount"].sum().reset_index()
        # Map by weekday number rather than assigning a fixed 7-item list,
        # which fails whenever a weekday has no transactions at all.
        dow_pattern["day_name"] = dow_pattern["dayofweek"].map(dict(enumerate(DAY_NAMES)))

        col_s1, col_s2 = st.columns(2)
        with col_s1:
            fig = px.bar(
                month_pattern, x="month_name", y="total_amount",
                color_discrete_sequence=["#3498db"],
                title="Revenue by Month (Seasonality)",
                labels={"total_amount": "Revenue ($)", "month_name": "Month"}
            )
            fig.update_layout(height=350)
            st.plotly_chart(fig, use_container_width=True)

        with col_s2:
            fig = px.bar(
                dow_pattern, x="day_name", y="total_amount",
                color_discrete_sequence=["#e67e22"],
                title="Revenue by Day of Week",
                labels={"total_amount": "Revenue ($)", "day_name": "Day"}
            )
            fig.update_layout(height=350)
            st.plotly_chart(fig, use_container_width=True)

    with tab4:
        monthly = data["monthly_sales"].copy()
        monthly["revenue_mom"] = monthly["revenue"].pct_change() * 100
        monthly["revenue_3ma"] = monthly["revenue"].rolling(3, min_periods=1).mean()

        # Growth bars on the left axis, smoothed revenue on a right-hand axis
        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=monthly["period"], y=monthly["revenue_mom"],
            name="MoM Growth %", marker_color="#3498db"
        ))
        fig.add_trace(go.Scatter(
            x=monthly["period"], y=monthly["revenue_3ma"],
            name="3-Month MA ($)", yaxis="y2",
            line=dict(color="#e74c3c")
        ))
        fig.update_layout(
            title="Month-over-Month Growth Rate",
            yaxis=dict(title="MoM Growth (%)"),
            yaxis2=dict(title="Revenue ($)", overlaying="y", side="right"),
            height=400,
            legend=dict(orientation="h", yanchor="bottom", y=1.02)
        )
        st.plotly_chart(fig, use_container_width=True)

# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 5: ACTION RECOMMENDATIONS
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "🎯 Action Recommendations":
    _render_page_header(
        "Action Recommendations",
        "Priority queue of customers needing immediate business action",
    )

    if not DATA_LOADED:
        st.error("Data not loaded.")
        st.stop()

    recs = data["recommendations"]

    # Filters
    col_f1, col_f2, col_f3 = st.columns(3)
    with col_f1:
        seg_filter = st.multiselect("Segment:", recs["segment_label"].unique(),
                                    default=recs["segment_label"].unique())
    with col_f2:
        risk_filter = st.multiselect("Risk Level:", recs["risk_level"].unique(),
                                     default=recs["risk_level"].unique())
    with col_f3:
        priority_min = st.slider("Min Priority Score:", 0, 130, 50)

    filtered_recs = recs[
        (recs["segment_label"].isin(seg_filter)) &
        (recs["risk_level"].isin(risk_filter)) &
        (recs["final_priority_score"] >= priority_min)
    ].sort_values("final_priority_score", ascending=False)

    st.markdown(f"**{len(filtered_recs):,} customers match your filters**")

    # Priority stats
    campaign_cost = filtered_recs["cost_per_customer"].sum()
    expected_saved = filtered_recs["expected_revenue_saved"].sum()
    # max(..., 1) keeps the ratio defined when the filtered set has no cost.
    est_roi = (expected_saved - campaign_cost) / max(campaign_cost, 1) * 100

    stat_col1, stat_col2, stat_col3, stat_col4 = st.columns(4)
    with stat_col1:
        st.metric("Total at Risk", len(filtered_recs))
    with stat_col2:
        st.metric("Revenue at Stake", f"${filtered_recs['revenue_at_stake'].sum():,.0f}")
    with stat_col3:
        st.metric("Est. Campaign Cost", f"${campaign_cost:,.0f}")
    with stat_col4:
        st.metric("Est. ROI", f"{est_roi:.0f}%")

    st.markdown("---")

    # Display priority table
    display_cols = [
        "customer_id", "segment_label", "risk_level", "churn_risk_score",
        "total_spent", "revenue_at_stake", "final_priority_score",
        "recommended_action", "recommended_channel", "recommended_offer",
        "urgency", "behavioral_enhancements"
    ]

    st.dataframe(
        filtered_recs[display_cols].head(100),
        use_container_width=True,
        column_config={
            "churn_risk_score": st.column_config.ProgressColumn(
                "Risk %", min_value=0, max_value=100, format="%.0f%%"),
            "final_priority_score": st.column_config.NumberColumn("Priority", format="%.1f"),
            "revenue_at_stake": st.column_config.NumberColumn("Revenue at Stake", format="$%.0f"),
        }
    )

    # Campaign ROI summary
    st.markdown("---")
    st.subheader("💰 Campaign ROI Summary")

    roi = data["campaign_roi"]
    col_roi1, col_roi2 = st.columns(2)

    with col_roi1:
        st.markdown("**Full Campaign (All Customers)**")
        fc = roi["full_campaign"]
        st.write(f"- Customers: {fc['customers']:,}")
        st.write(f"- Revenue at Stake: ${fc['revenue_at_stake']:,.0f}")
        st.write(f"- Campaign Cost: ${fc['campaign_cost']:,.0f}")
        st.write(f"- Expected Saved: ${fc['expected_revenue_saved']:,.0f}")
        st.write(f"- ROI: **{fc['roi_percent']:.0f}%**")

    with col_roi2:
        st.markdown("**High Priority Only (Priority ≥ 7)**")
        hp = roi["high_priority_campaign"]
        st.write(f"- Customers: {hp['customers']:,}")
        st.write(f"- Revenue at Stake: ${hp['revenue_at_stake']:,.0f}")
        st.write(f"- Campaign Cost: ${hp['campaign_cost']:,.0f}")
        st.write(f"- Expected Saved: ${hp['expected_revenue_saved']:,.0f}")
        st.write(f"- ROI: **{hp['roi_percent']:.0f}%**")

# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 6: MODEL PERFORMANCE
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "🧠 Model Performance":
    _render_page_header(
        "Model Performance & Explainability",
        "Churn prediction accuracy, feature importance, and model health",
    )

    if not DATA_LOADED:
        st.error("Data not loaded.")
        st.stop()

    tab1, tab2, tab3 = st.tabs(["📊 Model Metrics", "🔍 Feature Importance", "📋 Explanations"])

    with tab1:
        # Model comparison (optional artifact, read from the Phase 4 outputs)
        try:
            model_comp = pd.read_csv(PHASE4_OUTPUT_DIR / "model_comparison.csv")
            fig = px.bar(
                model_comp, x="Model",
                y=["Accuracy", "Precision", "Recall", "F1-Score", "AUC-ROC"],
                barmode="group",
                title="Model Comparison",
                color_discrete_sequence=["#3498db", "#2ecc71", "#e74c3c", "#9b59b6", "#f1c40f"]
            )
            fig.update_layout(height=500)
            st.plotly_chart(fig, use_container_width=True)

            st.dataframe(model_comp.round(3), use_container_width=True)
        except Exception:
            # Not a bare except: that would also trap BaseException-derived
            # signals (e.g. KeyboardInterrupt) raised while rendering.
            st.info("Model comparison data not available.")

        # Confusion matrix
        st.subheader("Confusion Matrix (Best Model)")
        preds = data["model_preds"]
        from sklearn.metrics import confusion_matrix
        cm = confusion_matrix(preds["actual_churn"], preds["rf_pred"])

        fig = px.imshow(
            cm, text_auto=True,
            labels=dict(x="Predicted", y="Actual", color="Count"),
            x=["Active", "Churned"], y=["Active", "Churned"],
            color_continuous_scale="Blues"
        )
        fig.update_layout(height=400)
        st.plotly_chart(fig, use_container_width=True)

    with tab2:
        st.subheader("Top Churn Drivers")

        try:
            # Feature importance is read straight off the loaded model when
            # it exposes feature_importances_.
            model = data.get("model")
            if model is not None and hasattr(model, "feature_importances_"):
                importances = model.feature_importances_
                feat_names = data["feature_names"]

                imp_df = pd.DataFrame({
                    "Feature": [f.replace("_scaled", "") for f in feat_names],
                    "Importance": importances
                }).sort_values("Importance", ascending=False).head(15)

                fig = px.bar(
                    imp_df, x="Importance", y="Feature",
                    orientation="h",
                    color="Importance",
                    color_continuous_scale="Viridis",
                    title="Top 15 Feature Importances (Random Forest)"
                )
                fig.update_layout(height=500, yaxis=dict(autorange="reversed"))
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("Feature importance data not available.")
        except Exception as e:
            st.error(f"Could not load feature importance: {e}")

    with tab3:
        st.subheader("Customer Explanations")

        try:
            explanations = data["explanations"]

            cust_search = st.text_input("Search Customer ID:")
            if cust_search:
                # Literal substring match; see the Customer Explorer search.
                exp = explanations[explanations["customer_id"].str.contains(
                    cust_search, case=False, na=False, regex=False)]
            else:
                exp = explanations.head(10)

            for _, row in exp.iterrows():
                with st.expander(f"Customer {row['customer_id']} | P(churn)={row['predicted_churn_prob']:.1%} | {'🔴 CHURNED' if row['actual_churn']==1 else '🟢 ACTIVE'}"):
                    st.markdown(f"**Top Risk Factors:** {row['top_reasons_for_churn']}")
                    st.markdown(f"**Loyalty Signs:** {row['top_reasons_against_churn']}")
        except Exception:
            st.info("Explanation data not available.")

# ─────────────────────────────────────────────────────────────────────────────
# FOOTER
# ─────────────────────────────────────────────────────────────────────────────
st.markdown("---")
st.markdown("""
Customer Churn Intelligence Dashboard | Built with Streamlit | Data: Synthetic (5,000 customers, 33,037 transactions)
""", unsafe_allow_html=True)