plplpl183's picture
Upload app.py with huggingface_hub
4b8eb0d verified
"""
╔══════════════════════════════════════════════════════════════════════════════╗
║ PHASE 9: INTERACTIVE STREAMLIT DASHBOARD ║
║ Customer Churn Prediction & Sales Intelligence Dashboard ║
╚══════════════════════════════════════════════════════════════════════════════╝
═══════════════════════════════════════════════════════════════════════════════
HOW TO RUN THIS DASHBOARD:
═══════════════════════════════════════════════════════════════════════════════
1. Install dependencies: pip install streamlit plotly pandas numpy joblib scikit-learn
2. Run: streamlit run dashboard.py
3. Open browser at http://localhost:8501
═══════════════════════════════════════════════════════════════════════════════
DASHBOARD ARCHITECTURE:
═══════════════════════════════════════════════════════════════════════════════
This dashboard connects ALL phases into a single business-facing product:
• Overview Tab: KPIs, churn rate, revenue, at-risk alerts
• Predict Tab: Interactive churn prediction (input → risk score + reasons)
• Segments Tab: Customer segments (VIP/Loyal/At Risk/Lost) with drill-down
• Sales Tab: Time-series trends, top products, growth rates
• Recommendations: Priority queue of customers needing action
• Model Health: Model performance, feature importance, data drift
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
import joblib
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")
# ─────────────────────────────────────────────────────────────────────────────
# PAGE CONFIGURATION
# ─────────────────────────────────────────────────────────────────────────────
# Configure the browser tab (title and icon), use the wide layout, and start
# with the sidebar expanded.
st.set_page_config(
    page_title="Customer Churn Intelligence Dashboard",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)
# ─────────────────────────────────────────────────────────────────────────────
# CUSTOM CSS FOR POLISHED LOOK
# ─────────────────────────────────────────────────────────────────────────────
# Inject the dashboard stylesheet. It defines the classes used by the pages
# below: .main-header / .sub-header for page titles, .kpi-card (plus the
# -green / -red / -orange gradient variants) with .kpi-value / .kpi-label,
# the .alert-critical / .alert-warning boxes, and spacing tweaks for st.tabs.
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
font-weight: 700;
color: #1f2937;
margin-bottom: 0.5rem;
}
.sub-header {
font-size: 1.2rem;
color: #6b7280;
margin-bottom: 2rem;
}
.kpi-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 12px;
padding: 1.5rem;
color: white;
text-align: center;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.kpi-card-green {
background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
}
.kpi-card-red {
background: linear-gradient(135deg, #cb2d3e 0%, #ef473a 100%);
}
.kpi-card-orange {
background: linear-gradient(135deg, #f7971e 0%, #ffd200 100%);
}
.kpi-value {
font-size: 2.2rem;
font-weight: 700;
margin: 0;
}
.kpi-label {
font-size: 0.9rem;
opacity: 0.9;
margin-top: 0.3rem;
}
.alert-critical {
background-color: #fee2e2;
border-left: 4px solid #dc2626;
padding: 1rem;
border-radius: 4px;
margin: 0.5rem 0;
}
.alert-warning {
background-color: #fef3c7;
border-left: 4px solid #f59e0b;
padding: 1rem;
border-radius: 4px;
margin: 0.5rem 0;
}
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
}
.stTabs [data-baseweb="tab"] {
padding: 10px 20px;
border-radius: 8px 8px 0 0;
}
</style>
""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# DATA LOADING (cached)
# ─────────────────────────────────────────────────────────────────────────────
@st.cache_data
def load_data():
    """Load all pre-computed datasets and model artifacts from previous phases.

    Returns:
        dict: DataFrames keyed by dataset name (``customers``, ``transactions``,
        ``products``, ``model_preds``, ``test_risk``, ``explanations``,
        ``segmented``, ``segment_summary``, ``monthly_sales``,
        ``product_revenue``, ``category_revenue``, ``recommendations``,
        ``priority_queue``), the parsed ``sales_kpis`` and ``campaign_roi``
        JSON documents, and the artifacts ``model``, ``scaler`` and
        ``feature_names``. The three artifact keys are always present and are
        all ``None`` when any one of them fails to load.

    Raises:
        Exception: whatever ``pd.read_csv``, ``open`` or ``json.load`` raise
        for a missing or malformed data file. Only model loading is
        best-effort.
    """
    data = {}
    # Core datasets
    data["customers"] = pd.read_csv("/app/deploy/data/customer_data.csv")
    data["transactions"] = pd.read_csv(
        "/app/deploy/data/transaction_data.csv", parse_dates=["transaction_date"]
    )
    data["products"] = pd.read_csv("/app/deploy/data/product_data.csv")
    # Phase 4: Model predictions
    data["model_preds"] = pd.read_csv("/app/deploy/data/all_model_predictions.csv")
    # NOTE(review): this is the only dataset read from /app/outputs rather than
    # /app/deploy/data — confirm the path exists in the deployed image.
    data["test_risk"] = pd.read_csv("/app/outputs/phase4/test_set_risk_scores.csv")
    # Phase 5: Explanations
    data["explanations"] = pd.read_csv("/app/deploy/data/customer_churn_explanations.csv")
    # Phase 6: Segmentation
    data["segmented"] = pd.read_csv("/app/deploy/data/customers_segmented.csv")
    data["segment_summary"] = pd.read_csv("/app/deploy/data/segment_summary.csv")
    # Phase 7: Sales
    data["monthly_sales"] = pd.read_csv(
        "/app/deploy/data/monthly_sales.csv", parse_dates=["period"]
    )
    data["product_revenue"] = pd.read_csv("/app/deploy/data/product_revenue.csv")
    data["category_revenue"] = pd.read_csv("/app/deploy/data/category_revenue.csv")
    # Explicit encoding so parsing does not depend on the host locale.
    with open("/app/deploy/data/sales_kpis.json", "r", encoding="utf-8") as f:
        data["sales_kpis"] = json.load(f)
    # Phase 8: Recommendations
    data["recommendations"] = pd.read_csv("/app/deploy/data/customer_recommendations.csv")
    data["priority_queue"] = pd.read_csv("/app/deploy/data/priority_queue_top100.csv")
    with open("/app/deploy/data/campaign_roi_summary.json", "r", encoding="utf-8") as f:
        data["campaign_roi"] = json.load(f)
    # Model artifacts are optional: pages that need them check data["model"].
    try:
        data["model"] = joblib.load("/app/deploy/models/best_churn_model.joblib")
        data["scaler"] = joblib.load("/app/deploy/models/scaler.joblib")
        data["feature_names"] = joblib.load("/app/deploy/models/feature_names.joblib")
    except Exception as e:
        st.warning(f"Model loading issue: {e}")
        # Reset all three together so no caller sees a half-loaded artifact set
        # or hits a KeyError on "scaler" / "feature_names".
        data["model"] = None
        data["scaler"] = None
        data["feature_names"] = None
    return data
# Load everything once at script start. Pages consult DATA_LOADED before
# touching `data`, which falls back to an empty dict when loading fails.
try:
    data = load_data()
except Exception as e:
    st.error(f"Failed to load data: {e}")
    DATA_LOADED = False
    data = {}
else:
    DATA_LOADED = True
# ─────────────────────────────────────────────────────────────────────────────
# HELPER FUNCTIONS
# ─────────────────────────────────────────────────────────────────────────────
def get_churn_risk_color(score):
    """Map a churn risk score to a ``(hex_color, label)`` pair.

    Scores below 25 are "Low", below 50 "Medium", below 75 "High", and
    everything else is "Critical".
    """
    # (exclusive upper bound, colour, label), checked in ascending order.
    bands = (
        (25, "#2ecc71", "Low"),
        (50, "#f1c40f", "Medium"),
        (75, "#e67e22", "High"),
    )
    for upper_bound, hex_color, label in bands:
        if score < upper_bound:
            return hex_color, label
    return "#e74c3c", "Critical"
def format_currency(val):
    """Format a dollar amount for display.

    Amounts whose magnitude is at least 1,000 are shown as whole dollars
    (``$12,346``); smaller amounts keep two decimals (``$999.50``). Negative
    amounts are judged by magnitude and rendered with the sign in front of
    the dollar symbol (``-$2,500``) rather than ``$-2,500.00``.
    """
    sign = "-" if val < 0 else ""
    magnitude = abs(val)
    digits = f"{magnitude:,.0f}" if magnitude >= 1000 else f"{magnitude:,.2f}"
    return f"{sign}${digits}"
# ─────────────────────────────────────────────────────────────────────────────
# SIDEBAR
# ─────────────────────────────────────────────────────────────────────────────
# Sidebar: branding, page navigation (sets `page`, which the page dispatch
# below branches on), an "About" note, and a button that clears the data cache.
with st.sidebar:
    st.image("https://img.icons8.com/color/96/analytics.png", width=60)
    st.title("Churn Intelligence")
    st.markdown("---")

    st.markdown("**Navigation**")
    nav_options = [
        "📊 Overview & KPIs",
        "🔮 Churn Predictor",
        "👥 Customer Segments",
        "📈 Sales Analytics",
        "🎯 Action Recommendations",
        "🧠 Model Performance"
    ]
    page = st.radio("Go to:", nav_options)
    st.markdown("---")

    st.markdown("**About**")
    about_text = """
This dashboard combines churn prediction, customer segmentation,
sales analytics, and automated recommendations into a single
business intelligence platform.
**Data**: 5,000 customers | 33,037 transactions | 150 products
"""
    st.info(about_text)

    # Drop every st.cache_data entry and rerun so load_data() reads fresh files.
    refresh_clicked = st.button("🔄 Refresh Data")
    if refresh_clicked:
        st.cache_data.clear()
        st.rerun()
# ─────────────────────────────────────────────────────────────────────────────
# MAIN CONTENT AREA
# ─────────────────────────────────────────────────────────────────────────────
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 1: OVERVIEW & KPIs
# ═══════════════════════════════════════════════════════════════════════════════
if page == "📊 Overview & KPIs":
    st.markdown('<div class="main-header">Customer Churn Intelligence Dashboard</div>', unsafe_allow_html=True)
    st.markdown('<div class="sub-header">Real-time insights into customer health, revenue, and retention</div>', unsafe_allow_html=True)
    if not DATA_LOADED:
        st.error("Data not loaded. Please run all phases first.")
        st.stop()

    recs_df = data["recommendations"]

    # KPI ROW: five gradient cards rendered from one table of
    # (css classes, formatted value, label).
    kpi_columns = st.columns(5)
    churn_rate = data["customers"]["churned"].mean() * 100
    total_revenue = data["transactions"]["total_amount"].sum()
    total_customers = len(data["customers"])
    at_risk = int((recs_df["priority"] >= 7).sum())
    revenue_at_stake = recs_df["revenue_at_stake"].sum()
    kpi_cards = (
        ("kpi-card kpi-card-red", f"{churn_rate:.1f}%", "Churn Rate"),
        ("kpi-card", format_currency(total_revenue), "Total Revenue"),
        ("kpi-card kpi-card-green", f"{total_customers:,}", "Total Customers"),
        ("kpi-card kpi-card-orange", f"{at_risk:,}", "High Priority Actions"),
        ("kpi-card kpi-card-red", format_currency(revenue_at_stake), "Revenue at Stake"),
    )
    for kpi_column, (card_classes, card_value, card_label) in zip(kpi_columns, kpi_cards):
        with kpi_column:
            st.markdown(f"""
<div class="{card_classes}">
<div class="kpi-value">{card_value}</div>
<div class="kpi-label">{card_label}</div>
</div>
""", unsafe_allow_html=True)
    st.markdown("---")

    # ALERTS SECTION: counts of Critical-risk customers, overall and VIP-only.
    st.subheader("🚨 Live Alerts")
    is_critical = recs_df["risk_level"] == "Critical"
    is_vip = recs_df["segment_label"] == "VIP"
    critical_count = int(is_critical.sum())
    vip_critical = int((is_vip & is_critical).sum())
    critical_box, vip_box = st.columns(2)
    with critical_box:
        st.markdown(f"""
<div class="alert-critical">
<strong>⚠️ {critical_count:,} customers at CRITICAL churn risk</strong><br>
Immediate action required. These customers have >75% churn probability.
</div>
""", unsafe_allow_html=True)
    with vip_box:
        st.markdown(f"""
<div class="alert-warning">
<strong>💎 {vip_critical:,} VIP customers need urgent attention</strong><br>
High-value customers at risk. Executive intervention recommended.
</div>
""", unsafe_allow_html=True)

    # Quick insight cards: segment mix, recent revenue, risk distribution.
    st.markdown("---")
    st.subheader("📋 Quick Insights")
    segments_col, revenue_col, risk_col = st.columns(3)
    with segments_col:
        segment_counts = data["segmented"]["segment_label"].value_counts()
        segment_palette = {"VIP": "#FFD700", "Loyal": "#2ecc71",
                           "At Risk": "#e67e22", "Lost": "#e74c3c"}
        segment_fig = px.pie(
            values=segment_counts.values,
            names=segment_counts.index,
            color=segment_counts.index,
            color_discrete_map=segment_palette,
            hole=0.4,
        )
        segment_fig.update_layout(
            showlegend=True,
            margin=dict(t=0, b=0, l=0, r=0),
            height=300,
            title="Customer Segments",
        )
        st.plotly_chart(segment_fig, use_container_width=True)
    with revenue_col:
        last_twelve = data["monthly_sales"].tail(12)
        revenue_fig = px.area(
            last_twelve,
            x="period",
            y="revenue",
            color_discrete_sequence=["#3498db"],
            title="Revenue Trend (Last 12 Months)",
        )
        revenue_fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0))
        st.plotly_chart(revenue_fig, use_container_width=True)
    with risk_col:
        # Reindex so all four levels appear in a fixed order; absent levels become 0.
        risk_levels = ["Low", "Medium", "High", "Critical"]
        risk_counts = recs_df["risk_level"].value_counts().reindex(risk_levels).fillna(0)
        risk_palette = {"Low": "#2ecc71", "Medium": "#f1c40f",
                        "High": "#e67e22", "Critical": "#e74c3c"}
        risk_fig = px.bar(
            x=risk_counts.index,
            y=risk_counts.values,
            color=risk_counts.index,
            color_discrete_map=risk_palette,
            title="Risk Distribution",
        )
        risk_fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0), showlegend=False)
        st.plotly_chart(risk_fig, use_container_width=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 2: CHURN PREDICTOR
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "🔮 Churn Predictor":
st.markdown('<div class="main-header">Churn Risk Predictor</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">Input customer data to predict churn probability and get actionable insights</div>', unsafe_allow_html=True)
if not DATA_LOADED or data.get("model") is None:
st.warning("⚠️ Model not available. Please run Phase 4 first.")
st.stop()
# TWO MODES: Single Customer or Batch Upload
pred_mode = st.radio("Prediction Mode:", ["Single Customer", "Batch Upload (CSV)"], horizontal=True)
if pred_mode == "Single Customer":
# Use an existing customer as template
sample_customers = data["customers"]["customer_id"].sample(5, random_state=42).tolist()
selected_customer = st.selectbox("Select a customer (or enter ID manually):",
["Manual Entry"] + sample_customers)
if selected_customer != "Manual Entry":
# Pre-fill with existing customer data
cust_row = data["customers"][data["customers"]["customer_id"] == selected_customer].iloc[0]
default_age = int(cust_row["age"])
default_tickets = int(cust_row["support_tickets_6m"])
default_email = float(cust_row["email_open_rate"])
default_web = int(cust_row["web_sessions_30d"])
else:
default_age, default_tickets, default_email, default_web = 35, 1, 0.3, 5
st.markdown("---")
# Input form
col1, col2 = st.columns(2)
with col1:
st.subheader("📋 Customer Profile")
age = st.slider("Age", 18, 80, default_age)
gender = st.selectbox("Gender", ["M", "F", "Other"])
region = st.selectbox("Region", ["North", "South", "East", "West", "Central"])
segment = st.selectbox("Business Segment", ["Retail", "SMB", "Enterprise"])
channel = st.selectbox("Acquisition Channel", ["Online", "In-Store", "Mobile App", "Phone"])
ltv_baseline = st.number_input("Baseline LTV ($)", 0.0, 15000.0, 500.0)
with col2:
st.subheader("📊 Behavioral Data")
support_tickets = st.slider("Support Tickets (6m)", 0, 10, default_tickets)
email_rate = st.slider("Email Open Rate", 0.0, 1.0, default_email)
web_sessions = st.slider("Web Sessions (30d)", 0, 30, default_web)
recency = st.slider("Days Since Last Purchase", 0, 500, 120)
total_spent = st.number_input("Total Spent ($)", 0.0, 20000.0, 800.0)
txn_count = st.slider("Transaction Count", 0, 50, 8)
st.markdown("---")
if st.button("🔮 Predict Churn Risk", type="primary", use_container_width=True):
# Build feature vector
# Create a DataFrame with same structure as training
feature_names = data["feature_names"]
# Compute derived features
tenure_days = 365 # placeholder
purchase_frequency = txn_count / (tenure_days / 30 + 1)
avg_txn_value = total_spent / (txn_count + 1)
rfm_score = 8 # placeholder
engagement = (email_rate * 0.35 + min(web_sessions/20, 1) * 0.4 +
(1 - min(support_tickets/5, 1)) * 0.25)
input_dict = {
"age": age,
"lifetime_value_baseline": ltv_baseline,
"support_tickets_6m": support_tickets,
"email_open_rate": email_rate,
"web_sessions_30d": web_sessions,
"total_spent": total_spent,
"txn_count": txn_count,
"avg_order_value": avg_txn_value,
"max_order_value": avg_txn_value * 1.5,
"min_order_value": avg_txn_value * 0.5,
"std_order_value": avg_txn_value * 0.3,
"total_quantity": txn_count * 2,
"total_discount": txn_count * 0.02,
"unique_products": min(txn_count, 20),
"recency_days": recency,
"R_score": max(1, 6 - recency // 60),
"F_score": min(5, max(1, txn_count // 3)),
"M_score": min(5, max(1, int(total_spent // 500))),
"RFM_score": 10, # placeholder
"tenure_days": tenure_days,
"days_between_first_last": tenure_days,
"purchase_frequency": purchase_frequency,
"avg_transaction_value": avg_txn_value,
"engagement_score": engagement,
"spending_trend_ratio": 1.0,
"purchase_regularity": 1.0,
"category_diversity": min(txn_count, 10),
"discount_sensitivity": 0.02,
}
# Gender dummies
for g in ["F", "M", "Other"]:
input_dict[f"gender_{g}"] = 1 if gender == g else 0
# Region dummies
for r in ["Central", "East", "North", "South", "West"]:
input_dict[f"region_{r}"] = 1 if region == r else 0
# Segment dummies
for s in ["Enterprise", "Retail", "SMB"]:
input_dict[f"segment_{s}"] = 1 if segment == s else 0
# Channel dummies
for c in ["In-Store", "Mobile App", "Online", "Phone"]:
input_dict[f"acquisition_channel_{c}"] = 1 if channel == c else 0
# Create feature vector
X_input = pd.DataFrame([{k: input_dict.get(k, 0) for k in feature_names}])
X_scaled = data["scaler"].transform(X_input)
# Predict
proba = data["model"].predict_proba(X_scaled)[0][1]
risk_score = proba * 100
color, risk_label = get_churn_risk_color(risk_score)
prediction = 1 if risk_score >= 50 else 0
# DISPLAY RESULTS
st.markdown("---")
res_col1, res_col2, res_col3 = st.columns([1, 2, 1])
with res_col2:
st.markdown(f"""
<div style="text-align: center; padding: 2rem; background: {color}20; border-radius: 16px; border: 3px solid {color};">
<div style="font-size: 1.2rem; color: {color}; font-weight: 600;">Churn Risk: {risk_label}</div>
<div style="font-size: 4rem; font-weight: 800; color: {color}; margin: 0.5rem 0;">{risk_score:.1f}%</div>
<div style="font-size: 1rem; color: #666;">Predicted Outcome: {'🔴 CHURN' if prediction == 1 else '🟢 RETAIN'}</div>
</div>
""", unsafe_allow_html=True)
# Risk meter
fig = go.Figure(go.Indicator(
mode="gauge+number",
value=risk_score,
domain={'x': [0, 1], 'y': [0, 1]},
gauge={
'axis': {'range': [0, 100]},
'bar': {'color': color},
'steps': [
{'range': [0, 25], 'color': '#d5f5e3'},
{'range': [25, 50], 'color': '#fcf3cf'},
{'range': [50, 75], 'color': '#f5cba7'},
{'range': [75, 100], 'color': '#f5b7b1'}
],
'threshold': {
'line': {'color': 'black', 'width': 4},
'thickness': 0.75,
'value': risk_score
}
}
))
fig.update_layout(height=300, margin=dict(t=0, b=0, l=0, r=0))
st.plotly_chart(fig, use_container_width=True)
# Reasons
st.subheader("📊 Key Risk Factors")
reasons = []
if recency > 90:
reasons.append(("No purchase in 90+ days", "HIGH", f"{recency} days since last purchase"))
if txn_count < 3:
reasons.append(("Very few transactions", "HIGH", f"Only {txn_count} total purchases"))
if total_spent < 200:
reasons.append(("Low lifetime spend", "MEDIUM", f"${total_spent:.0f} total spent"))
if email_rate < 0.2:
reasons.append(("Low email engagement", "MEDIUM", f"{email_rate:.1%} open rate"))
if support_tickets > 3:
reasons.append(("High support activity", "MEDIUM", f"{support_tickets} recent tickets"))
if not reasons:
reasons.append(("Overall profile stable", "LOW", "No major risk flags detected"))
for reason, severity, detail in reasons:
sev_color = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}[severity]
st.markdown(f"""
<div style="padding: 0.75rem; background: #f8f9fa; border-radius: 8px; margin: 0.3rem 0;">
<strong>{sev_color} {reason}</strong> — {detail}
</div>
""", unsafe_allow_html=True)
# Recommendation
st.subheader("🎯 Recommended Action")
if risk_score >= 75:
st.error("🚨 **URGENT**: Executive-level intervention recommended. Personal call + exclusive offer.")
elif risk_score >= 50:
st.warning("⚠️ **HIGH PRIORITY**: Re-engagement campaign + targeted discount.")
elif risk_score >= 25:
st.info("ℹ️ **MONITOR**: Increase touchpoints. Watch for further decline.")
else:
st.success("✅ **HEALTHY**: Consider upsell/cross-sell opportunities.")
else: # Batch mode
st.info("Upload a CSV with customer features to predict churn risk for multiple customers.")
uploaded = st.file_uploader("Upload CSV", type=["csv"])
if uploaded:
batch_df = pd.read_csv(uploaded)
st.write(f"📄 Loaded {len(batch_df)} customers")
st.dataframe(batch_df.head())
st.info("Batch prediction coming soon — single customer mode is fully functional above.")
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 3: CUSTOMER SEGMENTS
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "👥 Customer Segments":
st.markdown('<div class="main-header">Customer Segmentation</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">Explore VIP, Loyal, At Risk, and Lost customer segments</div>', unsafe_allow_html=True)
if not DATA_LOADED:
st.error("Data not loaded.")
st.stop()
seg = data["segmented"]
# Segment selector
selected_segments = st.multiselect(
"Filter Segments:",
["VIP", "Loyal", "At Risk", "Lost"],
default=["VIP", "Loyal", "At Risk", "Lost"]
)
filtered = seg[seg["segment_label"].isin(selected_segments)]
col1, col2 = st.columns(2)
with col1:
# 3D-ish scatter: Recency vs Spend, colored by segment
fig = px.scatter(
filtered, x="recency_days", y="total_spent",
color="segment_label", size="txn_count",
hover_data=["customer_id", "RFM_score", "engagement_score"],
color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71",
"At Risk": "#e67e22", "Lost": "#e74c3c"},
title="Customers: Recency vs Lifetime Spend",
labels={"recency_days": "Days Since Last Purchase", "total_spent": "Total Spent ($)"}
)
fig.update_layout(height=500)
st.plotly_chart(fig, use_container_width=True)
with col2:
# RFM distribution by segment
fig = px.box(
filtered, x="segment_label", y="RFM_score",
color="segment_label",
color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71",
"At Risk": "#e67e22", "Lost": "#e74c3c"},
title="RFM Score Distribution by Segment",
category_orders={"segment_label": ["VIP", "Loyal", "At Risk", "Lost"]}
)
fig.update_layout(height=500, showlegend=False)
st.plotly_chart(fig, use_container_width=True)
# Segment metrics table
st.subheader("📊 Segment Metrics")
metrics = filtered.groupby("segment_label").agg(
Count=("customer_id", "count"),
Avg_Spend=("total_spent", "mean"),
Avg_Recency=("recency_days", "mean"),
Avg_Frequency=("txn_count", "mean"),
Avg_RFM=("RFM_score", "mean"),
Churn_Rate=("churned", "mean"),
).round(2)
metrics["Churn_Rate"] = (metrics["Churn_Rate"] * 100).round(1).astype(str) + "%"
metrics["Avg_Spend"] = "$" + metrics["Avg_Spend"].round(0).astype(int).astype(str)
st.dataframe(metrics, use_container_width=True)
# Customer table with search
st.subheader("🔍 Customer Explorer")
search_id = st.text_input("Search Customer ID (e.g., C00001):")
if search_id:
cust = seg[seg["customer_id"].str.contains(search_id, case=False, na=False)]
else:
cust = filtered.sample(min(20, len(filtered)), random_state=42)
display_cols = ["customer_id", "segment_label", "recency_days", "txn_count",
"total_spent", "RFM_score", "engagement_score", "churned"]
st.dataframe(cust[display_cols].reset_index(drop=True), use_container_width=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 4: SALES ANALYTICS
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "📈 Sales Analytics":
st.markdown('<div class="main-header">Sales Analytics</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">Revenue trends, product performance, and growth metrics</div>', unsafe_allow_html=True)
if not DATA_LOADED:
st.error("Data not loaded.")
st.stop()
tab1, tab2, tab3, tab4 = st.tabs(["📊 Revenue Trends", "🏆 Top Products", "📅 Seasonality", "📈 Growth"])
with tab1:
monthly = data["monthly_sales"]
fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
subplot_titles=("Monthly Revenue", "Monthly Orders"),
vertical_spacing=0.1)
fig.add_trace(
go.Scatter(x=monthly["period"], y=monthly["revenue"],
mode="lines+markers", name="Revenue", line=dict(color="#3498db")),
row=1, col=1
)
fig.add_trace(
go.Scatter(x=monthly["period"], y=monthly["orders"],
mode="lines+markers", name="Orders", line=dict(color="#e67e22")),
row=2, col=1
)
fig.update_layout(height=600, showlegend=False,
title_text="Revenue & Order Trends Over Time")
st.plotly_chart(fig, use_container_width=True)
# KPI row
kpi_col1, kpi_col2, kpi_col3 = st.columns(3)
with kpi_col1:
st.metric("Total Revenue", data["sales_kpis"]["Total Revenue (All Time)"])
with kpi_col2:
st.metric("Avg Order Value", data["sales_kpis"]["Average Order Value"])
with kpi_col3:
st.metric("Latest MoM Growth", data["sales_kpis"]["Revenue Growth (MoM, latest)"])
with tab2:
top_n = st.slider("Show Top N Products", 5, 50, 10)
top_products = data["product_revenue"].head(top_n)
fig = px.bar(
top_products, x="revenue", y="product_name",
orientation="h", color="category",
title=f"Top {top_n} Products by Revenue",
labels={"revenue": "Revenue ($)", "product_name": ""}
)
fig.update_layout(height=500, yaxis=dict(autorange="reversed"))
st.plotly_chart(fig, use_container_width=True)
# Category breakdown
fig2 = px.pie(
data["category_revenue"], values="revenue", names="category",
title="Revenue by Category", hole=0.4
)
fig2.update_layout(height=400)
st.plotly_chart(fig2, use_container_width=True)
with tab3:
# Seasonal patterns
seasonal = data["transactions"].copy()
seasonal["month"] = seasonal["transaction_date"].dt.month
seasonal["dayofweek"] = seasonal["transaction_date"].dt.dayofweek
seasonal["quarter"] = seasonal["transaction_date"].dt.quarter
month_pattern = seasonal.groupby("month")["total_amount"].sum().reset_index()
month_pattern["month_name"] = pd.to_datetime(month_pattern["month"], format="%m").dt.strftime("%b")
dow_pattern = seasonal.groupby("dayofweek")["total_amount"].sum().reset_index()
dow_pattern["day_name"] = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
col_s1, col_s2 = st.columns(2)
with col_s1:
fig = px.bar(
month_pattern, x="month_name", y="total_amount",
color_discrete_sequence=["#3498db"],
title="Revenue by Month (Seasonality)",
labels={"total_amount": "Revenue ($)", "month_name": "Month"}
)
fig.update_layout(height=350)
st.plotly_chart(fig, use_container_width=True)
with col_s2:
fig = px.bar(
dow_pattern, x="day_name", y="total_amount",
color_discrete_sequence=["#e67e22"],
title="Revenue by Day of Week",
labels={"total_amount": "Revenue ($)", "day_name": "Day"}
)
fig.update_layout(height=350)
st.plotly_chart(fig, use_container_width=True)
with tab4:
monthly = data["monthly_sales"].copy()
monthly["revenue_mom"] = monthly["revenue"].pct_change() * 100
monthly["revenue_3ma"] = monthly["revenue"].rolling(3, min_periods=1).mean()
fig = go.Figure()
fig.add_trace(go.Bar(
x=monthly["period"], y=monthly["revenue_mom"],
name="MoM Growth %", marker_color="#3498db"
))
fig.add_trace(go.Scatter(
x=monthly["period"], y=monthly["revenue_3ma"],
name="3-Month MA ($)", yaxis="y2", line=dict(color="#e74c3c")
))
fig.update_layout(
title="Month-over-Month Growth Rate",
yaxis=dict(title="MoM Growth (%)"),
yaxis2=dict(title="Revenue ($)", overlaying="y", side="right"),
height=400, legend=dict(orientation="h", yanchor="bottom", y=1.02)
)
st.plotly_chart(fig, use_container_width=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 5: ACTION RECOMMENDATIONS
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "🎯 Action Recommendations":
st.markdown('<div class="main-header">Action Recommendations</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">Priority queue of customers needing immediate business action</div>', unsafe_allow_html=True)
if not DATA_LOADED:
st.error("Data not loaded.")
st.stop()
recs = data["recommendations"]
# Filters
col_f1, col_f2, col_f3 = st.columns(3)
with col_f1:
seg_filter = st.multiselect("Segment:", recs["segment_label"].unique(), default=recs["segment_label"].unique())
with col_f2:
risk_filter = st.multiselect("Risk Level:", recs["risk_level"].unique(), default=recs["risk_level"].unique())
with col_f3:
priority_min = st.slider("Min Priority Score:", 0, 130, 50)
filtered_recs = recs[
(recs["segment_label"].isin(seg_filter)) &
(recs["risk_level"].isin(risk_filter)) &
(recs["final_priority_score"] >= priority_min)
].sort_values("final_priority_score", ascending=False)
st.markdown(f"**{len(filtered_recs):,} customers match your filters**")
# Priority stats
stat_col1, stat_col2, stat_col3, stat_col4 = st.columns(4)
with stat_col1:
st.metric("Total at Risk", len(filtered_recs))
with stat_col2:
st.metric("Revenue at Stake", f"${filtered_recs['revenue_at_stake'].sum():,.0f}")
with stat_col3:
st.metric("Est. Campaign Cost", f"${filtered_recs['cost_per_customer'].sum():,.0f}")
with stat_col4:
st.metric("Est. ROI", f"{((filtered_recs['expected_revenue_saved'].sum() - filtered_recs['cost_per_customer'].sum()) / max(filtered_recs['cost_per_customer'].sum(), 1) * 100):.0f}%")
st.markdown("---")
# Display priority table
display_cols = [
"customer_id", "segment_label", "risk_level", "churn_risk_score",
"total_spent", "revenue_at_stake", "final_priority_score",
"recommended_action", "recommended_channel", "recommended_offer",
"urgency", "behavioral_enhancements"
]
st.dataframe(
filtered_recs[display_cols].head(100),
use_container_width=True,
column_config={
"churn_risk_score": st.column_config.ProgressColumn("Risk %", min_value=0, max_value=100, format="%.0f%%"),
"final_priority_score": st.column_config.NumberColumn("Priority", format="%.1f"),
"revenue_at_stake": st.column_config.NumberColumn("Revenue at Stake", format="$%.0f"),
}
)
# Campaign ROI summary
st.markdown("---")
st.subheader("💰 Campaign ROI Summary")
roi = data["campaign_roi"]
col_roi1, col_roi2 = st.columns(2)
with col_roi1:
st.markdown("**Full Campaign (All Customers)**")
fc = roi["full_campaign"]
st.write(f"- Customers: {fc['customers']:,}")
st.write(f"- Revenue at Stake: ${fc['revenue_at_stake']:,.0f}")
st.write(f"- Campaign Cost: ${fc['campaign_cost']:,.0f}")
st.write(f"- Expected Saved: ${fc['expected_revenue_saved']:,.0f}")
st.write(f"- ROI: **{fc['roi_percent']:.0f}%**")
with col_roi2:
st.markdown("**High Priority Only (Priority ≥ 7)**")
hp = roi["high_priority_campaign"]
st.write(f"- Customers: {hp['customers']:,}")
st.write(f"- Revenue at Stake: ${hp['revenue_at_stake']:,.0f}")
st.write(f"- Campaign Cost: ${hp['campaign_cost']:,.0f}")
st.write(f"- Expected Saved: ${hp['expected_revenue_saved']:,.0f}")
st.write(f"- ROI: **{hp['roi_percent']:.0f}%**")
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 6: MODEL PERFORMANCE
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "🧠 Model Performance":
st.markdown('<div class="main-header">Model Performance & Explainability</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">Churn prediction accuracy, feature importance, and model health</div>', unsafe_allow_html=True)
if not DATA_LOADED:
st.error("Data not loaded.")
st.stop()
tab1, tab2, tab3 = st.tabs(["📊 Model Metrics", "🔍 Feature Importance", "📋 Explanations"])
with tab1:
# Model comparison
try:
model_comp = pd.read_csv("/app/outputs/phase4/model_comparison.csv")
fig = px.bar(
model_comp, x="Model", y=["Accuracy", "Precision", "Recall", "F1-Score", "AUC-ROC"],
barmode="group", title="Model Comparison",
color_discrete_sequence=["#3498db", "#2ecc71", "#e74c3c", "#9b59b6", "#f1c40f"]
)
fig.update_layout(height=500)
st.plotly_chart(fig, use_container_width=True)
st.dataframe(model_comp.round(3), use_container_width=True)
except:
st.info("Model comparison data not available.")
# Confusion matrix
st.subheader("Confusion Matrix (Best Model)")
preds = data["model_preds"]
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(preds["actual_churn"], preds["rf_pred"])
fig = px.imshow(
cm, text_auto=True,
labels=dict(x="Predicted", y="Actual", color="Count"),
x=["Active", "Churned"], y=["Active", "Churned"],
color_continuous_scale="Blues"
)
fig.update_layout(height=400)
st.plotly_chart(fig, use_container_width=True)
with tab2:
st.subheader("Top Churn Drivers")
try:
# Feature importance from Phase 5 summary or recompute
if data.get("model") and hasattr(data["model"], "feature_importances_"):
importances = data["model"].feature_importances_
feat_names = data["feature_names"]
imp_df = pd.DataFrame({
"Feature": [f.replace("_scaled", "") for f in feat_names],
"Importance": importances
}).sort_values("Importance", ascending=False).head(15)
fig = px.bar(
imp_df, x="Importance", y="Feature", orientation="h",
color="Importance", color_continuous_scale="Viridis",
title="Top 15 Feature Importances (Random Forest)"
)
fig.update_layout(height=500, yaxis=dict(autorange="reversed"))
st.plotly_chart(fig, use_container_width=True)
else:
st.info("Feature importance data not available.")
except Exception as e:
st.error(f"Could not load feature importance: {e}")
with tab3:
st.subheader("Customer Explanations")
try:
explanations = data["explanations"]
cust_search = st.text_input("Search Customer ID:")
if cust_search:
exp = explanations[explanations["customer_id"].str.contains(cust_search, case=False, na=False)]
else:
exp = explanations.head(10)
for _, row in exp.iterrows():
with st.expander(f"Customer {row['customer_id']} | P(churn)={row['predicted_churn_prob']:.1%} | {'🔴 CHURNED' if row['actual_churn']==1 else '🟢 ACTIVE'}"):
st.markdown(f"**Top Risk Factors:** {row['top_reasons_for_churn']}")
st.markdown(f"**Loyalty Signs:** {row['top_reasons_against_churn']}")
except:
st.info("Explanation data not available.")
# ─────────────────────────────────────────────────────────────────────────────
# FOOTER
# ─────────────────────────────────────────────────────────────────────────────
# Footer shown under every page: a divider followed by a centred credit line.
footer_html = """
<div style="text-align: center; color: #9ca3af; padding: 1rem;">
<small>Customer Churn Intelligence Dashboard | Built with Streamlit | Data: Synthetic (5,000 customers, 33,037 transactions)</small>
</div>
"""
st.markdown("---")
st.markdown(footer_html, unsafe_allow_html=True)