Spaces:

plplpl183
/

churn-intelligence-dashboard

Sleeping

App Files Files Community

churn-intelligence-dashboard / app.py

plplpl183

Upload app.py with huggingface_hub

4b8eb0d verified about 1 month ago

raw

history blame contribute delete

45.4 kB

	"""
	╔══════════════════════════════════════════════════════════════════════════════╗
	║ PHASE 9: INTERACTIVE STREAMLIT DASHBOARD ║
	║ Customer Churn Prediction & Sales Intelligence Dashboard ║
	╚══════════════════════════════════════════════════════════════════════════════╝

	═══════════════════════════════════════════════════════════════════════════════
	HOW TO RUN THIS DASHBOARD:
	═══════════════════════════════════════════════════════════════════════════════
	1. Install dependencies: pip install streamlit plotly pandas numpy joblib scikit-learn
	2. Run: streamlit run app.py
	3. Open browser at http://localhost:8501

	═══════════════════════════════════════════════════════════════════════════════
	DASHBOARD ARCHITECTURE:
	═══════════════════════════════════════════════════════════════════════════════
	This dashboard connects ALL phases into a single business-facing product:

	• Overview Tab: KPIs, churn rate, revenue, at-risk alerts
	• Predict Tab: Interactive churn prediction (input → risk score + reasons)
	• Segments Tab: Customer segments (VIP/Loyal/At Risk/Lost) with drill-down
	• Sales Tab: Time-series trends, top products, growth rates
	• Recommendations: Priority queue of customers needing action
	• Model Health: Model performance, feature importance, data drift
	"""

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	import json
	import joblib
	from pathlib import Path
	import warnings
	warnings.filterwarnings("ignore")

	# ─────────────────────────────────────────────────────────────────────────────
	# PAGE CONFIGURATION
	# ─────────────────────────────────────────────────────────────────────────────
	# Browser-tab title and icon, full-width layout, sidebar expanded initially.
	_PAGE_SETTINGS = dict(
	    page_title="Customer Churn Intelligence Dashboard",
	    page_icon="📊",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)
	st.set_page_config(**_PAGE_SETTINGS)

	# ─────────────────────────────────────────────────────────────────────────────
	# CUSTOM CSS FOR POLISHED LOOK
	# ─────────────────────────────────────────────────────────────────────────────
	# Stylesheet for the headers, KPI cards, alert boxes and tab strip used by the
	# pages below; injected once as raw HTML.
	_CUSTOM_CSS = """
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: 700;
	color: #1f2937;
	margin-bottom: 0.5rem;
	}
	.sub-header {
	font-size: 1.2rem;
	color: #6b7280;
	margin-bottom: 2rem;
	}
	.kpi-card {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	border-radius: 12px;
	padding: 1.5rem;
	color: white;
	text-align: center;
	box-shadow: 0 4px 6px rgba(0,0,0,0.1);
	}
	.kpi-card-green {
	background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
	}
	.kpi-card-red {
	background: linear-gradient(135deg, #cb2d3e 0%, #ef473a 100%);
	}
	.kpi-card-orange {
	background: linear-gradient(135deg, #f7971e 0%, #ffd200 100%);
	}
	.kpi-value {
	font-size: 2.2rem;
	font-weight: 700;
	margin: 0;
	}
	.kpi-label {
	font-size: 0.9rem;
	opacity: 0.9;
	margin-top: 0.3rem;
	}
	.alert-critical {
	background-color: #fee2e2;
	border-left: 4px solid #dc2626;
	padding: 1rem;
	border-radius: 4px;
	margin: 0.5rem 0;
	}
	.alert-warning {
	background-color: #fef3c7;
	border-left: 4px solid #f59e0b;
	padding: 1rem;
	border-radius: 4px;
	margin: 0.5rem 0;
	}
	.stTabs [data-baseweb="tab-list"] {
	gap: 8px;
	}
	.stTabs [data-baseweb="tab"] {
	padding: 10px 20px;
	border-radius: 8px 8px 0 0;
	}
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# ─────────────────────────────────────────────────────────────────────────────
	# DATA LOADING (cached)
	# ─────────────────────────────────────────────────────────────────────────────
	@st.cache_data
	def load_data():
	    """Load all pre-computed datasets and model artifacts from previous phases.

	    Returns:
	        dict: DataFrames keyed by dataset name, the two parsed JSON summaries
	        ("sales_kpis", "campaign_roi"), and the keys "model", "scaler" and
	        "feature_names". The three model keys are always present; they are
	        None when the artifacts could not be loaded. "test_risk" is None when
	        its file is missing.

	    Raises:
	        Exception: whatever pandas/json raise when a required file under
	        /app/deploy/data is missing or unreadable (handled by the caller).
	    """
	    data_dir = Path("/app/deploy/data")
	    model_dir = Path("/app/deploy/models")

	    # dataset key -> (file name, extra pd.read_csv keyword arguments)
	    csv_sources = {
	        # Core datasets
	        "customers": ("customer_data.csv", {}),
	        "transactions": ("transaction_data.csv", {"parse_dates": ["transaction_date"]}),
	        "products": ("product_data.csv", {}),
	        # Phase 4: Model predictions
	        "model_preds": ("all_model_predictions.csv", {}),
	        # Phase 5: Explanations
	        "explanations": ("customer_churn_explanations.csv", {}),
	        # Phase 6: Segmentation
	        "segmented": ("customers_segmented.csv", {}),
	        "segment_summary": ("segment_summary.csv", {}),
	        # Phase 7: Sales
	        "monthly_sales": ("monthly_sales.csv", {"parse_dates": ["period"]}),
	        "product_revenue": ("product_revenue.csv", {}),
	        "category_revenue": ("category_revenue.csv", {}),
	        # Phase 8: Recommendations
	        "recommendations": ("customer_recommendations.csv", {}),
	        "priority_queue": ("priority_queue_top100.csv", {}),
	    }
	    data = {
	        key: pd.read_csv(data_dir / file_name, **read_kwargs)
	        for key, (file_name, read_kwargs) in csv_sources.items()
	    }

	    # This file lives outside the deploy directory and no page in this file
	    # reads it, so its absence must not take the whole dashboard down.
	    try:
	        data["test_risk"] = pd.read_csv("/app/outputs/phase4/test_set_risk_scores.csv")
	    except FileNotFoundError:
	        data["test_risk"] = None

	    json_sources = {
	        "sales_kpis": "sales_kpis.json",
	        "campaign_roi": "campaign_roi_summary.json",
	    }
	    for key, file_name in json_sources.items():
	        with open(data_dir / file_name, "r", encoding="utf-8") as f:
	            data[key] = json.load(f)

	    # Model artifacts are optional: pages that need them check for None.
	    # NOTE: joblib files are pickle-based; only load artifacts produced by
	    # our own pipeline.
	    try:
	        data["model"] = joblib.load(model_dir / "best_churn_model.joblib")
	        data["scaler"] = joblib.load(model_dir / "scaler.joblib")
	        data["feature_names"] = joblib.load(model_dir / "feature_names.joblib")
	    except Exception as e:
	        st.warning(f"Model loading issue: {e}")
	        # Reset all three so a partial load never leaves a model without
	        # its scaler/feature list (and the keys always exist).
	        data["model"] = None
	        data["scaler"] = None
	        data["feature_names"] = None

	    return data

	# Load everything once per script run (cached by load_data). Pages check
	# DATA_LOADED before touching `data`, which is an empty dict on failure.
	try:
	    data = load_data()
	except Exception as e:
	    st.error(f"Failed to load data: {e}")
	    DATA_LOADED = False
	    data = {}
	else:
	    DATA_LOADED = True

	# ─────────────────────────────────────────────────────────────────────────────
	# HELPER FUNCTIONS
	# ─────────────────────────────────────────────────────────────────────────────
	def get_churn_risk_color(score: float) -> tuple:
	    """Map a churn risk score to a display colour and a risk label.

	    Args:
	        score: Risk score; the bands are <25, <50, <75 and everything else.

	    Returns:
	        A ``(hex_colour, label)`` tuple where label is one of "Low",
	        "Medium", "High" or "Critical".
	    """
	    if score < 25:
	        return "#2ecc71", "Low"
	    if score < 50:
	        return "#f1c40f", "Medium"
	    if score < 75:
	        return "#e67e22", "High"
	    return "#e74c3c", "Critical"

	def format_currency(val: float) -> str:
	    """Format a dollar amount for display.

	    Amounts whose magnitude is at least 1000 are shown without cents
	    ("$12,346"); smaller amounts keep two decimals ("$12.30"). Negative
	    amounts carry the sign in front of the dollar symbol ("-$2,500").

	    Args:
	        val: Amount in dollars.

	    Returns:
	        The formatted string.
	    """
	    # Decide on the magnitude so large negative amounts also drop the cents
	    # and never render as "$-2,500.00".
	    sign = "-" if val < 0 else ""
	    magnitude = abs(val)
	    if magnitude >= 1000:
	        return f"{sign}${magnitude:,.0f}"
	    return f"{sign}${magnitude:,.2f}"

	# ─────────────────────────────────────────────────────────────────────────────
	# SIDEBAR
	# ─────────────────────────────────────────────────────────────────────────────
	with st.sidebar:
	    st.image("https://img.icons8.com/color/96/analytics.png", width=60)
	    st.title("Churn Intelligence")
	    st.markdown("---")
	    st.markdown("Navigation")

	    # The selected label drives the if/elif page dispatch further down.
	    page = st.radio("Go to:", [
	        "📊 Overview & KPIs",
	        "🔮 Churn Predictor",
	        "👥 Customer Segments",
	        "📈 Sales Analytics",
	        "🎯 Action Recommendations",
	        "🧠 Model Performance",
	    ])

	    st.markdown("---")
	    st.markdown("About")
	    # Plain "|" separators: "\|" is an invalid escape sequence in a normal
	    # Python string literal.
	    st.info("""
	    This dashboard combines churn prediction, customer segmentation,
	    sales analytics, and automated recommendations into a single
	    business intelligence platform.

	    Data: 5,000 customers | 33,037 transactions | 150 products
	    """)

	    if st.button("🔄 Refresh Data"):
	        # Clear the st.cache_data cache so load_data() runs again, then rerun.
	        st.cache_data.clear()
	        st.rerun()

	# ─────────────────────────────────────────────────────────────────────────────
	# MAIN CONTENT AREA
	# ─────────────────────────────────────────────────────────────────────────────

	# ═══════════════════════════════════════════════════════════════════════════════
	# PAGE 1: OVERVIEW & KPIs
	# ═══════════════════════════════════════════════════════════════════════════════
	if page == "📊 Overview & KPIs":
	    # Landing page: headline KPI cards, alert banners and three summary charts.
	    st.markdown('<div class="main-header">Customer Churn Intelligence Dashboard</div>', unsafe_allow_html=True)
	    st.markdown('<div class="sub-header">Real-time insights into customer health, revenue, and retention</div>', unsafe_allow_html=True)

	    if not DATA_LOADED:
	        st.error("Data not loaded. Please run all phases first.")
	        st.stop()

	    recs = data["recommendations"]

	    # KPI ROW
	    churn_rate = data["customers"]["churned"].mean() * 100
	    total_revenue = data["transactions"]["total_amount"].sum()
	    total_customers = len(data["customers"])
	    at_risk = len(recs[recs["priority"] >= 7])
	    revenue_at_stake = recs["revenue_at_stake"].sum()

	    # (CSS classes, formatted value, caption) for each card, left to right.
	    kpi_cards = [
	        ("kpi-card kpi-card-red", f"{churn_rate:.1f}%", "Churn Rate"),
	        ("kpi-card", format_currency(total_revenue), "Total Revenue"),
	        ("kpi-card kpi-card-green", f"{total_customers:,}", "Total Customers"),
	        ("kpi-card kpi-card-orange", f"{at_risk:,}", "High Priority Actions"),
	        ("kpi-card kpi-card-red", format_currency(revenue_at_stake), "Revenue at Stake"),
	    ]
	    for kpi_col, (css_classes, kpi_value, kpi_label) in zip(st.columns(5), kpi_cards):
	        with kpi_col:
	            st.markdown(f"""
	            <div class="{css_classes}">
	            <div class="kpi-value">{kpi_value}</div>
	            <div class="kpi-label">{kpi_label}</div>
	            </div>
	            """, unsafe_allow_html=True)

	    st.markdown("---")

	    # ALERTS SECTION
	    st.subheader("🚨 Live Alerts")

	    # Critical alerts
	    is_critical = recs["risk_level"] == "Critical"
	    critical_count = len(recs[is_critical])
	    vip_critical = len(recs[(recs["segment_label"] == "VIP") & is_critical])

	    col_alert1, col_alert2 = st.columns(2)

	    with col_alert1:
	        st.markdown(f"""
	        <div class="alert-critical">
	        <strong>⚠️ {critical_count:,} customers at CRITICAL churn risk</strong><br>
	        Immediate action required. These customers have >75% churn probability.
	        </div>
	        """, unsafe_allow_html=True)

	    with col_alert2:
	        st.markdown(f"""
	        <div class="alert-warning">
	        <strong>💎 {vip_critical:,} VIP customers need urgent attention</strong><br>
	        High-value customers at risk. Executive intervention recommended.
	        </div>
	        """, unsafe_allow_html=True)

	    # Quick insight cards
	    st.markdown("---")
	    st.subheader("📋 Quick Insights")

	    col_q1, col_q2, col_q3 = st.columns(3)

	    with col_q1:
	        # Donut chart of customers per segment.
	        seg_dist = data["segmented"]["segment_label"].value_counts()
	        fig = px.pie(
	            values=seg_dist.values, names=seg_dist.index,
	            color=seg_dist.index,
	            color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71",
	                                "At Risk": "#e67e22", "Lost": "#e74c3c"},
	            hole=0.4
	        )
	        fig.update_layout(showlegend=True, margin=dict(t=0, b=0, l=0, r=0),
	                          height=300, title="Customer Segments")
	        st.plotly_chart(fig, use_container_width=True)

	    with col_q2:
	        # Last 12 rows of the monthly sales table.
	        monthly = data["monthly_sales"].tail(12)
	        fig = px.area(
	            monthly, x="period", y="revenue",
	            color_discrete_sequence=["#3498db"],
	            title="Revenue Trend (Last 12 Months)"
	        )
	        fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0))
	        st.plotly_chart(fig, use_container_width=True)

	    with col_q3:
	        # Fixed Low -> Critical order; levels with no customers show as 0.
	        risk_order = ["Low", "Medium", "High", "Critical"]
	        risk_dist = recs["risk_level"].value_counts().reindex(risk_order).fillna(0)
	        fig = px.bar(
	            x=risk_dist.index, y=risk_dist.values,
	            color=risk_dist.index,
	            color_discrete_map={"Low": "#2ecc71", "Medium": "#f1c40f",
	                                "High": "#e67e22", "Critical": "#e74c3c"},
	            title="Risk Distribution"
	        )
	        fig.update_layout(height=300, margin=dict(t=30, b=0, l=0, r=0), showlegend=False)
	        st.plotly_chart(fig, use_container_width=True)

	# ═══════════════════════════════════════════════════════════════════════════════
	# PAGE 2: CHURN PREDICTOR
	# ═══════════════════════════════════════════════════════════════════════════════
	elif page == "🔮 Churn Predictor":
	st.markdown('<div class="main-header">Churn Risk Predictor</div>', unsafe_allow_html=True)
	st.markdown('<div class="sub-header">Input customer data to predict churn probability and get actionable insights</div>', unsafe_allow_html=True)

	if not DATA_LOADED or data.get("model") is None:
	st.warning("⚠️ Model not available. Please run Phase 4 first.")
	st.stop()

	# TWO MODES: Single Customer or Batch Upload
	pred_mode = st.radio("Prediction Mode:", ["Single Customer", "Batch Upload (CSV)"], horizontal=True)

	if pred_mode == "Single Customer":
	# Use an existing customer as template
	sample_customers = data["customers"]["customer_id"].sample(5, random_state=42).tolist()
	selected_customer = st.selectbox("Select a customer (or enter ID manually):",
	["Manual Entry"] + sample_customers)

	if selected_customer != "Manual Entry":
	# Pre-fill with existing customer data
	cust_row = data["customers"][data["customers"]["customer_id"] == selected_customer].iloc[0]
	default_age = int(cust_row["age"])
	default_tickets = int(cust_row["support_tickets_6m"])
	default_email = float(cust_row["email_open_rate"])
	default_web = int(cust_row["web_sessions_30d"])
	else:
	default_age, default_tickets, default_email, default_web = 35, 1, 0.3, 5

	st.markdown("---")

	# Input form
	col1, col2 = st.columns(2)

	with col1:
	st.subheader("📋 Customer Profile")
	age = st.slider("Age", 18, 80, default_age)
	gender = st.selectbox("Gender", ["M", "F", "Other"])
	region = st.selectbox("Region", ["North", "South", "East", "West", "Central"])
	segment = st.selectbox("Business Segment", ["Retail", "SMB", "Enterprise"])
	channel = st.selectbox("Acquisition Channel", ["Online", "In-Store", "Mobile App", "Phone"])
	ltv_baseline = st.number_input("Baseline LTV ($)", 0.0, 15000.0, 500.0)

	with col2:
	st.subheader("📊 Behavioral Data")
	support_tickets = st.slider("Support Tickets (6m)", 0, 10, default_tickets)
	email_rate = st.slider("Email Open Rate", 0.0, 1.0, default_email)
	web_sessions = st.slider("Web Sessions (30d)", 0, 30, default_web)
	recency = st.slider("Days Since Last Purchase", 0, 500, 120)
	total_spent = st.number_input("Total Spent ($)", 0.0, 20000.0, 800.0)
	txn_count = st.slider("Transaction Count", 0, 50, 8)

	st.markdown("---")

	if st.button("🔮 Predict Churn Risk", type="primary", use_container_width=True):
	# Build feature vector
	# Create a DataFrame with same structure as training
	feature_names = data["feature_names"]

	# Compute derived features
	tenure_days = 365 # placeholder
	purchase_frequency = txn_count / (tenure_days / 30 + 1)
	avg_txn_value = total_spent / (txn_count + 1)
	rfm_score = 8 # placeholder
	engagement = (email_rate * 0.35 + min(web_sessions/20, 1) * 0.4 +
	(1 - min(support_tickets/5, 1)) * 0.25)

	input_dict = {
	"age": age,
	"lifetime_value_baseline": ltv_baseline,
	"support_tickets_6m": support_tickets,
	"email_open_rate": email_rate,
	"web_sessions_30d": web_sessions,
	"total_spent": total_spent,
	"txn_count": txn_count,
	"avg_order_value": avg_txn_value,
	"max_order_value": avg_txn_value * 1.5,
	"min_order_value": avg_txn_value * 0.5,
	"std_order_value": avg_txn_value * 0.3,
	"total_quantity": txn_count * 2,
	"total_discount": txn_count * 0.02,
	"unique_products": min(txn_count, 20),
	"recency_days": recency,
	"R_score": max(1, 6 - recency // 60),
	"F_score": min(5, max(1, txn_count // 3)),
	"M_score": min(5, max(1, int(total_spent // 500))),
	"RFM_score": 10, # placeholder
	"tenure_days": tenure_days,
	"days_between_first_last": tenure_days,
	"purchase_frequency": purchase_frequency,
	"avg_transaction_value": avg_txn_value,
	"engagement_score": engagement,
	"spending_trend_ratio": 1.0,
	"purchase_regularity": 1.0,
	"category_diversity": min(txn_count, 10),
	"discount_sensitivity": 0.02,
	}

	# Gender dummies
	for g in ["F", "M", "Other"]:
	input_dict[f"gender_{g}"] = 1 if gender == g else 0

	# Region dummies
	for r in ["Central", "East", "North", "South", "West"]:
	input_dict[f"region_{r}"] = 1 if region == r else 0

	# Segment dummies
	for s in ["Enterprise", "Retail", "SMB"]:
	input_dict[f"segment_{s}"] = 1 if segment == s else 0

	# Channel dummies
	for c in ["In-Store", "Mobile App", "Online", "Phone"]:
	input_dict[f"acquisition_channel_{c}"] = 1 if channel == c else 0

	# Create feature vector
	X_input = pd.DataFrame([{k: input_dict.get(k, 0) for k in feature_names}])
	X_scaled = data["scaler"].transform(X_input)

	# Predict
	proba = data["model"].predict_proba(X_scaled)[0][1]
	risk_score = proba * 100
	color, risk_label = get_churn_risk_color(risk_score)
	prediction = 1 if risk_score >= 50 else 0

	# DISPLAY RESULTS
	st.markdown("---")

	res_col1, res_col2, res_col3 = st.columns([1, 2, 1])

	with res_col2:
	st.markdown(f"""
	<div style="text-align: center; padding: 2rem; background: {color}20; border-radius: 16px; border: 3px solid {color};">
	<div style="font-size: 1.2rem; color: {color}; font-weight: 600;">Churn Risk: {risk_label}</div>
	<div style="font-size: 4rem; font-weight: 800; color: {color}; margin: 0.5rem 0;">{risk_score:.1f}%</div>
	<div style="font-size: 1rem; color: #666;">Predicted Outcome: {'🔴 CHURN' if prediction == 1 else '🟢 RETAIN'}</div>
	</div>
	""", unsafe_allow_html=True)

	# Risk meter
	fig = go.Figure(go.Indicator(
	mode="gauge+number",
	value=risk_score,
	domain={'x': [0, 1], 'y': [0, 1]},
	gauge={
	'axis': {'range': [0, 100]},
	'bar': {'color': color},
	'steps': [
	{'range': [0, 25], 'color': '#d5f5e3'},
	{'range': [25, 50], 'color': '#fcf3cf'},
	{'range': [50, 75], 'color': '#f5cba7'},
	{'range': [75, 100], 'color': '#f5b7b1'}
	],
	'threshold': {
	'line': {'color': 'black', 'width': 4},
	'thickness': 0.75,
	'value': risk_score
	}
	}
	))
	fig.update_layout(height=300, margin=dict(t=0, b=0, l=0, r=0))
	st.plotly_chart(fig, use_container_width=True)

	# Reasons
	st.subheader("📊 Key Risk Factors")

	reasons = []
	if recency > 90:
	reasons.append(("No purchase in 90+ days", "HIGH", f"{recency} days since last purchase"))
	if txn_count < 3:
	reasons.append(("Very few transactions", "HIGH", f"Only {txn_count} total purchases"))
	if total_spent < 200:
	reasons.append(("Low lifetime spend", "MEDIUM", f"${total_spent:.0f} total spent"))
	if email_rate < 0.2:
	reasons.append(("Low email engagement", "MEDIUM", f"{email_rate:.1%} open rate"))
	if support_tickets > 3:
	reasons.append(("High support activity", "MEDIUM", f"{support_tickets} recent tickets"))

	if not reasons:
	reasons.append(("Overall profile stable", "LOW", "No major risk flags detected"))

	for reason, severity, detail in reasons:
	sev_color = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}[severity]
	st.markdown(f"""
	<div style="padding: 0.75rem; background: #f8f9fa; border-radius: 8px; margin: 0.3rem 0;">
	<strong>{sev_color} {reason}</strong> — {detail}
	</div>
	""", unsafe_allow_html=True)

	# Recommendation
	st.subheader("🎯 Recommended Action")
	if risk_score >= 75:
	st.error("🚨 URGENT: Executive-level intervention recommended. Personal call + exclusive offer.")
	elif risk_score >= 50:
	st.warning("⚠️ HIGH PRIORITY: Re-engagement campaign + targeted discount.")
	elif risk_score >= 25:
	st.info("ℹ️ MONITOR: Increase touchpoints. Watch for further decline.")
	else:
	st.success("✅ HEALTHY: Consider upsell/cross-sell opportunities.")

	else: # Batch mode
	st.info("Upload a CSV with customer features to predict churn risk for multiple customers.")
	uploaded = st.file_uploader("Upload CSV", type=["csv"])

	if uploaded:
	batch_df = pd.read_csv(uploaded)
	st.write(f"📄 Loaded {len(batch_df)} customers")
	st.dataframe(batch_df.head())
	st.info("Batch prediction coming soon — single customer mode is fully functional above.")

	# ═══════════════════════════════════════════════════════════════════════════════
	# PAGE 3: CUSTOMER SEGMENTS
	# ═══════════════════════════════════════════════════════════════════════════════
	elif page == "👥 Customer Segments":
	st.markdown('<div class="main-header">Customer Segmentation</div>', unsafe_allow_html=True)
	st.markdown('<div class="sub-header">Explore VIP, Loyal, At Risk, and Lost customer segments</div>', unsafe_allow_html=True)

	if not DATA_LOADED:
	st.error("Data not loaded.")
	st.stop()

	seg = data["segmented"]

	# Segment selector
	selected_segments = st.multiselect(
	"Filter Segments:",
	["VIP", "Loyal", "At Risk", "Lost"],
	default=["VIP", "Loyal", "At Risk", "Lost"]
	)

	filtered = seg[seg["segment_label"].isin(selected_segments)]

	col1, col2 = st.columns(2)

	with col1:
	# 3D-ish scatter: Recency vs Spend, colored by segment
	fig = px.scatter(
	filtered, x="recency_days", y="total_spent",
	color="segment_label", size="txn_count",
	hover_data=["customer_id", "RFM_score", "engagement_score"],
	color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71",
	"At Risk": "#e67e22", "Lost": "#e74c3c"},
	title="Customers: Recency vs Lifetime Spend",
	labels={"recency_days": "Days Since Last Purchase", "total_spent": "Total Spent ($)"}
	)
	fig.update_layout(height=500)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# RFM distribution by segment
	fig = px.box(
	filtered, x="segment_label", y="RFM_score",
	color="segment_label",
	color_discrete_map={"VIP": "#FFD700", "Loyal": "#2ecc71",
	"At Risk": "#e67e22", "Lost": "#e74c3c"},
	title="RFM Score Distribution by Segment",
	category_orders={"segment_label": ["VIP", "Loyal", "At Risk", "Lost"]}
	)
	fig.update_layout(height=500, showlegend=False)
	st.plotly_chart(fig, use_container_width=True)

	# Segment metrics table
	st.subheader("📊 Segment Metrics")

	metrics = filtered.groupby("segment_label").agg(
	Count=("customer_id", "count"),
	Avg_Spend=("total_spent", "mean"),
	Avg_Recency=("recency_days", "mean"),
	Avg_Frequency=("txn_count", "mean"),
	Avg_RFM=("RFM_score", "mean"),
	Churn_Rate=("churned", "mean"),
	).round(2)

	metrics["Churn_Rate"] = (metrics["Churn_Rate"] * 100).round(1).astype(str) + "%"
	metrics["Avg_Spend"] = "$" + metrics["Avg_Spend"].round(0).astype(int).astype(str)

	st.dataframe(metrics, use_container_width=True)

	# Customer table with search
	st.subheader("🔍 Customer Explorer")

	search_id = st.text_input("Search Customer ID (e.g., C00001):")
	if search_id:
	cust = seg[seg["customer_id"].str.contains(search_id, case=False, na=False)]
	else:
	cust = filtered.sample(min(20, len(filtered)), random_state=42)

	display_cols = ["customer_id", "segment_label", "recency_days", "txn_count",
	"total_spent", "RFM_score", "engagement_score", "churned"]
	st.dataframe(cust[display_cols].reset_index(drop=True), use_container_width=True)

	# ═══════════════════════════════════════════════════════════════════════════════
	# PAGE 4: SALES ANALYTICS
	# ═══════════════════════════════════════════════════════════════════════════════
	elif page == "📈 Sales Analytics":
	st.markdown('<div class="main-header">Sales Analytics</div>', unsafe_allow_html=True)
	st.markdown('<div class="sub-header">Revenue trends, product performance, and growth metrics</div>', unsafe_allow_html=True)

	if not DATA_LOADED:
	st.error("Data not loaded.")
	st.stop()

	tab1, tab2, tab3, tab4 = st.tabs(["📊 Revenue Trends", "🏆 Top Products", "📅 Seasonality", "📈 Growth"])

	with tab1:
	monthly = data["monthly_sales"]

	fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
	subplot_titles=("Monthly Revenue", "Monthly Orders"),
	vertical_spacing=0.1)

	fig.add_trace(
	go.Scatter(x=monthly["period"], y=monthly["revenue"],
	mode="lines+markers", name="Revenue", line=dict(color="#3498db")),
	row=1, col=1
	)
	fig.add_trace(
	go.Scatter(x=monthly["period"], y=monthly["orders"],
	mode="lines+markers", name="Orders", line=dict(color="#e67e22")),
	row=2, col=1
	)

	fig.update_layout(height=600, showlegend=False,
	title_text="Revenue & Order Trends Over Time")
	st.plotly_chart(fig, use_container_width=True)

	# KPI row
	kpi_col1, kpi_col2, kpi_col3 = st.columns(3)
	with kpi_col1:
	st.metric("Total Revenue", data["sales_kpis"]["Total Revenue (All Time)"])
	with kpi_col2:
	st.metric("Avg Order Value", data["sales_kpis"]["Average Order Value"])
	with kpi_col3:
	st.metric("Latest MoM Growth", data["sales_kpis"]["Revenue Growth (MoM, latest)"])

	with tab2:
	top_n = st.slider("Show Top N Products", 5, 50, 10)
	top_products = data["product_revenue"].head(top_n)

	fig = px.bar(
	top_products, x="revenue", y="product_name",
	orientation="h", color="category",
	title=f"Top {top_n} Products by Revenue",
	labels={"revenue": "Revenue ($)", "product_name": ""}
	)
	fig.update_layout(height=500, yaxis=dict(autorange="reversed"))
	st.plotly_chart(fig, use_container_width=True)

	# Category breakdown
	fig2 = px.pie(
	data["category_revenue"], values="revenue", names="category",
	title="Revenue by Category", hole=0.4
	)
	fig2.update_layout(height=400)
	st.plotly_chart(fig2, use_container_width=True)

	with tab3:
	# Seasonal patterns
	seasonal = data["transactions"].copy()
	seasonal["month"] = seasonal["transaction_date"].dt.month
	seasonal["dayofweek"] = seasonal["transaction_date"].dt.dayofweek
	seasonal["quarter"] = seasonal["transaction_date"].dt.quarter

	month_pattern = seasonal.groupby("month")["total_amount"].sum().reset_index()
	month_pattern["month_name"] = pd.to_datetime(month_pattern["month"], format="%m").dt.strftime("%b")

	dow_pattern = seasonal.groupby("dayofweek")["total_amount"].sum().reset_index()
	dow_pattern["day_name"] = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

	col_s1, col_s2 = st.columns(2)

	with col_s1:
	fig = px.bar(
	month_pattern, x="month_name", y="total_amount",
	color_discrete_sequence=["#3498db"],
	title="Revenue by Month (Seasonality)",
	labels={"total_amount": "Revenue ($)", "month_name": "Month"}
	)
	fig.update_layout(height=350)
	st.plotly_chart(fig, use_container_width=True)

	with col_s2:
	fig = px.bar(
	dow_pattern, x="day_name", y="total_amount",
	color_discrete_sequence=["#e67e22"],
	title="Revenue by Day of Week",
	labels={"total_amount": "Revenue ($)", "day_name": "Day"}
	)
	fig.update_layout(height=350)
	st.plotly_chart(fig, use_container_width=True)

	with tab4:
	monthly = data["monthly_sales"].copy()
	monthly["revenue_mom"] = monthly["revenue"].pct_change() * 100
	monthly["revenue_3ma"] = monthly["revenue"].rolling(3, min_periods=1).mean()

	fig = go.Figure()
	fig.add_trace(go.Bar(
	x=monthly["period"], y=monthly["revenue_mom"],
	name="MoM Growth %", marker_color="#3498db"
	))
	fig.add_trace(go.Scatter(
	x=monthly["period"], y=monthly["revenue_3ma"],
	name="3-Month MA ($)", yaxis="y2", line=dict(color="#e74c3c")
	))

	fig.update_layout(
	title="Month-over-Month Growth Rate",
	yaxis=dict(title="MoM Growth (%)"),
	yaxis2=dict(title="Revenue ($)", overlaying="y", side="right"),
	height=400, legend=dict(orientation="h", yanchor="bottom", y=1.02)
	)
	st.plotly_chart(fig, use_container_width=True)

	# ═══════════════════════════════════════════════════════════════════════════════
	# PAGE 5: ACTION RECOMMENDATIONS
	# ═══════════════════════════════════════════════════════════════════════════════
	elif page == "🎯 Action Recommendations":
	st.markdown('<div class="main-header">Action Recommendations</div>', unsafe_allow_html=True)
	st.markdown('<div class="sub-header">Priority queue of customers needing immediate business action</div>', unsafe_allow_html=True)

	if not DATA_LOADED:
	st.error("Data not loaded.")
	st.stop()

	recs = data["recommendations"]

	# Filters
	col_f1, col_f2, col_f3 = st.columns(3)
	with col_f1:
	seg_filter = st.multiselect("Segment:", recs["segment_label"].unique(), default=recs["segment_label"].unique())
	with col_f2:
	risk_filter = st.multiselect("Risk Level:", recs["risk_level"].unique(), default=recs["risk_level"].unique())
	with col_f3:
	priority_min = st.slider("Min Priority Score:", 0, 130, 50)

	filtered_recs = recs[
	(recs["segment_label"].isin(seg_filter)) &
	(recs["risk_level"].isin(risk_filter)) &
	(recs["final_priority_score"] >= priority_min)
	].sort_values("final_priority_score", ascending=False)

	st.markdown(f"{len(filtered_recs):,} customers match your filters")

	# Priority stats
	stat_col1, stat_col2, stat_col3, stat_col4 = st.columns(4)
	with stat_col1:
	st.metric("Total at Risk", len(filtered_recs))
	with stat_col2:
	st.metric("Revenue at Stake", f"${filtered_recs['revenue_at_stake'].sum():,.0f}")
	with stat_col3:
	st.metric("Est. Campaign Cost", f"${filtered_recs['cost_per_customer'].sum():,.0f}")
	with stat_col4:
	st.metric("Est. ROI", f"{((filtered_recs['expected_revenue_saved'].sum() - filtered_recs['cost_per_customer'].sum()) / max(filtered_recs['cost_per_customer'].sum(), 1) * 100):.0f}%")

	st.markdown("---")

	# Display priority table
	display_cols = [
	"customer_id", "segment_label", "risk_level", "churn_risk_score",
	"total_spent", "revenue_at_stake", "final_priority_score",
	"recommended_action", "recommended_channel", "recommended_offer",
	"urgency", "behavioral_enhancements"
	]

	st.dataframe(
	filtered_recs[display_cols].head(100),
	use_container_width=True,
	column_config={
	"churn_risk_score": st.column_config.ProgressColumn("Risk %", min_value=0, max_value=100, format="%.0f%%"),
	"final_priority_score": st.column_config.NumberColumn("Priority", format="%.1f"),
	"revenue_at_stake": st.column_config.NumberColumn("Revenue at Stake", format="$%.0f"),
	}
	)

	# Campaign ROI summary
	st.markdown("---")
	st.subheader("💰 Campaign ROI Summary")

	roi = data["campaign_roi"]
	col_roi1, col_roi2 = st.columns(2)

	with col_roi1:
	st.markdown("Full Campaign (All Customers)")
	fc = roi["full_campaign"]
	st.write(f"- Customers: {fc['customers']:,}")
	st.write(f"- Revenue at Stake: ${fc['revenue_at_stake']:,.0f}")
	st.write(f"- Campaign Cost: ${fc['campaign_cost']:,.0f}")
	st.write(f"- Expected Saved: ${fc['expected_revenue_saved']:,.0f}")
	st.write(f"- ROI: {fc['roi_percent']:.0f}%")

	with col_roi2:
	st.markdown("High Priority Only (Priority ≥ 7)")
	hp = roi["high_priority_campaign"]
	st.write(f"- Customers: {hp['customers']:,}")
	st.write(f"- Revenue at Stake: ${hp['revenue_at_stake']:,.0f}")
	st.write(f"- Campaign Cost: ${hp['campaign_cost']:,.0f}")
	st.write(f"- Expected Saved: ${hp['expected_revenue_saved']:,.0f}")
	st.write(f"- ROI: {hp['roi_percent']:.0f}%")

	# ═══════════════════════════════════════════════════════════════════════════════
	# PAGE 6: MODEL PERFORMANCE
	# ═══════════════════════════════════════════════════════════════════════════════
	elif page == "🧠 Model Performance":
	st.markdown('<div class="main-header">Model Performance & Explainability</div>', unsafe_allow_html=True)
	st.markdown('<div class="sub-header">Churn prediction accuracy, feature importance, and model health</div>', unsafe_allow_html=True)

	if not DATA_LOADED:
	st.error("Data not loaded.")
	st.stop()

	tab1, tab2, tab3 = st.tabs(["📊 Model Metrics", "🔍 Feature Importance", "📋 Explanations"])

	with tab1:
	# Model comparison
	try:
	model_comp = pd.read_csv("/app/outputs/phase4/model_comparison.csv")

	fig = px.bar(
	model_comp, x="Model", y=["Accuracy", "Precision", "Recall", "F1-Score", "AUC-ROC"],
	barmode="group", title="Model Comparison",
	color_discrete_sequence=["#3498db", "#2ecc71", "#e74c3c", "#9b59b6", "#f1c40f"]
	)
	fig.update_layout(height=500)
	st.plotly_chart(fig, use_container_width=True)

	st.dataframe(model_comp.round(3), use_container_width=True)
	except:
	st.info("Model comparison data not available.")

	# Confusion matrix
	st.subheader("Confusion Matrix (Best Model)")

	preds = data["model_preds"]
	from sklearn.metrics import confusion_matrix
	cm = confusion_matrix(preds["actual_churn"], preds["rf_pred"])

	fig = px.imshow(
	cm, text_auto=True,
	labels=dict(x="Predicted", y="Actual", color="Count"),
	x=["Active", "Churned"], y=["Active", "Churned"],
	color_continuous_scale="Blues"
	)
	fig.update_layout(height=400)
	st.plotly_chart(fig, use_container_width=True)

	with tab2:
	st.subheader("Top Churn Drivers")

	try:
	# Feature importance from Phase 5 summary or recompute
	if data.get("model") and hasattr(data["model"], "feature_importances_"):
	importances = data["model"].feature_importances_
	feat_names = data["feature_names"]

	imp_df = pd.DataFrame({
	"Feature": [f.replace("_scaled", "") for f in feat_names],
	"Importance": importances
	}).sort_values("Importance", ascending=False).head(15)

	fig = px.bar(
	imp_df, x="Importance", y="Feature", orientation="h",
	color="Importance", color_continuous_scale="Viridis",
	title="Top 15 Feature Importances (Random Forest)"
	)
	fig.update_layout(height=500, yaxis=dict(autorange="reversed"))
	st.plotly_chart(fig, use_container_width=True)
	else:
	st.info("Feature importance data not available.")
	except Exception as e:
	st.error(f"Could not load feature importance: {e}")

	with tab3:
	st.subheader("Customer Explanations")

	try:
	explanations = data["explanations"]

	cust_search = st.text_input("Search Customer ID:")
	if cust_search:
	exp = explanations[explanations["customer_id"].str.contains(cust_search, case=False, na=False)]
	else:
	exp = explanations.head(10)

	for _, row in exp.iterrows():
	with st.expander(f"Customer {row['customer_id']} \| P(churn)={row['predicted_churn_prob']:.1%} \| {'🔴 CHURNED' if row['actual_churn']==1 else '🟢 ACTIVE'}"):
	st.markdown(f"Top Risk Factors: {row['top_reasons_for_churn']}")
	st.markdown(f"Loyalty Signs: {row['top_reasons_against_churn']}")
	except:
	st.info("Explanation data not available.")

	# ─────────────────────────────────────────────────────────────────────────────
	# FOOTER
	# ─────────────────────────────────────────────────────────────────────────────
	# Footer shown under every page. Plain "|" separators: "\|" is an invalid
	# escape sequence in a normal Python string.
	st.markdown("---")
	st.markdown("""
	<div style="text-align: center; color: #9ca3af; padding: 1rem;">
	<small>Customer Churn Intelligence Dashboard | Built with Streamlit | Data: Synthetic (5,000 customers, 33,037 transactions)</small>
	</div>
	""", unsafe_allow_html=True)