Spaces:

Divya499
/

segmentx-behavioral-intelligence

Sleeping

segmentx-behavioral-intelligence / app.py

DIVYANSHI SINGH

Feature Recovery: Restored Top Products, Pareto, Churn Probability, and CLV with fixed Plotly color attributes

c2616da 2 months ago

12.7 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go
	from sklearn.decomposition import PCA
	import joblib
	import os
	from datetime import datetime

	# Artifact locations, all resolved relative to the directory holding this file
	# so the app finds them regardless of the current working directory.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))
	_PROCESSED_DIR = os.path.join(BASE_DIR, "data", "processed")
	_MODELS_DIR = os.path.join(BASE_DIR, "models")
	_OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")

	SCALED_DATA_PATH = os.path.join(_PROCESSED_DIR, "scaled_rfm_data.pkl")
	KMEANS_MODEL_PATH = os.path.join(_MODELS_DIR, "kmeans_model.pkl")
	CUSTOMER_SEGMENTS_PATH = os.path.join(_OUTPUTS_DIR, "customer_segments.csv")
	SEGMENT_PRODUCTS_PATH = os.path.join(_OUTPUTS_DIR, "segment_products.csv")

	# Page-level settings: browser-tab title and icon, wide layout, and the
	# sidebar expanded on first load.
	# The title uses a plain "|" separator: "\|" is not a valid Python escape, so
	# it would put a literal backslash in the title and raise an invalid-escape
	# warning on newer interpreters.
	st.set_page_config(
	    page_title="SegmentX | Customer Intelligence Portal",
	    page_icon="💎",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)

	# --- Industry-Grade UI Refinement ---
	# Injects one global <style> element into the page. It sets the Inter font,
	# a dark palette on `.main`, a width-capped and padded `.block-container`,
	# card styling for `.stMetric` elements, heading colours, and the
	# `.brand-header` / `.brand-tag` classes that the page header markup in
	# main() refers to.
	# unsafe_allow_html=True is passed so the <style> element is emitted as raw
	# HTML rather than shown as text.
	st.markdown("""
	<style>
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&display=swap');

	html, body, [class*="css"] {
	font-family: 'Inter', sans-serif;
	}

	.main {
	background-color: #0f172a;
	color: #f8fafc;
	}

	/* Stabilized Content Wrapper */
	.block-container {
	max-width: 1400px;
	padding: 2rem 5rem !important;
	}

	.stMetric {
	background: rgba(30, 41, 59, 0.7);
	backdrop-filter: blur(8px);
	padding: 24px;
	border-radius: 16px;
	border: 1px solid rgba(148, 163, 184, 0.1);
	box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
	}

	.stMetric label {
	color: #94a3b8 !important;
	font-weight: 500 !important;
	}

	h1, h2, h3 {
	color: #f8fafc;
	letter-spacing: -0.025em;
	}

	/* Brand Header */
	.brand-header {
	display: flex;
	align-items: center;
	gap: 12px;
	padding-bottom: 2rem;
	margin-bottom: 2rem;
	border-bottom: 1px solid rgba(148, 163, 184, 0.1);
	}

	.brand-tag {
	background: #3b82f6;
	color: white;
	padding: 4px 12px;
	border-radius: 20px;
	font-size: 0.8rem;
	font-weight: 600;
	text-transform: uppercase;
	}
	</style>
	""", unsafe_allow_html=True)

	@st.cache_data
	def load_data():
	    """Read the customer-segment table and, if available, the cleaned transactions.

	    Returns:
	        A ``(segments, transactions)`` pair. Both entries are ``None`` when the
	        customer-segments CSV does not exist. ``transactions`` alone is
	        ``None`` when only the cleaned retail CSV is missing.
	    """
	    segments_available = os.path.exists(CUSTOMER_SEGMENTS_PATH)
	    if not segments_available:
	        return None, None

	    # Rows are keyed by the 'Customer ID' column so callers can use .loc[id].
	    segments = pd.read_csv(CUSTOMER_SEGMENTS_PATH, index_col='Customer ID')

	    # The transaction-level file is optional; it only feeds the time-series view.
	    transactions_path = os.path.join(BASE_DIR, "data", "processed", "cleaned_retail_data.csv")
	    transactions = (
	        pd.read_csv(transactions_path, parse_dates=['InvoiceDate'])
	        if os.path.exists(transactions_path)
	        else None
	    )
	    return segments, transactions

	@st.cache_resource
	def load_model():
	    """Load the persisted model and the scaled-data bundle with joblib.

	    Returns:
	        ``(model, data_dict)`` as deserialized from ``KMEANS_MODEL_PATH`` and
	        ``SCALED_DATA_PATH``, or ``(None, None)`` if either file is missing.
	    """
	    artifacts_present = os.path.exists(KMEANS_MODEL_PATH) and os.path.exists(SCALED_DATA_PATH)
	    if not artifacts_present:
	        return None, None

	    # Tuple elements evaluate left to right, so the model is read first.
	    return joblib.load(KMEANS_MODEL_PATH), joblib.load(SCALED_DATA_PATH)

	@st.cache_data
	def get_pca_data(scaled_data, labels):
	    """Project the scaled features onto two principal components.

	    Args:
	        scaled_data: Feature table to reduce. Its ``.index`` is reused for the
	            result, so it must expose one (e.g. a DataFrame).
	        labels: Segment label per row, stored in the 'Segment' column. When
	            this is a pandas Series it is aligned to the result by index.

	    Returns:
	        DataFrame with columns 'PCA1', 'PCA2' and 'Segment', indexed like
	        ``scaled_data``.
	    """
	    reducer = PCA(n_components=2)
	    components = reducer.fit_transform(scaled_data)
	    projected = pd.DataFrame(
	        components,
	        index=scaled_data.index,
	        columns=['PCA1', 'PCA2'],
	    )
	    return projected.assign(Segment=labels)

	def _render_overview(df, df_filtered, df_raw, data_dict):
	    """Render the "Overview" page: KPIs, segment charts, revenue trend, risk, export.

	    Args:
	        df: Full customer table; reads the 'Segment' column.
	        df_filtered: ``df`` restricted to the segments picked in the sidebar;
	            reads 'Monetary', 'Recency' and 'Segment'.
	        df_raw: Transaction-level frame with 'InvoiceDate', 'Quantity' and
	            'Price', or ``None`` when it could not be loaded.
	        data_dict: Mapping whose 'rfm_scaled' entry feeds the PCA scatter.
	    """
	    if df_filtered.empty:
	        # Every ratio below divides by the number of selected customers; with
	        # an empty selection that would raise ZeroDivisionError.
	        st.warning("No customers match the current segment filter. Select at least one segment in the sidebar.")
	        return

	    # Share of selected customers inactive for more than 90 days. Computed
	    # once and reused by the KPI tile and the risk banner.
	    high_risk = len(df_filtered[df_filtered['Recency'] > 90])
	    risk_pct = (high_risk / len(df_filtered)) * 100

	    c1, c2, c3, c4 = st.columns(4)
	    c1.metric("Revenue Impact", f"£{df_filtered['Monetary'].sum():,.0f}")
	    c2.metric("Customer Scale", f"{len(df_filtered):,}")
	    c3.metric("Retention Risk", f"{risk_pct:.1f}%")
	    c4.metric("Avg. Order Value", f"£{df_filtered['Monetary'].mean():,.1f}")

	    st.markdown("<br>", unsafe_allow_html=True)

	    r1_c1, r1_c2 = st.columns([1, 1.2])

	    with r1_c1:
	        st.markdown("### Segment Distribution")
	        counts = df['Segment'].value_counts()
	        fig = px.pie(
	            values=counts.values,
	            names=counts.index,
	            hole=0.5,
	            color_discrete_sequence=px.colors.sequential.ice_r
	        )
	        fig.update_layout(
	            paper_bgcolor='rgba(0,0,0,0)',
	            plot_bgcolor='rgba(0,0,0,0)',
	            font_color="#f8fafc",
	            margin=dict(t=0, b=0, l=0, r=0),
	            legend=dict(orientation="h", yanchor="bottom", y=-0.1, xanchor="center", x=0.5)
	        )
	        st.plotly_chart(fig, use_container_width=True)

	    with r1_c2:
	        st.markdown("### 2D Projection Topology")
	        pca_df = get_pca_data(data_dict['rfm_scaled'], df['Segment'])
	        fig = px.scatter(
	            pca_df, x='PCA1', y='PCA2', color='Segment',
	            opacity=0.6,
	            color_discrete_sequence=px.colors.sequential.ice_r,
	            hover_data=[pca_df.index]
	        )
	        fig.update_layout(
	            paper_bgcolor='rgba(0,0,0,0)',
	            plot_bgcolor='rgba(15, 23, 42, 0.5)',
	            font_color="#f8fafc",
	            margin=dict(t=10, b=10, l=10, r=10),
	            xaxis=dict(showgrid=False),
	            yaxis=dict(showgrid=False)
	        )
	        st.plotly_chart(fig, use_container_width=True)

	    st.markdown("---")
	    st.markdown("### 📈 Revenue Benchmarking")
	    if df_raw is not None:
	        df_raw['Month'] = df_raw['InvoiceDate'].dt.to_period('M').astype(str)
	        df_raw['Revenue'] = df_raw['Quantity'] * df_raw['Price']
	        monthly_rev = df_raw.groupby('Month')['Revenue'].sum().reset_index()

	        fig = px.line(
	            monthly_rev, x='Month', y='Revenue',
	            color_discrete_sequence=['#3b82f6'],
	            render_mode='svg'
	        )
	        fig.update_layout(
	            paper_bgcolor='rgba(0,0,0,0)',
	            plot_bgcolor='rgba(15, 23, 42, 0.5)',
	            font_color="#f8fafc",
	            xaxis_title=None,
	            yaxis_title="Total Revenue (GBP)",
	            margin=dict(t=20, b=20, l=20, r=20)
	        )
	        fig.update_traces(line_width=3, fill='tozeroy', fillcolor='rgba(59, 130, 246, 0.1)')
	        st.plotly_chart(fig, use_container_width=True)

	    # The Pareto chart reads only the filtered customer table, so it does not
	    # depend on the optional transaction file being present.
	    st.markdown("<br>", unsafe_allow_html=True)
	    st.markdown("### 📊 Revenue Concentration (Pareto)")
	    seg_rev = df_filtered.groupby('Segment')['Monetary'].sum().sort_values(ascending=False).reset_index()
	    fig_bar = px.bar(
	        seg_rev, x='Segment', y='Monetary',
	        color='Monetary',
	        color_continuous_scale='ice'
	    )
	    fig_bar.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(15, 23, 42, 0.5)', font_color="#f8fafc")
	    st.plotly_chart(fig_bar, use_container_width=True)

	    st.markdown("---")
	    st.subheader("🚨 Risk Analytics")
	    if risk_pct > 30:
	        st.warning(f"Critical Warning: {risk_pct:.1f}% of selected customers are churn-risk (90+ days inactive).")
	    else:
	        st.success(f"Healthy Signal: Retention is stable with only {risk_pct:.1f}% churn-risk.")

	    st.markdown("<br>", unsafe_allow_html=True)
	    st.subheader("📥 Data Export & Actions")
	    csv = df_filtered.to_csv().encode('utf-8')
	    st.download_button("Export Intelligence Report (CSV)", data=csv, file_name='segmentx_report.csv', mime='text/csv')


	def _render_segment_profiles(df, selected_segments):
	    """Render the "Segment Profiles" page: RFM heatmap, mean table, top products.

	    Args:
	        df: Full customer table with 'Segment', 'Recency', 'Frequency' and
	            'Monetary' columns.
	        selected_segments: Segments chosen in the sidebar; only the first
	            three get a product column.
	    """
	    st.subheader("Cluster Behavioral Heatmap")
	    profile_stats = df.groupby('Segment')[['Recency', 'Frequency', 'Monetary']].mean()

	    # Min-max scale each metric across segments. A metric whose mean is the
	    # same in every segment has a zero range; use 1 there so the column
	    # becomes 0 instead of NaN.
	    value_range = (profile_stats.max() - profile_stats.min()).replace(0, 1)
	    profile_norm = (profile_stats - profile_stats.min()) / value_range

	    fig = px.imshow(
	        profile_norm.T,
	        labels=dict(x="Segment", y="Metric", color="Score"),
	        color_continuous_scale='Blues'
	    )
	    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', font_color="#f8fafc")
	    st.plotly_chart(fig, use_container_width=True)

	    st.markdown("#### Mean Values Matrix")
	    # Format per column: only Monetary is a currency amount. A value-based
	    # rule would also put "£" on large Recency and Frequency means.
	    st.table(profile_stats.style.format({
	        'Recency': '{:.2f}',
	        'Frequency': '{:.2f}',
	        'Monetary': '£{:,.2f}',
	    }))

	    st.markdown("---")
	    st.subheader("🛍️ Segment Affinity: Top 10 Products")
	    if not os.path.exists(SEGMENT_PRODUCTS_PATH):
	        st.info("Run product pipeline to see affinities.")
	        return

	    all_top_prods = pd.read_csv(SEGMENT_PRODUCTS_PATH)
	    display_segs = selected_segments[:3]
	    # Columns are only created when there is something to show, so
	    # st.columns is never asked for zero columns.
	    cols = st.columns(len(display_segs)) if display_segs else []

	    for col, seg in zip(cols, display_segs):
	        with col:
	            st.markdown(f"{seg}")
	            # .copy() gives an independent frame, so the column assignment
	            # below is not a chained assignment on a slice.
	            seg_prods = all_top_prods[all_top_prods['Segment'] == seg].head(10).copy()
	            if seg_prods.empty:
	                st.info("No data.")
	                continue
	            seg_prods['Description'] = seg_prods['Description'].str.slice(0, 30) + '...'
	            st.table(seg_prods[['Description', 'Quantity']].set_index('Description'))


	def _render_customer_lookup(df):
	    """Render the "Customer Lookup" page for one customer chosen by ID.

	    Args:
	        df: Full customer table indexed by customer ID, with 'Segment',
	            'Recency', 'Frequency' and 'Monetary' columns.
	    """
	    st.subheader("🔍 Intelligent Query")

	    if st.button("🎲 Randomized ID Picker"):
	        random_id = np.random.choice(df.index)
	        st.session_state.customer_lookup_id = int(random_id)

	    all_ids = sorted(df.index.unique().tolist())
	    if 'customer_lookup_id' not in st.session_state:
	        st.session_state.customer_lookup_id = all_ids[0]

	    # Pre-select the remembered customer; fall back to the first ID if the
	    # remembered value is not in the current table.
	    remembered_id = st.session_state.customer_lookup_id
	    default_position = all_ids.index(remembered_id) if remembered_id in all_ids else 0
	    customer_id = st.selectbox(
	        "Target Customer ID",
	        options=all_ids,
	        index=default_position
	    )
	    st.session_state.customer_lookup_id = customer_id

	    cust_data = df.loc[customer_id]

	    l1, l2, l3 = st.columns(3)
	    l1.metric("Segment Identity", cust_data['Segment'])
	    l2.metric("Orders", f"{cust_data['Frequency']:.0f}")
	    l3.metric("LTV GBP", f"£{cust_data['Monetary']:,.2f}")

	    st.markdown("---")
	    st.markdown("### 🛡️ Strategic Intelligence")

	    ci1, ci2 = st.columns(2)
	    # Churn score: 1 - exp(-recency / (1.5 * mean recency)), clamped to 0-100%.
	    avg_rec = df['Recency'].mean()
	    churn_prob = 1 - np.exp(-cust_data['Recency'] / (avg_rec * 1.5))
	    churn_pct = min(max(churn_prob * 100, 0), 100)
	    ci1.metric("Churn Risk Score", f"{churn_pct:.1f}%")

	    # Projected value: spend to date plus the same number of orders again at
	    # the customer's average order value. A zero order count would otherwise
	    # divide by zero and display "nan".
	    frequency = cust_data['Frequency']
	    avg_order = cust_data['Monetary'] / frequency if frequency else 0.0
	    projected_clv = cust_data['Monetary'] + (avg_order * frequency)
	    ci2.metric("Projected 1Y-LTV", f"£{projected_clv:,.2f}")

	    st.markdown("<br>", unsafe_allow_html=True)
	    recommendations = {
	        "Champions": "High Value, Low Churn. Goal: Retention. Strategy: Early Access, Loyalty Rewards.",
	        "Loyal Customers": "Consistent Value. Goal: Growth. Strategy: Cross-sell related categories.",
	        "At-Risk": "Recent Inactivity. Goal: Re-activation. Strategy: Limited-time win-back discounts.",
	        "Lost/Hibernating": "Historical only. Goal: Win-back or Pause. Strategy: Reactivate only high LTV types."
	    }
	    st.info(f"Execution Strategy: {recommendations.get(cust_data['Segment'], 'Maintain baseline engagement.')}")


	def main():
	    """Build the dashboard: load artifacts, draw the sidebar, dispatch to a page.

	    Shows an error and stops when the customer table or the model cannot be
	    loaded. Otherwise renders the sidebar (page selector and global segment
	    filter), the brand header, and the page chosen in the sidebar.
	    """
	    df, df_raw = load_data()
	    model, data_dict = load_model()

	    if df is None or model is None:
	        st.error("Project data or models not found. Please run the pipeline scripts first.")
	        return

	    # Modern Sidebar
	    st.sidebar.markdown("<h2 style='color:#3b82f6'>SegmentX</h2>", unsafe_allow_html=True)
	    st.sidebar.markdown("---")
	    page = st.sidebar.radio("Console Navigation", ["Overview", "Segment Profiles", "Customer Lookup"])

	    segments_list = df['Segment'].unique().tolist()
	    selected_segments = st.sidebar.multiselect("Global Segment Filter", segments_list, default=segments_list)
	    df_filtered = df[df['Segment'].isin(selected_segments)]

	    st.markdown("""
	    <div class="brand-header">
	    <span class="brand-tag">Intelligence Console</span>
	    <h1 style="margin:0">Behavioral Portal <span style="color:#3b82f6; font-weight:300">v2.0</span></h1>
	    </div>
	    """, unsafe_allow_html=True)

	    if page == "Overview":
	        _render_overview(df, df_filtered, df_raw, data_dict)
	    elif page == "Segment Profiles":
	        _render_segment_profiles(df, selected_segments)
	    elif page == "Customer Lookup":
	        _render_customer_lookup(df)

	# Script entry point: build the dashboard only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()