DIVYANSHI SINGH
Feature Recovery: Restored Top Products, Pareto, Churn Probability, and CLV with fixed Plotly color attributes
c2616da | import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from sklearn.decomposition import PCA | |
| import joblib | |
| import os | |
| from datetime import datetime | |
# Filesystem layout. Every artefact is resolved relative to the directory that
# contains this script, so the app does not depend on the working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
_PROCESSED_DIR = os.path.join(BASE_DIR, "data", "processed")
_OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")

# Scaled RFM features and the fitted clustering model (joblib pickles).
SCALED_DATA_PATH = os.path.join(_PROCESSED_DIR, "scaled_rfm_data.pkl")
KMEANS_MODEL_PATH = os.path.join(BASE_DIR, "models", "kmeans_model.pkl")

# CSV outputs consumed by the dashboard pages.
CUSTOMER_SEGMENTS_PATH = os.path.join(_OUTPUTS_DIR, "customer_segments.csv")
SEGMENT_PRODUCTS_PATH = os.path.join(_OUTPUTS_DIR, "segment_products.csv")
# Page-level Streamlit configuration (browser tab title/icon, wide layout,
# sidebar open on first load).
# NOTE(review): the icon literal in this file was mis-encoded (it showed as a
# lone Greek "pi"), so it could not be used as an emoji favicon. The chart
# emoji below is a best-guess replacement -- confirm the intended glyph.
st.set_page_config(
    page_title="SegmentX | Customer Intelligence Portal",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)
# --- Industry-Grade UI Refinement ---
# Global stylesheet for the dark theme: Inter font, padded content wrapper,
# glass-style metric cards, and the branded page header classes that main()
# references. The literal is kept flush-left on purpose so Markdown treats it
# as a raw HTML block rather than an indented code block.
_PORTAL_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&display=swap');
html, body, [class*="css"] {
font-family: 'Inter', sans-serif;
}
.main {
background-color: #0f172a;
color: #f8fafc;
}
/* Stabilized Content Wrapper */
.block-container {
max-width: 1400px;
padding: 2rem 5rem !important;
}
.stMetric {
background: rgba(30, 41, 59, 0.7);
backdrop-filter: blur(8px);
padding: 24px;
border-radius: 16px;
border: 1px solid rgba(148, 163, 184, 0.1);
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}
.stMetric label {
color: #94a3b8 !important;
font-weight: 500 !important;
}
h1, h2, h3 {
color: #f8fafc;
letter-spacing: -0.025em;
}
/* Brand Header */
.brand-header {
display: flex;
align-items: center;
gap: 12px;
padding-bottom: 2rem;
margin-bottom: 2rem;
border-bottom: 1px solid rgba(148, 163, 184, 0.1);
}
.brand-tag {
background: #3b82f6;
color: white;
padding: 4px 12px;
border-radius: 20px;
font-size: 0.8rem;
font-weight: 600;
text-transform: uppercase;
}
</style>
"""
st.markdown(_PORTAL_CSS, unsafe_allow_html=True)
def load_data():
    """Read the customer-segment table and, if available, the cleaned transactions.

    Returns:
        A ``(segments, transactions)`` tuple.

        * ``(None, None)`` when the customer-segments CSV does not exist.
        * ``segments`` is the CSV at ``CUSTOMER_SEGMENTS_PATH`` indexed by its
          ``'Customer ID'`` column.
        * ``transactions`` is the cleaned retail CSV with ``'InvoiceDate'``
          parsed as dates, or ``None`` when that file is missing.
    """
    if not os.path.exists(CUSTOMER_SEGMENTS_PATH):
        return None, None

    segments = pd.read_csv(CUSTOMER_SEGMENTS_PATH, index_col='Customer ID')

    # The transaction-level file only feeds the time-series chart, so it is
    # optional: its absence is reported as None rather than as a failure.
    raw_path = os.path.join(BASE_DIR, "data", "processed", "cleaned_retail_data.csv")
    transactions = (
        pd.read_csv(raw_path, parse_dates=['InvoiceDate'])
        if os.path.exists(raw_path)
        else None
    )
    return segments, transactions
def load_model():
    """Load the persisted clustering model and the scaled-feature bundle.

    Returns:
        ``(model, data_dict)`` as deserialised by ``joblib.load`` from
        ``KMEANS_MODEL_PATH`` and ``SCALED_DATA_PATH``, or ``(None, None)``
        when either file is missing.
    """
    required_files = (KMEANS_MODEL_PATH, SCALED_DATA_PATH)
    if not all(os.path.exists(path) for path in required_files):
        return None, None

    # NOTE: joblib.load unpickles its input, so these paths must only ever
    # point at artefacts produced by the project's own pipeline.
    return joblib.load(KMEANS_MODEL_PATH), joblib.load(SCALED_DATA_PATH)
def get_pca_data(scaled_data, labels):
    """Project the scaled feature table onto its first two principal components.

    Args:
        scaled_data: Feature table to reduce. It must expose an ``.index``
            attribute, which is reused as the index of the result.
        labels: Segment labels stored in the ``'Segment'`` column of the
            result (presumably a Series keyed like ``scaled_data`` -- verify
            against the caller).

    Returns:
        A DataFrame with columns ``'PCA1'``, ``'PCA2'`` and ``'Segment'``.
    """
    components = PCA(n_components=2).fit_transform(scaled_data)
    projection = pd.DataFrame(
        components,
        index=scaled_data.index,
        columns=['PCA1', 'PCA2'],
    )
    projection['Segment'] = labels
    return projection
def _churn_risk_share(frame):
    """Return the percentage of rows whose Recency exceeds 90 (0.0 for an empty frame)."""
    if frame.empty:
        return 0.0
    return float((frame['Recency'] > 90).mean() * 100)


def _churn_probability_pct(recency, avg_recency):
    """Return 1 - exp(-recency / (1.5 * avg_recency)) as a percentage clamped to [0, 100].

    A non-positive or NaN ``avg_recency`` would make the ratio undefined, so
    0.0 is returned in that case instead of propagating inf/NaN to the UI.
    """
    if not avg_recency > 0:
        return 0.0
    probability = 1 - np.exp(-recency / (avg_recency * 1.5))
    return float(min(max(probability * 100, 0), 100))


def _apply_dark_layout(fig, **overrides):
    """Apply the transparent paper background and light font shared by every chart."""
    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', font_color="#f8fafc", **overrides)
    return fig


def _render_overview(df, df_filtered, df_raw, data_dict):
    """Draw the Overview page: KPI cards, distribution charts, revenue trend, risk and export."""
    # With every segment deselected the KPI ratios below would divide by zero,
    # so stop early with an explanatory message.
    if df_filtered.empty:
        st.warning("No segments selected. Choose at least one segment in the sidebar filter.")
        return

    risk_pct = _churn_risk_share(df_filtered)

    # Interactive Overview
    c1, c2, c3, c4 = st.columns(4)
    c1.metric("Revenue Impact", f"£{df_filtered['Monetary'].sum():,.0f}")
    c2.metric("Customer Scale", f"{len(df_filtered):,}")
    c3.metric("Retention Risk", f"{risk_pct:.1f}%")
    c4.metric("Avg. Order Value", f"£{df_filtered['Monetary'].mean():,.1f}")
    st.markdown("<br>", unsafe_allow_html=True)

    # Interactive Row 1
    r1_c1, r1_c2 = st.columns([1, 1.2])
    with r1_c1:
        st.markdown("### Segment Distribution")
        counts = df['Segment'].value_counts()
        fig = px.pie(
            values=counts.values,
            names=counts.index,
            hole=0.5,
            color_discrete_sequence=px.colors.sequential.ice_r,
        )
        _apply_dark_layout(
            fig,
            plot_bgcolor='rgba(0,0,0,0)',
            margin=dict(t=0, b=0, l=0, r=0),
            legend=dict(orientation="h", yanchor="bottom", y=-0.1, xanchor="center", x=0.5),
        )
        st.plotly_chart(fig, use_container_width=True)
    with r1_c2:
        st.markdown("### 2D Projection Topology")
        pca_df = get_pca_data(data_dict['rfm_scaled'], df['Segment'])
        fig = px.scatter(
            pca_df, x='PCA1', y='PCA2', color='Segment',
            opacity=0.6,
            color_discrete_sequence=px.colors.sequential.ice_r,
            hover_data=[pca_df.index],
        )
        _apply_dark_layout(
            fig,
            plot_bgcolor='rgba(15, 23, 42, 0.5)',
            margin=dict(t=10, b=10, l=10, r=10),
            xaxis=dict(showgrid=False),
            yaxis=dict(showgrid=False),
        )
        st.plotly_chart(fig, use_container_width=True)

    st.markdown("---")
    st.markdown("### 📈 Revenue Benchmarking")
    if df_raw is not None:
        # Aggregate on derived Series instead of adding columns to df_raw, so
        # the loaded transaction frame is never mutated by rendering.
        revenue = df_raw['Quantity'] * df_raw['Price']
        month = df_raw['InvoiceDate'].dt.to_period('M').astype(str)
        monthly_rev = (
            revenue.groupby(month).sum()
            .rename_axis('Month')
            .reset_index(name='Revenue')
        )
        fig = px.line(
            monthly_rev, x='Month', y='Revenue',
            color_discrete_sequence=['#3b82f6'],
            render_mode='svg',
        )
        _apply_dark_layout(
            fig,
            plot_bgcolor='rgba(15, 23, 42, 0.5)',
            xaxis_title=None,
            yaxis_title="Total Revenue (GBP)",
            margin=dict(t=20, b=20, l=20, r=20),
        )
        fig.update_traces(line_width=3, fill='tozeroy', fillcolor='rgba(59, 130, 246, 0.1)')
        st.plotly_chart(fig, use_container_width=True)

    st.markdown("<br>", unsafe_allow_html=True)
    st.markdown("### 📊 Revenue Concentration (Pareto)")
    seg_rev = (
        df_filtered.groupby('Segment')['Monetary'].sum()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig_bar = px.bar(
        seg_rev, x='Segment', y='Monetary',
        color='Monetary',
        color_continuous_scale='ice',
    )
    _apply_dark_layout(fig_bar, plot_bgcolor='rgba(15, 23, 42, 0.5)')
    st.plotly_chart(fig_bar, use_container_width=True)

    st.markdown("---")
    st.subheader("🚨 Risk Analytics")
    if risk_pct > 30:
        st.warning(f"**Critical Warning**: {risk_pct:.1f}% of selected customers are churn-risk (90+ days inactive).")
    else:
        st.success(f"**Healthy Signal**: Retention is stable with only {risk_pct:.1f}% churn-risk.")

    st.markdown("<br>", unsafe_allow_html=True)
    st.subheader("📥 Data Export & Actions")
    csv = df_filtered.to_csv().encode('utf-8')
    st.download_button("Export Intelligence Report (CSV)", data=csv, file_name='segmentx_report.csv', mime='text/csv')


def _render_segment_profiles(df, selected_segments):
    """Draw the Segment Profiles page: normalised RFM heatmap, mean table, top products."""
    st.subheader("Cluster Behavioral Heatmap")
    profile_stats = df.groupby('Segment')[['Recency', 'Frequency', 'Monetary']].mean()

    # Min-max scale each metric across segments. A metric with zero spread
    # (e.g. a single segment) would give 0/0 = NaN, so its divisor is set to 1
    # and the metric shows as 0 instead of a blank cell.
    spread = (profile_stats.max() - profile_stats.min()).replace(0, 1)
    profile_norm = (profile_stats - profile_stats.min()) / spread

    fig = px.imshow(
        profile_norm.T,
        labels=dict(x="Segment", y="Metric", color="Score"),
        color_continuous_scale='Blues',
    )
    _apply_dark_layout(fig)
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("#### Mean Values Matrix")
    # Format per column: only Monetary is a currency amount. A value-threshold
    # rule would also put a pound sign on large Recency/Frequency means.
    st.table(profile_stats.style.format({
        'Recency': '{:.2f}',
        'Frequency': '{:.2f}',
        'Monetary': '£{:,.2f}',
    }))

    st.markdown("---")
    st.subheader("🛍️ Segment Affinity: Top 10 Products")
    if not os.path.exists(SEGMENT_PRODUCTS_PATH):
        st.info("Run product pipeline to see affinities.")
        return

    all_top_prods = pd.read_csv(SEGMENT_PRODUCTS_PATH)
    display_segs = selected_segments[:3]  # at most three side-by-side tables
    if not display_segs:
        return

    for col, seg in zip(st.columns(len(display_segs)), display_segs):
        with col:
            st.markdown(f"**{seg}**")
            seg_prods = all_top_prods[all_top_prods['Segment'] == seg].head(10)
            if seg_prods.empty:
                st.info("No data.")
                continue
            # Build a new frame (no write into a slice of all_top_prods) and
            # add the ellipsis only to descriptions that were actually cut.
            full_desc = seg_prods['Description']
            short_desc = full_desc.str.slice(0, 30)
            seg_prods = seg_prods.assign(
                Description=short_desc.where(full_desc.str.len() <= 30, short_desc + '...')
            )
            st.table(seg_prods[['Description', 'Quantity']].set_index('Description'))


def _render_customer_lookup(df):
    """Draw the Customer Lookup page: ID picker, headline metrics, risk/CLV and strategy."""
    st.subheader("🔍 Intelligent Query")
    if st.button("🎲 Randomized ID Picker"):
        random_id = np.random.choice(df.index)
        st.session_state.customer_lookup_id = int(random_id)

    all_ids = sorted(df.index.unique().tolist())
    if 'customer_lookup_id' not in st.session_state:
        st.session_state.customer_lookup_id = all_ids[0]

    remembered_id = st.session_state.customer_lookup_id
    customer_id = st.selectbox(
        "Target Customer ID",
        options=all_ids,
        index=all_ids.index(remembered_id) if remembered_id in all_ids else 0,
    )
    st.session_state.customer_lookup_id = customer_id
    cust_data = df.loc[customer_id]

    l1, l2, l3 = st.columns(3)
    l1.metric("Segment Identity", cust_data['Segment'])
    l2.metric("Orders", f"{cust_data['Frequency']:.0f}")
    l3.metric("LTV GBP", f"£{cust_data['Monetary']:,.2f}")

    st.markdown("---")
    st.markdown("### 🛡️ Strategic Intelligence")
    ci1, ci2 = st.columns(2)

    # Churn Probability Logic
    churn_pct = _churn_probability_pct(cust_data['Recency'], df['Recency'].mean())
    ci1.metric("Churn Risk Score", f"{churn_pct:.1f}%")

    # Predicted CLV: historical spend plus one more period at the same
    # average order value and order count. Guard the division so a customer
    # with zero recorded orders does not yield inf/NaN.
    frequency = cust_data['Frequency']
    avg_order = cust_data['Monetary'] / frequency if frequency > 0 else 0.0
    projected_clv = cust_data['Monetary'] + (avg_order * frequency)
    ci2.metric("Projected 1Y-LTV", f"£{projected_clv:,.2f}")

    st.markdown("<br>", unsafe_allow_html=True)
    recommendations = {
        "Champions": "High Value, Low Churn. Goal: Retention. Strategy: Early Access, Loyalty Rewards.",
        "Loyal Customers": "Consistent Value. Goal: Growth. Strategy: Cross-sell related categories.",
        "At-Risk": "Recent Inactivity. Goal: Re-activation. Strategy: Limited-time win-back discounts.",
        "Lost/Hibernating": "Historical only. Goal: Win-back or Pause. Strategy: Reactivate only high LTV types.",
    }
    st.info(f"**Execution Strategy**: {recommendations.get(cust_data['Segment'], 'Maintain baseline engagement.')}")


def main():
    """Run the SegmentX dashboard: load artefacts, build the sidebar, render the chosen page.

    Shows an error and returns early when the segment CSV or the model files
    are missing. Otherwise the sidebar offers page navigation and a segment
    multiselect; the filtered frame feeds the Overview KPIs, Pareto chart,
    risk banner and CSV export.

    NOTE(review): currency signs and emoji in the UI strings were mis-encoded
    in this file and have been restored. The glyphs on the "Revenue
    Benchmarking", "Revenue Concentration" and "Intelligent Query" headings
    could not be recovered exactly and are best guesses -- confirm.
    """
    df, df_raw = load_data()
    model, data_dict = load_model()
    if df is None or model is None:
        st.error("Project data or models not found. Please run the pipeline scripts first.")
        return

    # Modern Sidebar
    st.sidebar.markdown("<h2 style='color:#3b82f6'>SegmentX</h2>", unsafe_allow_html=True)
    st.sidebar.markdown("---")
    page = st.sidebar.radio("Console Navigation", ["Overview", "Segment Profiles", "Customer Lookup"])
    segments_list = df['Segment'].unique().tolist()
    selected_segments = st.sidebar.multiselect("Global Segment Filter", segments_list, default=segments_list)
    df_filtered = df[df['Segment'].isin(selected_segments)]

    # Kept flush-left so Markdown treats the snippet as raw HTML rather than
    # as an indented code block.
    st.markdown("""
<div class="brand-header">
<span class="brand-tag">Intelligence Console</span>
<h1 style="margin:0">Behavioral Portal <span style="color:#3b82f6; font-weight:300">v2.0</span></h1>
</div>
""", unsafe_allow_html=True)

    if page == "Overview":
        _render_overview(df, df_filtered, df_raw, data_dict)
    elif page == "Segment Profiles":
        _render_segment_profiles(df, selected_segments)
    elif page == "Customer Lookup":
        _render_customer_lookup(df)
# Script entry point: render the dashboard only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()