""" Enhanced Telecom Customer Segmentation Dashboard =============================================== ✅ Interactive visual insights ✅ Communication: Location, Intl calls, Frequency, Duration, Time (Morning/Noon/Night) ✅ Internet: Download, Upload, Overall ✅ SMS: Frequency ✅ Customer lookup by ID with LLM suggestions ✅ Dynamic clustering & visualizations """ import streamlit as st import requests import pandas as pd import plotly.express as px import plotly.graph_objects as go from plotly.subplots import make_subplots import json import io from datetime import datetime # ============================================ # CONFIGURATION # ============================================ st.set_page_config( page_title="📊 Telecom Analytics Dashboard", page_icon="📊", layout="wide", initial_sidebar_state="expanded" ) # Backend URL - auto-detects environment import os # Priority: 1. Streamlit secrets, 2. Environment variable, 3. Local default BACKEND_URL = "http://localhost:7860" # Default for local development try: # Try to get from Streamlit secrets (HuggingFace deployment) BACKEND_URL = st.secrets.get("BACKEND_URL", BACKEND_URL) except (FileNotFoundError, KeyError, AttributeError): # Try environment variable BACKEND_URL = os.getenv('BACKEND_URL', BACKEND_URL) # Custom CSS st.markdown(""" """, unsafe_allow_html=True) # ============================================ # UTILITY FUNCTIONS # ============================================ def show_active_filters_banner(): """Display a banner showing currently active filters""" if 'filters' not in st.session_state: return False filters = st.session_state.filters active_filters = [] if filters.get('clusters'): cluster_list = ', '.join(map(str, filters['clusters'])) active_filters.append(f"🎯 Clusters: {cluster_list}") if filters.get('usage_level', 'All') != 'All': active_filters.append(f"⚡ Usage: {filters['usage_level']}") if filters.get('international', 'All') != 'All': active_filters.append(f"🌍 {filters['international']}") if active_filters: filter_text = " | ".join(active_filters) st.info(f"🔍 **Active Filters:** {filter_text}") return True return False # ============================================ # API FUNCTIONS # ============================================ @st.cache_data(ttl=300) def get_stats(): try: response = requests.get(f"{BACKEND_URL}/api/stats", timeout=10) response.raise_for_status() return response.json() except Exception as e: st.error(f"Backend error: {e}") return None @st.cache_data(ttl=300) def get_time_analysis(): try: response = requests.get(f"{BACKEND_URL}/api/time-analysis", timeout=10) response.raise_for_status() return response.json() except: return None @st.cache_data(ttl=300) def get_clusters(cluster_type="kmeans"): try: response = requests.get(f"{BACKEND_URL}/api/clusters", params={"cluster_type": cluster_type}, timeout=10) response.raise_for_status() return response.json() except: return None def get_customer(customer_id): try: response = requests.get(f"{BACKEND_URL}/api/customers/{customer_id}", timeout=10) response.raise_for_status() return response.json() except Exception as e: return None def query_ai(question): try: response = requests.post(f"{BACKEND_URL}/api/query", json={"question": question}, timeout=60) response.raise_for_status() return response.json() except requests.exceptions.Timeout: return {"error": "AI service is taking longer than expected. Please try again."} except requests.exceptions.ConnectionError: return {"error": "Cannot connect to AI service. Please check backend connection."} except Exception as e: return {"error": "AI service temporarily unavailable. Please try again later."} def get_visualization(viz_type): try: response = requests.get(f"{BACKEND_URL}/api/visualizations/{viz_type}", timeout=15) response.raise_for_status() return response.json() except: return None def run_clustering(n_clusters, algorithm): try: response = requests.post( f"{BACKEND_URL}/api/cluster/run", json={"n_clusters": n_clusters, "algorithm": algorithm}, timeout=60 ) response.raise_for_status() return response.json() except Exception as e: return {"error": str(e)} # ============================================ # UI COMPONENTS # ============================================ # Export/Download Functions def export_to_csv(data, filename="export.csv"): """Convert data to CSV for download""" if isinstance(data, dict): df = pd.DataFrame([data]) elif isinstance(data, list): df = pd.DataFrame(data) else: df = data return df.to_csv(index=False).encode('utf-8') def export_chart_to_image(fig): """Convert Plotly figure to PNG bytes""" return fig.to_image(format="png", width=1200, height=600) def create_export_buttons(data=None, chart=None, prefix="report"): """Create standardized export buttons""" col1, col2, col3 = st.columns([1, 1, 4]) with col1: if data is not None: csv_data = export_to_csv(data) st.download_button( label="📥 Export CSV", data=csv_data, file_name=f"{prefix}_{datetime.now().strftime('%Y%m%d_%H%M')}.csv", mime="text/csv", use_container_width=True ) with col2: if chart is not None: try: img_bytes = export_chart_to_image(chart) st.download_button( label="📊 Save Chart", data=img_bytes, file_name=f"{prefix}_chart_{datetime.now().strftime('%Y%m%d_%H%M')}.png", mime="image/png", use_container_width=True ) except: st.caption("Chart export requires kaleido") # AI Insights Panel def show_ai_insights(context, view_name="current view"): """Display AI-generated insights for current view""" with st.expander("💡 AI Insights & Recommendations", expanded=False): with st.spinner("🔮 Analyzing data..."): prompt = f"""Analyze the {view_name} data and provide exactly 3 actionable business insights (NOT package recommendations). Use this EXACT format with markdown: ### 🎯 Insight 1: [Catchy Title] **Key Finding:** [Main point with specific numbers and percentages] **Action:** [Clear business action to take] ### 📊 Insight 2: [Catchy Title] **Key Finding:** [Main point with specific numbers and percentages] **Action:** [Clear business action to take] ### 💡 Insight 3: [Catchy Title] **Key Finding:** [Main point with specific numbers and percentages] **Action:** [Clear business action to take] DO NOT provide package recommendations or pricing. Focus on trends, opportunities, and strategic actions. Context: {context} """ response = query_ai(prompt) if response and 'answer' in response: st.markdown("---") # Display formatted insights with better styling st.markdown(response['answer']) st.markdown("---") elif response and 'error' in response: st.warning(f"⚠️ {response['error']}") else: st.warning("⚠️ AI insights temporarily unavailable") def render_header(): st.markdown('
📊 Telecom Customer Analytics
', unsafe_allow_html=True) st.markdown("### Complete Customer Insights & AI-Powered Analysis") st.markdown("---") def render_overview_metrics(stats): if not stats: st.warning("Could not load statistics") return col1, col2, col3, col4, col5 = st.columns(5) with col1: st.metric("👥 Total Customers", f"{stats['total_customers']:,}") with col2: st.metric("🌍 International", f"{stats['international_users']:,}", delta=f"{stats['international_percentage']:.1f}%") with col3: st.metric("📞 Avg Calls", f"{stats['avg_voice_calls']:.0f}") with col4: st.metric("📱 Avg Data", f"{stats['avg_data_mb']:.0f} MB") with col5: st.metric("💬 Total SMS", f"{stats['total_sms']:,}") def render_communication_insights(stats, time_analysis): st.subheader("📞 Communication Insights") col1, col2 = st.columns(2) with col1: st.markdown("#### Call Frequency & Duration") st.metric("Total Voice Minutes", f"{stats['total_voice_mins']:,.0f}") st.metric("Average per User", f"{stats['avg_voice_mins']:.1f} mins") st.metric("Call Lovers", f"{stats['call_lovers']:,}") with col2: st.markdown("#### International Calls") st.metric("International Users", f"{stats['international_users']:,}") st.metric("Percentage", f"{stats['international_percentage']:.2f}%") st.info("🌍 Location data: Available in customer details") # Time distribution if time_analysis: st.markdown("#### Time Distribution (Morning/Evening/Night)") time_data = time_analysis['overall'] # Create pie chart fig = go.Figure(data=[go.Pie( labels=['🌅 Morning', '🌆 Evening', '🌙 Night'], values=[time_data['morning_calls'], time_data['evening_calls'], time_data['night_calls']], hole=.4, marker_colors=['#FDB462', '#80B1D3', '#8DD3C7'] )]) fig.update_layout( title="Calls by Time of Day", showlegend=True, height=400 ) st.plotly_chart(fig, use_container_width=True) col1, col2, col3 = st.columns(3) with col1: st.metric("🌅 Morning", f"{time_data['morning_calls']:,}", f"{time_data['morning_pct']:.1f}%") with col2: st.metric("🌆 Evening", f"{time_data['evening_calls']:,}", f"{time_data['evening_pct']:.1f}%") with col3: st.metric("🌙 Night", f"{time_data['night_calls']:,}", f"{time_data['night_pct']:.1f}%") def render_internet_insights(stats): st.subheader("🌐 Internet Usage") col1, col2, col3 = st.columns(3) with col1: st.markdown("#### Download") st.metric("Total (GB)", f"{stats['avg_download_mb'] * stats['total_customers'] / 1024:.1f}") st.metric("Avg per User (MB)", f"{stats['avg_download_mb']:.1f}") st.info(f"📥 {stats['download_lovers']:,} Download Lovers") with col2: st.markdown("#### Upload") st.metric("Total (GB)", f"{stats['avg_upload_mb'] * stats['total_customers'] / 1024:.1f}") st.metric("Avg per User (MB)", f"{stats['avg_upload_mb']:.1f}") st.info(f"📤 {stats['upload_lovers']:,} Upload Lovers") with col3: st.markdown("#### Overall") st.metric("Total (GB)", f"{stats['total_data_gb']:.1f}") st.metric("Avg per User (MB)", f"{stats['avg_data_mb']:.1f}") st.success(f"📊 {stats['data_lovers']:,} Data Lovers") # Visualization viz_data = get_visualization("data-breakdown") if viz_data and 'chart' in viz_data: fig = go.Figure(json.loads(viz_data['chart'])) st.plotly_chart(fig, use_container_width=True) def render_sms_insights(stats): st.subheader("💬 SMS Insights") # Calculate meaningful metrics sms_adoption_rate = (stats['sms_users'] / stats['total_customers'] * 100) avg_per_active = stats.get('avg_sms_per_active_user', 2.0) # Fallback if backend not updated # Alert if SMS usage is very low if sms_adoption_rate < 5: st.warning(f"⚠️ **Low SMS Adoption:** Only {sms_adoption_rate:.1f}% of customers use SMS. Most likely prefer OTT messaging apps (WhatsApp, Telegram, etc.)") col1, col2, col3, col4 = st.columns(4) with col1: st.metric("Total Messages", f"{stats['total_sms']:,}") with col2: st.metric("SMS Users", f"{stats['sms_users']:,}", f"{sms_adoption_rate:.1f}% adoption") with col3: st.metric("Avg (Active Users)", f"{avg_per_active:.1f} msgs", help="Average messages among customers who actually use SMS") with col4: st.metric("Avg (All Users)", f"{stats['avg_sms_per_user']:.2f} msgs", help="Average across all customers (includes 98%+ with 0 SMS)") # Insight box st.info(f""" **📊 SMS Analysis:** - **{stats['sms_users']:,}** customers sent SMS (only **{sms_adoption_rate:.1f}%** of total) - Active SMS users average **{avg_per_active:.1f} messages** each - **{stats['total_customers'] - stats['sms_users']:,}** customers (**{100-sms_adoption_rate:.1f}%**) sent **ZERO** SMS - This suggests heavy reliance on OTT messaging apps (WhatsApp, Telegram, etc.) """) # Frequency distribution freq_high = int(stats['sms_users'] * 0.25) # Estimate freq_medium = int(stats['sms_users'] * 0.35) freq_low = stats['sms_users'] - freq_high - freq_medium fig = px.bar( x=['High Frequency', 'Medium Frequency', 'Low Frequency'], y=[freq_high, freq_medium, freq_low], title="SMS Frequency Distribution (Among Active Users Only)", labels={'x': 'Frequency', 'y': 'Number of Users'}, color=['High', 'Medium', 'Low'], color_discrete_sequence=['#E74C3C', '#F39C12', '#3498DB'] ) st.plotly_chart(fig, use_container_width=True) def render_customer_lookup(): st.subheader("🔍 Customer Lookup by Subscriber ID") col1, col2 = st.columns([2, 1]) with col1: customer_id = st.number_input("Enter Subscriber ID:", min_value=1, step=1, format="%d") with col2: st.markdown("
", unsafe_allow_html=True) search_btn = st.button("🔎 Search Customer", type="primary", use_container_width=True) if search_btn and customer_id: with st.spinner("Fetching customer data..."): customer = get_customer(customer_id) if customer: st.success(f"✅ Found Customer {customer_id}") # Communication Section with st.expander("📞 Communication Analysis", expanded=True): comm = customer['communication'] time_dist = comm['time_distribution'] col1, col2, col3 = st.columns(3) with col1: st.metric("Total Calls", f"{comm['voice_total_calls']:.0f}") st.metric("Total Duration", f"{comm['voice_total_duration_mins']:.1f} mins") with col2: st.metric("Avg Call Duration", f"{comm['voice_avg_duration_mins']:.1f} mins") with col3: st.markdown("**Time Distribution:**") st.write(f"🌅 Morning: {time_dist['morning_calls']} ({time_dist['morning_pct']:.1f}%)") st.write(f"🌆 Evening: {time_dist['evening_calls']} ({time_dist['evening_pct']:.1f}%)") st.write(f"🌙 Night: {time_dist['night_calls']} ({time_dist['night_pct']:.1f}%)") # International Section with st.expander("🌍 International Details", expanded=True): intl = customer['international'] if intl['is_international_user']: col1, col2, col3 = st.columns(3) with col1: st.metric("Total Calls", f"{intl['total_calls']:.0f}") st.metric("Total Duration", f"{intl['total_duration_mins']:.1f} mins") with col2: st.metric("Countries Called", f"{intl['countries_called']}") st.info(f"Top: {intl['top_country']}") with col3: st.markdown("**All Countries:**") st.write(intl['all_countries']) else: st.info("❌ Not an international user") # Internet Section with st.expander("🌐 Internet Usage", expanded=True): internet = customer['internet'] col1, col2, col3 = st.columns(3) with col1: st.metric("Download", f"{internet['download_mb']:.1f} MB", f"{internet['download_pct']:.1f}%") with col2: st.metric("Upload", f"{internet['upload_mb']:.1f} MB", f"{internet['upload_pct']:.1f}%") with col3: st.metric("Total", f"{internet['total_mb']:.1f} MB") # Pie chart for upload/download if internet['total_mb'] > 0: fig = go.Figure(data=[go.Pie( labels=['Download', 'Upload'], values=[internet['download_mb'], internet['upload_mb']], hole=.3, marker_colors=['#66C2A5', '#FC8D62'] )]) fig.update_layout(title="Data Breakdown", height=300) st.plotly_chart(fig, use_container_width=True) # SMS Section with st.expander("💬 SMS Activity", expanded=True): sms = customer['sms'] col1, col2 = st.columns(2) with col1: st.metric("Total Messages", f"{sms['total_messages']}") with col2: st.metric("Frequency Level", sms['frequency']) # AI Suggestions st.markdown("---") st.markdown("### 🤖 AI-Powered Recommendations") with st.spinner("🔮 Analyzing customer profile and generating personalized recommendations..."): # Build context for LLM with full time distribution morning_pct = time_dist.get('Morning', 0) / sum(time_dist.values()) * 100 if sum(time_dist.values()) > 0 else 0 evening_pct = time_dist.get('Evening', 0) / sum(time_dist.values()) * 100 if sum(time_dist.values()) > 0 else 0 night_pct = time_dist.get('Night', 0) / sum(time_dist.values()) * 100 if sum(time_dist.values()) > 0 else 0 context = f""" Provide a PACKAGE RECOMMENDATION for this individual customer: **Customer Profile:** • Voice: {comm['voice_total_calls']:.0f} calls, {comm['voice_total_duration_mins']:.1f} mins • Time Distribution: Morning {morning_pct:.1f}%, Evening {evening_pct:.1f}%, Night {night_pct:.1f}% • Data: {internet['total_mb']:.0f} MB (Download: {internet['download_pct']:.0f}%, Upload: {internet['upload_pct']:.0f}%) • SMS: {sms['total_messages']} messages • International: {'Yes' if intl['is_international_user'] else 'No'} {f"• Countries called: {intl['all_countries']}" if intl['is_international_user'] else ''} Use this EXACT format with proper markdown: ### 📋 USAGE PROFILE **Pattern:** [Describe time distribution - bimodal/uniform/concentrated] **Behavior:** [Customer type based on usage] • List all significant time periods (>25%) • Identify usage patterns clearly ### 🎁 RECOMMENDED PACKAGE **Package Name:** [Creative name matching usage] **Details:** [Specific allocations - voice/data/SMS] **Price:** PKR [amount]/month • Size to cover 120-150% of usage • EXCLUDE zero-usage services • **ZONG PAKISTAN ACTUAL PRICING:** - Daily: PKR 5-23/day (call-only/data-only/SMS for ultra-low users) - Weekly Light: PKR 120-200/week (500 mins + 500MB-4GB) - Weekly Mid-Premium: PKR 290-600/week (8-100GB + unlimited calls) - Monthly Ultra-light: PKR 50-240/month (150MB-12GB data only) - Monthly Basic: PKR 420-575/month (1-8GB + 1000-3000 mins) - Monthly Mid: PKR 1200-1300/month (12-20GB + unlimited on-net) - Monthly Premium: PKR 1500-2000/month (50-200GB + 3000-3500 mins) - Monthly Heavy: PKR 4000/month (400GB + 5000 mins) • Choose package validity (daily/weekly/monthly) based on usage patterns! ### ✨ KEY BENEFITS • **[Benefit 1]:** [Quantified value proposition] • **[Benefit 2]:** [Cost savings or coverage details] • **[Benefit 3]:** [Flexibility or convenience] • **[Benefit 4]:** [Additional advantage] ### 💰 PRICING STRATEGY **Discount Offer:** [Specific PKR discount] **Business Logic:** [Why - ARPU/churn impact] **Upsell Opportunity:** [Services to promote] **Expected Impact:** [Quantified results] **Match validity to usage: Daily packages (5-23/day) for ultra-low, Weekly (120-600/week) for light, Monthly (150-4000/month) for regular users. Use actual Zong package names when possible!** """ response = query_ai(context) # Check for errors first if response and 'error' in response: st.error(f"❌ {response['error']}") st.info("💡 Please check your backend connection and Groq API key configuration.") elif response and 'answer' in response: # Beautiful formatted output st.markdown("""
💡 Personalized Package Recommendation
""", unsafe_allow_html=True) # Display formatted recommendation with st.container(): st.markdown('
', unsafe_allow_html=True) # Usage summary badges col1, col2, col3, col4 = st.columns(4) with col1: usage_level = "High" if comm['voice_total_calls'] > 500 else "Moderate" if comm['voice_total_calls'] > 200 else "Low" st.markdown(f'
📞 Voice: {usage_level}
', unsafe_allow_html=True) with col2: data_level = "High" if internet['total_mb'] > 1000 else "Moderate" if internet['total_mb'] > 500 else "Low" st.markdown(f'
📊 Data: {data_level}
', unsafe_allow_html=True) with col3: sms_level = "High" if sms['total_messages'] > 200 else "Moderate" if sms['total_messages'] > 50 else "Low" st.markdown(f'
💬 SMS: {sms_level}
', unsafe_allow_html=True) with col4: if intl['is_international_user']: st.markdown(f'
🌍 International
', unsafe_allow_html=True) else: st.markdown(f'
🏠 Domestic Only
', unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) # AI Response with markdown formatting st.markdown("---") st.markdown(response['answer']) st.markdown("---") st.markdown('
', unsafe_allow_html=True) else: st.warning("⚠️ Unable to generate AI recommendation. Please try again later.") else: st.error(f"❌ Customer {customer_id} not found") def render_cluster_visualization(): st.subheader("📊 Customer Segments") cluster_type = st.radio("Clustering Algorithm:", ["kmeans", "dbscan"], horizontal=True) clusters = get_clusters(cluster_type) if clusters: df_clusters = pd.DataFrame(clusters['clusters']) total_clusters = len(df_clusters) # Apply cluster filter if active if 'filters' in st.session_state and st.session_state.filters.get('clusters'): selected_cluster_ids = st.session_state.filters['clusters'] df_clusters = df_clusters[df_clusters['cluster_id'].isin(selected_cluster_ids)] if df_clusters.empty: st.warning(f"⚠️ No clusters match your filter selection: {selected_cluster_ids}") return st.success(f"✅ Showing {len(df_clusters)} filtered cluster(s) out of {total_clusters} total") # Pie chart fig = px.pie( df_clusters, values='size', names='cluster_id', title=f'Customer Distribution - {cluster_type.upper()}', hole=.4 ) st.plotly_chart(fig, use_container_width=True) # Bar comparison fig = go.Figure() fig.add_trace(go.Bar(name='Voice (mins)', x=df_clusters['cluster_id'], y=df_clusters['avg_voice_mins'])) fig.add_trace(go.Bar(name='Data (MB)', x=df_clusters['cluster_id'], y=df_clusters['avg_data_mb'])) fig.add_trace(go.Bar(name='SMS', x=df_clusters['cluster_id'], y=df_clusters['avg_sms'])) fig.update_layout(barmode='group', title='Average Usage by Cluster') st.plotly_chart(fig, use_container_width=True) # Table st.dataframe(df_clusters, use_container_width=True) def render_dynamic_clustering(): st.subheader("🔧 Run Custom Clustering") col1, col2 = st.columns(2) with col1: algorithm = st.selectbox("Algorithm", ["kmeans", "dbscan"]) with col2: if algorithm == "kmeans": n_clusters = st.slider("Number of Clusters", 2, 12, 6) else: n_clusters = 0 if st.button("▶️ Run Clustering", type="primary"): with st.spinner("Running clustering analysis..."): result = run_clustering(n_clusters if algorithm == "kmeans" else 6, algorithm) if 'error' not in result: st.success(f"✅ Found {result['n_clusters']} clusters") if result.get('silhouette_score'): st.metric("Silhouette Score", f"{result['silhouette_score']:.4f}") df_result = pd.DataFrame(result['clusters']) st.dataframe(df_result, use_container_width=True) else: st.error(f"Error: {result['error']}") def render_ai_chat(): st.subheader("💬 Ask AI About Your Data") # Example questions with st.expander("💡 Example Questions"): st.markdown(""" - What time of day has the most calls? - How many customers use SMS frequently? - What's the ratio of download to upload data? - Which customers should get international packages? - What's the average data usage for heavy users? """) if 'chat_history' not in st.session_state: st.session_state.chat_history = [] user_question = st.text_input("Ask a question:", placeholder="e.g., What time has peak call volume?") if st.button("🔍 Ask AI", type="primary"): if user_question: with st.spinner("Thinking..."): response = query_ai(user_question) if response and 'answer' in response: st.session_state.chat_history.append({ 'question': user_question, 'answer': response['answer'] }) elif response and 'error' in response: st.error(f"❌ {response['error']}") else: st.warning("⚠️ Unable to get AI response. Please try again.") # Display history if st.session_state.chat_history: st.markdown("---") for chat in reversed(st.session_state.chat_history[-10:]): st.markdown(f"**You:** {chat['question']}") st.info(f"**AI:** {chat['answer']}") st.markdown("---") def render_cohort_comparison(): """Side-by-side cohort/cluster comparison with delta highlights""" st.subheader("🔄 Cohort Comparison") st.markdown("Compare two customer segments side-by-side") # Show active filters banner filters_active = show_active_filters_banner() clusters_data = get_clusters("kmeans") if not clusters_data or 'clusters' not in clusters_data: st.error("Unable to load cluster data") return df_clusters = pd.DataFrame(clusters_data['clusters']) # Apply cluster filter if active if 'filters' in st.session_state and st.session_state.filters.get('clusters'): selected_cluster_ids = st.session_state.filters['clusters'] df_clusters = df_clusters[df_clusters['cluster_id'].isin(selected_cluster_ids)] if df_clusters.empty: st.warning(f"No clusters match your filter selection: {selected_cluster_ids}") return st.success(f"✅ Showing {len(df_clusters)} filtered cluster(s) out of {len(clusters_data['clusters'])} total") cluster_options = df_clusters['cluster_id'].tolist() col1, col2 = st.columns(2) with col1: cohort_a = st.selectbox("Select Cohort A", cluster_options, index=0, key="cohort_a") with col2: cohort_b = st.selectbox("Select Cohort B", cluster_options, index=min(1, len(cluster_options)-1), key="cohort_b") # Get cohort data cluster_a = df_clusters[df_clusters['cluster_id'] == cohort_a].iloc[0] cluster_b = df_clusters[df_clusters['cluster_id'] == cohort_b].iloc[0] # Comparison header st.markdown("---") col1, col2, col3 = st.columns([2, 1, 2]) with col1: st.markdown(f"### 📊 Cluster {cohort_a}") st.caption(f"**{cluster_a['size']:,} customers**") with col2: st.markdown("### VS") with col3: st.markdown(f"### 📊 Cluster {cohort_b}") st.caption(f"**{cluster_b['size']:,} customers**") st.markdown("---") # Define metrics to compare metrics = [ ('avg_voice_mins', 'Voice Usage', 'mins'), ('avg_data_mb', 'Data Usage', 'MB'), ('avg_sms', 'SMS Count', 'messages') ] comparison_data = [] for metric_key, metric_name, unit in metrics: val_a = cluster_a.get(metric_key, 0) val_b = cluster_b.get(metric_key, 0) # Calculate percentage difference if val_a > 0: delta_pct = ((val_b - val_a) / val_a) * 100 else: delta_pct = 0 comparison_data.append({ 'Metric': metric_name, f'Cluster {cohort_a}': f"{val_a:.1f} {unit}", f'Cluster {cohort_b}': f"{val_b:.1f} {unit}", 'Difference': f"{delta_pct:+.1f}%" }) # Visual comparison col1, col2, col3 = st.columns([2, 2, 1]) with col1: st.metric( label=f"{metric_name}", value=f"{val_a:.1f} {unit}", delta=None ) with col2: delta_color = "normal" if delta_pct >= 0 else "inverse" st.metric( label=f"{metric_name}", value=f"{val_b:.1f} {unit}", delta=f"{delta_pct:+.1f}%", delta_color=delta_color ) with col3: if abs(delta_pct) > 50: st.markdown(f"**⚠️ {abs(delta_pct):.0f}%**") elif abs(delta_pct) > 20: st.markdown(f"**⚡ {abs(delta_pct):.0f}%**") else: st.markdown(f"✓ {abs(delta_pct):.0f}%") st.markdown("---") # Summary table st.markdown("### 📋 Comparison Summary") df_comparison = pd.DataFrame(comparison_data) st.dataframe(df_comparison, use_container_width=True, hide_index=True) # Export button create_export_buttons(data=df_comparison, prefix="cohort_comparison") # Visual comparison chart st.markdown("---") st.markdown("### 📊 Visual Comparison") fig = go.Figure() metrics_names = [m[1] for m in metrics] values_a = [cluster_a.get(m[0], 0) for m in metrics] values_b = [cluster_b.get(m[0], 0) for m in metrics] fig.add_trace(go.Bar( name=f'Cluster {cohort_a}', x=metrics_names, y=values_a, marker_color='#667eea' )) fig.add_trace(go.Bar( name=f'Cluster {cohort_b}', x=metrics_names, y=values_b, marker_color='#f093fb' )) fig.update_layout( barmode='group', title=f"Cluster {cohort_a} vs Cluster {cohort_b}", xaxis_title="Metric", yaxis_title="Value", height=400 ) st.plotly_chart(fig, use_container_width=True) create_export_buttons(chart=fig, prefix="cohort_comparison_chart") # AI Insights for comparison - comprehensive cohort analysis voice_a = cluster_a.get('avg_voice_mins', 0) voice_b = cluster_b.get('avg_voice_mins', 0) data_a = cluster_a.get('avg_data_mb', 0) data_b = cluster_b.get('avg_data_mb', 0) sms_a = cluster_a.get('avg_sms', 0) sms_b = cluster_b.get('avg_sms', 0) voice_diff = ((voice_b - voice_a) / voice_a * 100) if voice_a > 0 else 0 data_diff = ((data_b - data_a) / data_a * 100) if data_a > 0 else 0 sms_diff = ((sms_b - sms_a) / sms_a * 100) if sms_a > 0 else 0 context = f""" COHORT COMPARISON - Comparing Cluster {cohort_a} vs Cluster {cohort_b} **Segment Sizes:** • Cluster {cohort_a}: {cluster_a['size']:,} customers • Cluster {cohort_b}: {cluster_b['size']:,} customers **Voice Usage:** • Cluster {cohort_a}: {voice_a:.1f} mins/customer • Cluster {cohort_b}: {voice_b:.1f} mins/customer • Difference: {voice_diff:+.1f}% **Data Usage:** • Cluster {cohort_a}: {data_a:.1f} MB/customer • Cluster {cohort_b}: {data_b:.1f} MB/customer • Difference: {data_diff:+.1f}% **SMS Usage:** • Cluster {cohort_a}: {sms_a:.1f} messages/customer • Cluster {cohort_b}: {sms_b:.1f} messages/customer • Difference: {sms_diff:+.1f}% Provide 3 strategic insights mentioning SPECIFIC CLUSTER numbers ({cohort_a} and {cohort_b}): 1. Key behavioral differences between Cluster {cohort_a} and Cluster {cohort_b} 2. Which cluster is more valuable and has better growth potential 3. Specific targeting strategies for each cluster """ show_ai_insights(context, "cohort comparison") # ============================================ # MAIN APP # ============================================ def main(): # Sidebar with st.sidebar: st.image("https://img.icons8.com/color/96/000000/phone.png", width=80) st.title("📊 Navigation") page = st.radio( "Select View:", [ "🏠 Overview Dashboard", "👤 Customer Lookup", "🔄 Cohort Comparison", "📈 Visual Insights", "🔬 Clustering Analysis", "💬 AI Assistant" ] ) st.markdown("---") st.markdown("### 🔌 Backend Status") # Backend connection check backend_status = None customer_count = None try: response = requests.get(f"{BACKEND_URL}/", timeout=5) if response.status_code == 200: backend_status = "connected" try: data = response.json() # Backend returns 'customers' key, not 'total_customers' customer_count = data.get('customers') or data.get('total_customers', 'N/A') except: pass else: backend_status = "error" except requests.exceptions.Timeout: backend_status = "timeout" except requests.exceptions.ConnectionError: backend_status = "offline" except: backend_status = "offline" # Display status if backend_status == "connected": st.success("✅ Connected") if customer_count and isinstance(customer_count, (int, float)): st.caption(f"Customers: {customer_count:,}") elif customer_count: st.caption(f"Customers: {customer_count}") elif backend_status == "timeout": st.warning("⏱️ Timeout") elif backend_status == "error": st.error("❌ Error") else: st.error("❌ Offline") st.markdown("---") st.markdown("### ℹ️ About") st.markdown(""" **New Features:** - 🎛️ Real-time filters - 🔄 Cohort comparison - 📥 Export/download - 💡 AI insights panels **Core Features:** - ⏰ Time analysis - 🌐 Data split - 🌍 International details - 🤖 AI recommendations """) # Main content render_header() # Load data stats = get_stats() if page == "🏠 Overview Dashboard": # Show active filters banner show_active_filters_banner() if stats: render_overview_metrics(stats) # Export button for overview st.markdown("---") overview_data = { 'Total Customers': stats.get('total_customers'), 'Total Voice Calls': stats.get('voice', {}).get('total_calls'), 'Total Data MB': stats.get('data', {}).get('total_mb'), 'Total SMS': stats.get('sms', {}).get('total_messages') } create_export_buttons(data=overview_data, prefix="overview_dashboard") st.markdown("---") tabs = st.tabs(["📞 Communication", "🌐 Internet", "💬 SMS"]) with tabs[0]: time_analysis = get_time_analysis() render_communication_insights(stats, time_analysis) # AI Insights for communication - ONLY voice/call related voice_stats = stats.get('voice', {}) intl_stats = stats.get('international', {}) time_dist = time_analysis.get('time_distribution', {}) if time_analysis else {} context = f""" VOICE COMMUNICATION ANALYSIS (focus ONLY on voice/calls, no data/SMS): Call Volume: - Total voice calls: {voice_stats.get('total_calls', 0):,} - Total voice minutes: {voice_stats.get('total_minutes', 0):,.0f} - Average calls per customer: {voice_stats.get('avg_calls_per_customer', 0):.1f} - Average duration per call: {voice_stats.get('avg_duration_per_call', 0):.1f} mins Time Distribution: - Morning (6am-12pm): {time_dist.get('Morning', 0):,} calls ({time_dist.get('Morning', 0) / sum(time_dist.values()) * 100 if sum(time_dist.values()) > 0 else 0:.1f}%) - Evening (12pm-6pm): {time_dist.get('Evening', 0):,} calls ({time_dist.get('Evening', 0) / sum(time_dist.values()) * 100 if sum(time_dist.values()) > 0 else 0:.1f}%) - Night (6pm-6am): {time_dist.get('Night', 0):,} calls ({time_dist.get('Night', 0) / sum(time_dist.values()) * 100 if sum(time_dist.values()) > 0 else 0:.1f}%) International Calling: - International users: {intl_stats.get('total_users', 0):,} ({intl_stats.get('percentage', 0):.2f}%) - Total international calls: {intl_stats.get('total_calls', 0):,} - Countries reached: {intl_stats.get('unique_countries', 0)} Provide 3 insights specifically about VOICE CALLING patterns, peak times, and international calling opportunities. """ show_ai_insights(context, "communication analysis") with tabs[1]: render_internet_insights(stats) # AI Insights for internet - ONLY data/internet related data_stats = stats.get('data', {}) total_data_mb = data_stats.get('total_mb', 0) upload_mb = data_stats.get('upload_mb', 0) download_mb = data_stats.get('download_mb', 0) avg_data_mb = data_stats.get('avg_mb_per_customer', 0) context = f""" INTERNET DATA USAGE ANALYSIS (focus ONLY on data/internet, no voice/SMS): Data Volume: - Total data consumed: {total_data_mb:,.0f} MB ({total_data_mb/1024:.1f} GB) - Average per customer: {avg_data_mb:.1f} MB - Data heavy users (>1GB): {data_stats.get('data_lovers', 0):,} customers Upload vs Download: - Total upload: {upload_mb:,.0f} MB ({upload_mb/1024:.1f} GB) - Total download: {download_mb:,.0f} MB ({download_mb/1024:.1f} GB) - Upload percentage: {(upload_mb / total_data_mb * 100) if total_data_mb > 0 else 0:.1f}% - Download percentage: {(download_mb / total_data_mb * 100) if total_data_mb > 0 else 0:.1f}% - Upload to download ratio: {(upload_mb / download_mb) if download_mb > 0 else 0:.2f}:1 Provide 3 insights specifically about DATA USAGE patterns, upload vs download behavior, and data package opportunities. """ show_ai_insights(context, "internet usage") with tabs[2]: render_sms_insights(stats) # AI Insights for SMS - ONLY SMS related sms_stats = stats.get('sms', {}) total_customers = stats.get('total_customers', 1) context = f""" SMS MESSAGING ANALYSIS (focus ONLY on SMS/text messaging, no voice/data): SMS Volume: - Total SMS messages: {sms_stats.get('total_messages', 0):,} - Average per customer: {sms_stats.get('avg_per_customer', 0):.2f} messages - Total customers: {total_customers:,} - SMS active users: {sms_stats.get('active_users', 0):,} Usage Context: - SMS appears to be {'very low' if sms_stats.get('avg_per_customer', 0) < 1 else 'moderate' if sms_stats.get('avg_per_customer', 0) < 50 else 'high'} usage - This suggests customers may prefer messaging apps over traditional SMS Provide 3 insights specifically about SMS USAGE patterns, why SMS might be low/high, and SMS package/strategy recommendations. """ show_ai_insights(context, "SMS analysis") elif page == "👤 Customer Lookup": # Show active filters banner show_active_filters_banner() render_customer_lookup() elif page == "🔄 Cohort Comparison": # Show active filters banner show_active_filters_banner() render_cohort_comparison() elif page == "📈 Visual Insights": # Show active filters banner show_active_filters_banner() st.subheader("📊 Visual Insights") viz_option = st.selectbox( "Select Visualization:", ["Time Distribution", "Data Breakdown", "Customer Segments"] ) fig = None if viz_option == "Time Distribution": viz = get_visualization("time-distribution") if viz and 'chart' in viz: fig = go.Figure(json.loads(viz['chart'])) st.plotly_chart(fig, use_container_width=True) elif viz_option == "Data Breakdown": viz = get_visualization("data-breakdown") if viz and 'chart' in viz: fig = go.Figure(json.loads(viz['chart'])) st.plotly_chart(fig, use_container_width=True) elif viz_option == "Customer Segments": viz = get_visualization("customer-segments") if viz and 'chart' in viz: fig = go.Figure(json.loads(viz['chart'])) st.plotly_chart(fig, use_container_width=True) # Export for visualizations if fig: create_export_buttons(chart=fig, prefix=f"viz_{viz_option.lower().replace(' ', '_')}") elif page == "🔬 Clustering Analysis": # Show active filters banner show_active_filters_banner() tab1, tab2 = st.tabs(["📊 View Clusters", "🔧 Run Custom Clustering"]) with tab1: render_cluster_visualization() # AI Insights for clustering - comprehensive cluster patterns clusters_data = get_clusters("kmeans") if clusters_data and 'clusters' in clusters_data: df_clusters = pd.DataFrame(clusters_data['clusters']) total_customers = df_clusters['size'].sum() # Build detailed cluster breakdown cluster_details = [] for _, row in df_clusters.iterrows(): cluster_details.append(f""" **Cluster {row['cluster_id']}:** {row['size']:,} customers ({row['size']/total_customers*100:.1f}%) • Voice: {row['avg_voice_mins']:.1f} mins | Data: {row['avg_data_mb']:.1f} MB | SMS: {row['avg_sms']:.1f} msg""") context = f""" CLUSTERING PATTERN ANALYSIS - {total_customers:,} customers across {len(df_clusters)} clusters 📊 **Cluster Breakdown:** {chr(10).join(cluster_details)} **Overall Statistics:** • Largest cluster: {df_clusters['size'].max():,} customers ({df_clusters['size'].max()/total_customers*100:.1f}%) • Smallest cluster: {df_clusters['size'].min():,} customers ({df_clusters['size'].min()/total_customers*100:.1f}%) • Average voice usage range: {df_clusters['avg_voice_mins'].min():.1f} - {df_clusters['avg_voice_mins'].max():.1f} mins • Average data usage range: {df_clusters['avg_data_mb'].min():.1f} - {df_clusters['avg_data_mb'].max():.1f} MB Provide 3 strategic insights referencing SPECIFIC CLUSTER IDs: 1. Which specific cluster (by number) represents highest-value customers and why 2. Which specific cluster shows risk and needs attention 3. Actionable strategies mentioning specific cluster IDs to target """ show_ai_insights(context, "clustering analysis") with tab2: render_dynamic_clustering() elif page == "💬 AI Assistant": # Show active filters banner show_active_filters_banner() render_ai_chat() if __name__ == "__main__": main()