"""Streamlit dashboard for the Integrated Business Analytics System.

The script loads the integrated data once (cached), draws a sidebar for
navigation and renders exactly one of six pages per run: Overview, Sales
Performance, Customer Segments, Forecasting, Feedback Analysis and
Business Q&A.
"""
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

from models import analyze_sentiment, forecast_sales, perform_customer_segmentation
from processor import clean_sales_data, get_integrated_data
from rag_engine import RAGSystem

# Page Config -- must be the first Streamlit call of the script.
st.set_page_config(page_title="Business Analytics System",
                   layout="wide", page_icon="📊")

# Custom CSS (the style payload is currently only whitespace).
st.markdown(""" """, unsafe_allow_html=True)


@st.cache_data
def load_all_data():
    """Load the integrated data and derive the frames the pages use.

    Returns:
        A tuple ``(sales, reviews, logs, rfm)`` where ``sales`` is the
        output of ``clean_sales_data``, ``reviews`` the output of
        ``analyze_sentiment``, ``logs`` the untouched ``'logs'`` entry and
        ``rfm`` the output of ``perform_customer_segmentation(sales)``.
    """
    data = get_integrated_data()
    sales = clean_sales_data(data['sales'])
    reviews = analyze_sentiment(data['reviews'])
    logs = data['logs']
    rfm = perform_customer_segmentation(sales)
    return sales, reviews, logs, rfm


@st.cache_resource
def get_rag_system():
    """Create the shared ``RAGSystem`` for the Business Q&A page.

    The data directory is resolved as ``<parent of this file's directory>/data``.
    """
    # Construct absolute path to data directory
    app_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(os.path.dirname(app_dir), 'data')
    return RAGSystem(data_dir)


# --- Overview ---
def render_overview(sales_df, reviews_df):
    """Render the KPI row and the five most recent sales and reviews."""
    st.title("📊 Integrated Business Analytics Dashboard")
    st.markdown("Real-time insights on Sales, Customers, and Operations.")

    # KPI Row
    col1, col2, col3, col4 = st.columns(4)
    total_revenue = sales_df['TotalPrice'].sum()
    total_orders = len(sales_df)
    # Guard the empty-data case: dividing by zero orders would show NaN/inf.
    avg_order_value = total_revenue / total_orders if total_orders else 0.0
    avg_sentiment = reviews_df['Sentiment_Score'].mean()

    with col1:
        st.metric("Total Revenue", f"${total_revenue:,.2f}")
    with col2:
        st.metric("Total Orders", f"{total_orders}")
    with col3:
        st.metric("Avg Order Value", f"${avg_order_value:,.2f}")
    with col4:
        st.metric("Avg Sentiment", f"{avg_sentiment:.2f}",
                  delta_color="normal")

    st.markdown("### Recent Activity")
    c1, c2 = st.columns(2)
    with c1:
        st.subheader("Recent Sales")
        recent_sales = sales_df.sort_values('Date', ascending=False).head(5)
        st.dataframe(recent_sales[['Date', 'Product', 'TotalPrice', 'Region']])
    with c2:
        st.subheader("Recent Reviews")
        recent_reviews = reviews_df.sort_values('Date', ascending=False).head(5)
        st.dataframe(
            recent_reviews[['Date', 'Product', 'Sentiment_Label', 'ReviewText']])


# --- Sales Performance ---
def render_sales_performance(sales_df):
    """Render revenue charts and a product table, optionally for one region."""
    st.title("📈 Sales Performance")

    # Filters
    region = st.selectbox(
        "Select Region", ["All"] + list(sales_df['Region'].unique()))
    if region != "All":
        filtered_sales = sales_df[sales_df['Region'] == region]
    else:
        filtered_sales = sales_df

    # Charts
    col1, col2 = st.columns(2)
    with col1:
        # Time series: revenue summed per calendar day.
        daily = (filtered_sales.set_index('Date')
                 .resample('D')['TotalPrice'].sum().reset_index())
        fig_line = px.line(daily, x='Date', y='TotalPrice',
                           title='Daily Revenue Trend')
        st.plotly_chart(fig_line, use_container_width=True)
    with col2:
        # Category Bar
        cat_sales = (filtered_sales.groupby('Category')['TotalPrice']
                     .sum().reset_index())
        fig_bar = px.bar(cat_sales, x='Category', y='TotalPrice',
                         title='Revenue by Category', color='Category')
        st.plotly_chart(fig_bar, use_container_width=True)

    # Product drilldown
    st.subheader("Top Selling Products")
    top_prods = (filtered_sales.groupby('Product')
                 .agg({'TotalPrice': 'sum', 'Quantity': 'sum'})
                 .sort_values('TotalPrice', ascending=False)
                 .reset_index())
    st.dataframe(top_prods)


# --- Customer Segments ---
def render_customer_segments(rfm_df):
    """Render the 3D RFM cluster scatter and per-cluster mean statistics."""
    st.title("👥 Customer Segmentation (RFM)")
    st.markdown(
        "Using K-Means clustering on Recency, Frequency, and Monetary value.")

    col1, col2 = st.columns([3, 1])
    with col1:
        # 3D Scatter works best, or 2D pair
        fig_clust = px.scatter_3d(rfm_df.reset_index(), x='Recency',
                                  y='Frequency', z='Monetary', color='Cluster',
                                  title="Customer Clusters (RFM)", opacity=0.7)
        st.plotly_chart(fig_clust, use_container_width=True)
    with col2:
        st.subheader("Cluster Stats")
        # numeric_only keeps the mean well-defined should the frame ever
        # carry a non-numeric column (pandas >= 2.0 raises otherwise).
        cluster_stats = (rfm_df.groupby('Cluster')
                         .mean(numeric_only=True).reset_index())
        st.dataframe(cluster_stats.round(2))

    st.markdown("### Insights")
    st.info("Clusters allow identifying loyal customers (High Frequency/Monetary) vs Churn risk (High Recency).")


# --- Forecasting ---
def render_forecasting(sales_df):
    """Render historical and predicted sales on one line chart."""
    st.title("🔮 Sales Forecasting")

    days = st.slider("Forecast Days", 7, 90, 30)
    hist_df, pred_df = forecast_sales(sales_df, days)

    # Combine for plot. Work on copies so the frames returned by
    # forecast_sales are not modified in place.
    history = hist_df.assign(Type='Historical')
    forecast = (pred_df.assign(Type='Predicted')
                .rename(columns={'Predicted_Sales': 'TotalPrice'}))
    combined = pd.concat([history, forecast])

    fig = px.line(combined, x='Date', y='TotalPrice', color='Type',
                  title=f"Sales Forecast (Next {days} Days)")
    # Dashed marker at the last historical date.
    fig.add_vline(x=hist_df['Date'].max(), line_dash="dash",
                  line_color="gray")
    st.plotly_chart(fig, use_container_width=True)


# --- Feedback Analysis ---
def render_feedback_analysis(reviews_df):
    """Render sentiment distribution, per-product sentiment and a review table."""
    st.title("💬 Sentiment Analysis")

    # Sentiment Distribution
    col1, col2 = st.columns(2)
    with col1:
        sentiment_counts = (reviews_df['Sentiment_Label'].value_counts()
                            .rename_axis('Sentiment')
                            .reset_index(name='Reviews'))
        # color_discrete_map is only honoured for the column passed as
        # `color`, so the label column is given for both names and color.
        fig_pie = px.pie(sentiment_counts, names='Sentiment', values='Reviews',
                         color='Sentiment', title='Sentiment Distribution',
                         color_discrete_map={'Positive': 'green',
                                             'Negative': 'red',
                                             'Neutral': 'gray'})
        st.plotly_chart(fig_pie, use_container_width=True)
    with col2:
        avg_by_prod = (reviews_df.groupby('Product')['Sentiment_Score']
                       .mean().sort_values().reset_index())
        fig_bar = px.bar(avg_by_prod, x='Sentiment_Score', y='Product',
                         orientation='h', title='Avg Sentiment by Product',
                         color='Sentiment_Score',
                         color_continuous_scale='RdYlGn')
        st.plotly_chart(fig_bar, use_container_width=True)

    st.subheader("Review Explorer")
    sentiment_filter = st.multiselect(
        "Filter Sentiment", ['Positive', 'Neutral', 'Negative'],
        default=['Negative'])
    selected = reviews_df[reviews_df['Sentiment_Label'].isin(sentiment_filter)]
    st.dataframe(selected[['Date', 'Product', 'Rating', 'ReviewText']])


# --- Business Q&A ---
def render_business_qa():
    """Render the question box and show the answer with its retrieved context."""
    st.title("🤖 Business Q&A")

    rag = get_rag_system()

    # Initialize index if not ready
    if rag.vector_store is None:
        with st.spinner("Initializing Knowledge Base (this may take a moment)..."):
            success = rag.build_index()
            if success:
                rag.init_llm()
                st.success("Knowledge Base Ready!")
            else:
                st.error("Could not build index. Please check if data files exist in the 'data' directory.")
                # Without an index there is nothing to query, so do not
                # offer the question box on this run.
                return

    query = st.text_input(
        "Ask a question:",
        placeholder="e.g., What are customers saying about the Pro Laptop?")
    if not query:
        return

    with st.spinner("Thinking..."):
        # UI boundary: report any failure to the user instead of letting
        # the page crash.
        try:
            result = rag.query(query)
            st.markdown("### Answer")
            st.write(result['answer'])

            with st.expander("View Retrieved Context"):
                if result['context']:
                    for i, doc in enumerate(result['context']):
                        st.markdown(f"**Source {i+1} ({doc.metadata.get('source', 'unknown')}):**")
                        st.text(doc.page_content)
                else:
                    st.text("No context retrieved.")
        except Exception as e:
            st.error(f"An error occurred: {e}")


try:
    sales_df, reviews_df, logs_df, rfm_df = load_all_data()
except FileNotFoundError:
    st.error("Data files not found. Please run data generation first.")
    st.stop()

# Page label -> renderer; insertion order is the order shown in the sidebar.
PAGES = {
    "Overview": lambda: render_overview(sales_df, reviews_df),
    "Sales Performance": lambda: render_sales_performance(sales_df),
    "Customer Segments": lambda: render_customer_segments(rfm_df),
    "Forecasting": lambda: render_forecasting(sales_df),
    "Feedback Analysis": lambda: render_feedback_analysis(reviews_df),
    "Business Q&A": render_business_qa,
}

# Sidebar
st.sidebar.title("Analytics Portal")
st.sidebar.image(
    "https://img.icons8.com/color/96/000000/monitor--v1.png", width=50)
page = st.sidebar.radio("Navigate", list(PAGES))
st.sidebar.markdown("---")
st.sidebar.info("Integrated Business Analytics System v1.0")

PAGES[page]()