# Spaces: EdwardSamuel13/business-analytics-dashboard (Sleeping) | File size: 8,809 Bytes | 8f69dec

import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import os
from processor import get_integrated_data, clean_sales_data
from models import perform_customer_segmentation, forecast_sales, analyze_sentiment
from rag_engine import RAGSystem

# Page Config
# Stylesheet for the metric-card classes, injected once at start-up.
_METRIC_CARD_CSS = """

<style>

    .metric-card {

        background-color: #f0f2f6;

        padding: 20px;

        border-radius: 10px;

        text-align: center;

    }

    .metric-title {

        font-size: 16px;

        color: #555;

    }

    .metric-value {

        font-size: 32px;

        font-weight: bold;

        color: #000;

    }

</style>

"""

# The page configuration is issued before any other Streamlit call.
st.set_page_config(
    page_title="Business Analytics System",
    layout="wide",
    page_icon="📊",
)

# Raw HTML is required so the <style> tag is passed through unescaped.
st.markdown(_METRIC_CARD_CSS, unsafe_allow_html=True)


@st.cache_data
def load_all_data():
    """Load the integrated dataset and derive the frames used by the pages.

    Returns:
        A 4-tuple ``(sales, reviews, logs, rfm)`` where ``sales`` is the
        output of ``clean_sales_data``, ``reviews`` is the output of
        ``analyze_sentiment``, ``logs`` is passed through untouched, and
        ``rfm`` is the output of ``perform_customer_segmentation`` run on
        the cleaned sales.
    """
    raw = get_integrated_data()

    cleaned_sales = clean_sales_data(raw['sales'])
    scored_reviews = analyze_sentiment(raw['reviews'])
    raw_logs = raw['logs']

    # Segmentation consumes the cleaned sales, not the raw table.
    segments = perform_customer_segmentation(cleaned_sales)

    return cleaned_sales, scored_reviews, raw_logs, segments

@st.cache_resource
def get_rag_system():
    """Create the ``RAGSystem`` pointed at the project's ``data`` directory.

    The directory is resolved as ``<parent of this file's folder>/data``,
    using an absolute path derived from ``__file__``.
    """
    this_file = os.path.abspath(__file__)
    # Two dirname() hops: this file's folder, then that folder's parent.
    parent_of_app_dir = os.path.dirname(os.path.dirname(this_file))
    return RAGSystem(os.path.join(parent_of_app_dir, 'data'))



# Load every dataset up front; without the files no page can render.
try:
    _loaded_frames = load_all_data()
except FileNotFoundError:
    st.error("Data files not found. Please run data generation first.")
    st.stop()
else:
    sales_df, reviews_df, logs_df, rfm_df = _loaded_frames

# Sidebar
# Pages offered by the navigation radio; the router below matches on these.
_PAGE_NAMES = [
    "Overview",
    "Sales Performance",
    "Customer Segments",
    "Forecasting",
    "Feedback Analysis",
    "Business Q&A",
]

_sidebar = st.sidebar
_sidebar.title("Analytics Portal")
_sidebar.image(
    "https://img.icons8.com/color/96/000000/monitor--v1.png",
    width=50,
)
page = _sidebar.radio("Navigate", _PAGE_NAMES)

_sidebar.markdown("---")
_sidebar.info("Integrated Business Analytics System v1.0")

# --- Overview ---
if page == "Overview":
    # Landing page: headline KPIs plus the five most recent sales and reviews.
    st.title("📊 Integrated Business Analytics Dashboard")
    st.markdown("Real-time insights on Sales, Customers, and Operations.")

    # KPI Row
    col1, col2, col3, col4 = st.columns(4)

    total_revenue = sales_df['TotalPrice'].sum()
    total_orders = len(sales_df)
    # An empty sales table would otherwise divide by zero (or render inf/nan).
    avg_order_value = total_revenue / total_orders if total_orders else 0.0
    avg_sentiment = reviews_df['Sentiment_Score'].mean()
    # mean() over no values is NaN; show a placeholder instead of "nan".
    sentiment_display = (
        "N/A" if pd.isna(avg_sentiment) else f"{avg_sentiment:.2f}"
    )

    with col1:
        st.metric("Total Revenue", f"${total_revenue:,.2f}")
    with col2:
        st.metric("Total Orders", f"{total_orders}")
    with col3:
        st.metric("Avg Order Value", f"${avg_order_value:,.2f}")
    with col4:
        # No delta is shown, so delta_color is left at its default.
        st.metric("Avg Sentiment", sentiment_display)

    st.markdown("### Recent Activity")
    c1, c2 = st.columns(2)
    with c1:
        st.subheader("Recent Sales")
        st.dataframe(sales_df.sort_values('Date', ascending=False).head(5)[
                     ['Date', 'Product', 'TotalPrice', 'Region']])
    with c2:
        st.subheader("Recent Reviews")
        st.dataframe(reviews_df.sort_values('Date', ascending=False).head(5)[
                     ['Date', 'Product', 'Sentiment_Label', 'ReviewText']])

# --- Sales Performance ---
elif page == "Sales Performance":
    st.title("📈 Sales Performance")

    # Filters
    region = st.selectbox(
        "Select Region", ["All"] + list(sales_df['Region'].unique()))
    if region != "All":
        filtered_sales = sales_df[sales_df['Region'] == region]
    else:
        filtered_sales = sales_df

    # Charts
    col1, col2 = st.columns(2)

    with col1:
        # Time series
        daily = filtered_sales.set_index('Date').resample(
            'D')['TotalPrice'].sum().reset_index()
        fig_line = px.line(daily, x='Date', y='TotalPrice',
                           title='Daily Revenue Trend')
        st.plotly_chart(fig_line, use_container_width=True)

    with col2:
        # Category Bar
        cat_sales = filtered_sales.groupby(
            'Category')['TotalPrice'].sum().reset_index()
        fig_bar = px.bar(cat_sales, x='Category', y='TotalPrice',
                         title='Revenue by Category', color='Category')
        st.plotly_chart(fig_bar, use_container_width=True)

    # Product drilldown
    st.subheader("Top Selling Products")
    top_prods = filtered_sales.groupby('Product').agg(
        {'TotalPrice': 'sum', 'Quantity': 'sum'}).sort_values('TotalPrice', ascending=False).reset_index()
    st.dataframe(top_prods)

# --- Customer Segments ---
elif page == "Customer Segments":
    st.title("👥 Customer Segmentation (RFM)")
    st.markdown(
        "Using K-Means clustering on Recency, Frequency, and Monetary value.")

    col1, col2 = st.columns([3, 1])

    with col1:
        # 3D Scatter works best, or 2D pair
        fig_clust = px.scatter_3d(rfm_df.reset_index(), x='Recency', y='Frequency', z='Monetary', color='Cluster',
                                  title="Customer Clusters (RFM)", opacity=0.7)
        st.plotly_chart(fig_clust, use_container_width=True)

    with col2:
        st.subheader("Cluster Stats")
        cluster_stats = rfm_df.groupby('Cluster').mean().reset_index()
        st.dataframe(cluster_stats.round(2))

    st.markdown("### Insights")
    st.info("Clusters allow identifying loyal customers (High Frequency/Monetary) vs Churn risk (High Recency).")

# --- Forecasting ---
elif page == "Forecasting":
    st.title("🔮 Sales Forecasting")

    days = st.slider("Forecast Days", 7, 90, 30)

    hist_df, pred_df = forecast_sales(sales_df, days)

    # Combine for plot
    hist_df['Type'] = 'Historical'
    pred_df['Type'] = 'Predicted'
    pred_df.rename(columns={'Predicted_Sales': 'TotalPrice'}, inplace=True)

    combined = pd.concat([hist_df, pred_df])

    fig = px.line(combined, x='Date', y='TotalPrice', color='Type',
                  title=f"Sales Forecast (Next {days} Days)")
    fig.add_vline(x=hist_df['Date'].max(), line_dash="dash", line_color="gray")
    st.plotly_chart(fig, use_container_width=True)

# --- Feedback Analysis ---
elif page == "Feedback Analysis":
    st.title("💬 Sentiment Analysis")

    # Sentiment Distribution
    col1, col2 = st.columns(2)

    with col1:
        counts = reviews_df['Sentiment_Label'].value_counts()
        fig_pie = px.pie(names=counts.index, values=counts.values, title='Sentiment Distribution',
                         color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'gray'})
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        avg_by_prod = reviews_df.groupby(
            'Product')['Sentiment_Score'].mean().sort_values().reset_index()
        fig_bar = px.bar(avg_by_prod, x='Sentiment_Score', y='Product', orientation='h',
                         title='Avg Sentiment by Product', color='Sentiment_Score', color_continuous_scale='RdYlGn')
        st.plotly_chart(fig_bar, use_container_width=True)

    st.subheader("Review Explorer")
    sentiment_filter = st.multiselect(
        "Filter Sentiment", ['Positive', 'Neutral', 'Negative'], default=['Negative'])
    st.dataframe(reviews_df[reviews_df['Sentiment_Label'].isin(
        sentiment_filter)][['Date', 'Product', 'Rating', 'ReviewText']])

# --- Business Q&A ---
elif page == "Business Q&A":
    st.title("🤖 Business Q&A")
    rag = get_rag_system()

    # Initialize index if not ready
    if rag.vector_store is None:
        with st.spinner("Initializing Knowledge Base (this may take a moment)..."):
            success = rag.build_index()
            if success:
                rag.init_llm()
                st.success("Knowledge Base Ready!")
            else:
                st.error("Could not build index. Please check if data files exist in the 'data' directory.")
            
    query = st.text_input("Ask a question:", placeholder="e.g., What are customers saying about the Pro Laptop?")
    
    if query:
        with st.spinner("Thinking..."):
            try:
                result = rag.query(query)
                st.markdown("### Answer")
                st.write(result['answer'])
                
                with st.expander("View Retrieved Context"):
                    if result['context']:
                        for i, doc in enumerate(result['context']):
                            st.markdown(f"**Source {i+1} ({doc.metadata.get('source', 'unknown')}):**")
                            st.text(doc.page_content)
                    else:
                        st.text("No context retrieved.")
            except Exception as e:
                st.error(f"An error occurred: {e}")