File size: 8,809 Bytes
8f69dec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import os
from processor import get_integrated_data, clean_sales_data
from models import perform_customer_segmentation, forecast_sales, analyze_sentiment
from rag_engine import RAGSystem

# Page Config
# The icon is spelled as a named Unicode escape (U+1F4CA) so the source stays
# ASCII-only and the emoji cannot be corrupted by an encoding mismatch.
st.set_page_config(page_title="Business Analytics System",
                   layout="wide", page_icon="\N{BAR CHART}")

# Custom CSS
# Stylesheet defining the metric-card / metric-title / metric-value classes.
# It is kept in a named constant and injected once as raw HTML.
_METRIC_CSS = """

<style>

    .metric-card {

        background-color: #f0f2f6;

        padding: 20px;

        border-radius: 10px;

        text-align: center;

    }

    .metric-title {

        font-size: 16px;

        color: #555;

    }

    .metric-value {

        font-size: 32px;

        font-weight: bold;

        color: #000;

    }

</style>

"""
st.markdown(_METRIC_CSS, unsafe_allow_html=True)


@st.cache_data
def load_all_data():
    """Load the integrated dataset and derive the frames the dashboard uses.

    Returns:
        A 4-tuple ``(sales, reviews, logs, rfm)`` where ``sales`` is the
        output of ``clean_sales_data``, ``reviews`` the output of
        ``analyze_sentiment``, ``logs`` the raw ``'logs'`` entry, and ``rfm``
        the output of ``perform_customer_segmentation`` on the cleaned sales.
    """
    bundle = get_integrated_data()
    cleaned_sales = clean_sales_data(bundle['sales'])
    scored_reviews = analyze_sentiment(bundle['reviews'])
    # Tuple elements evaluate left to right: logs are read before segmentation runs.
    return (
        cleaned_sales,
        scored_reviews,
        bundle['logs'],
        perform_customer_segmentation(cleaned_sales),
    )

@st.cache_resource
def get_rag_system():
    """Create the RAG system pointed at the ``data`` directory.

    The directory is resolved as ``<parent of this file's directory>/data``,
    using the absolute path of this file.
    """
    this_file = os.path.abspath(__file__)
    parent_of_app_dir = os.path.dirname(os.path.dirname(this_file))
    return RAGSystem(os.path.join(parent_of_app_dir, 'data'))



# Load every frame up front; if the data files are missing, show an error and
# halt the script instead of rendering pages against undefined frames.
try:
    _frames = load_all_data()
except FileNotFoundError:
    st.error("Data files not found. Please run data generation first.")
    st.stop()
else:
    sales_df, reviews_df, logs_df, rfm_df = _frames

# Sidebar: title, logo, page selector and a version footer.
_sidebar = st.sidebar
_PAGE_NAMES = [
    "Overview",
    "Sales Performance",
    "Customer Segments",
    "Forecasting",
    "Feedback Analysis",
    "Business Q&A",
]

_sidebar.title("Analytics Portal")
_sidebar.image("https://img.icons8.com/color/96/000000/monitor--v1.png", width=50)
# `page` drives the if/elif dispatch that renders the selected view.
page = _sidebar.radio("Navigate", _PAGE_NAMES)

_sidebar.markdown("---")
_sidebar.info("Integrated Business Analytics System v1.0")

# --- Overview ---
if page == "Overview":
    # Landing page: four headline KPIs, then the five most recent sales and reviews.
    # The emoji is a named Unicode escape so the source stays ASCII-only.
    st.title("\N{BAR CHART} Integrated Business Analytics Dashboard")
    st.markdown("Real-time insights on Sales, Customers, and Operations.")

    # KPI Row
    col1, col2, col3, col4 = st.columns(4)

    total_revenue = sales_df['TotalPrice'].sum()
    total_orders = len(sales_df)
    # With no orders the ratio is undefined (NaN or ZeroDivisionError depending
    # on dtype), so report 0 instead.
    avg_order_value = total_revenue / total_orders if total_orders else 0.0
    avg_sentiment = reviews_df['Sentiment_Score'].mean()

    with col1:
        st.metric("Total Revenue", f"${total_revenue:,.2f}")
    with col2:
        st.metric("Total Orders", f"{total_orders}")
    with col3:
        st.metric("Avg Order Value", f"${avg_order_value:,.2f}")
    with col4:
        st.metric("Avg Sentiment",
                  f"{avg_sentiment:.2f}", delta_color="normal")

    st.markdown("### Recent Activity")
    c1, c2 = st.columns(2)
    with c1:
        st.subheader("Recent Sales")
        # Newest first, top five rows, restricted to the columns worth showing.
        st.dataframe(sales_df.sort_values('Date', ascending=False).head(5)[
                     ['Date', 'Product', 'TotalPrice', 'Region']])
    with c2:
        st.subheader("Recent Reviews")
        st.dataframe(reviews_df.sort_values('Date', ascending=False).head(5)[
                     ['Date', 'Product', 'Sentiment_Label', 'ReviewText']])

# --- Sales Performance ---
elif page == "Sales Performance":
    st.title("๐Ÿ“ˆ Sales Performance")

    # Filters
    region = st.selectbox(
        "Select Region", ["All"] + list(sales_df['Region'].unique()))
    if region != "All":
        filtered_sales = sales_df[sales_df['Region'] == region]
    else:
        filtered_sales = sales_df

    # Charts
    col1, col2 = st.columns(2)

    with col1:
        # Time series
        daily = filtered_sales.set_index('Date').resample(
            'D')['TotalPrice'].sum().reset_index()
        fig_line = px.line(daily, x='Date', y='TotalPrice',
                           title='Daily Revenue Trend')
        st.plotly_chart(fig_line, use_container_width=True)

    with col2:
        # Category Bar
        cat_sales = filtered_sales.groupby(
            'Category')['TotalPrice'].sum().reset_index()
        fig_bar = px.bar(cat_sales, x='Category', y='TotalPrice',
                         title='Revenue by Category', color='Category')
        st.plotly_chart(fig_bar, use_container_width=True)

    # Product drilldown
    st.subheader("Top Selling Products")
    top_prods = filtered_sales.groupby('Product').agg(
        {'TotalPrice': 'sum', 'Quantity': 'sum'}).sort_values('TotalPrice', ascending=False).reset_index()
    st.dataframe(top_prods)

# --- Customer Segments ---
elif page == "Customer Segments":
    st.title("๐Ÿ‘ฅ Customer Segmentation (RFM)")
    st.markdown(
        "Using K-Means clustering on Recency, Frequency, and Monetary value.")

    col1, col2 = st.columns([3, 1])

    with col1:
        # 3D Scatter works best, or 2D pair
        fig_clust = px.scatter_3d(rfm_df.reset_index(), x='Recency', y='Frequency', z='Monetary', color='Cluster',
                                  title="Customer Clusters (RFM)", opacity=0.7)
        st.plotly_chart(fig_clust, use_container_width=True)

    with col2:
        st.subheader("Cluster Stats")
        cluster_stats = rfm_df.groupby('Cluster').mean().reset_index()
        st.dataframe(cluster_stats.round(2))

    st.markdown("### Insights")
    st.info("Clusters allow identifying loyal customers (High Frequency/Monetary) vs Churn risk (High Recency).")

# --- Forecasting ---
elif page == "Forecasting":
    st.title("๐Ÿ”ฎ Sales Forecasting")

    days = st.slider("Forecast Days", 7, 90, 30)

    hist_df, pred_df = forecast_sales(sales_df, days)

    # Combine for plot
    hist_df['Type'] = 'Historical'
    pred_df['Type'] = 'Predicted'
    pred_df.rename(columns={'Predicted_Sales': 'TotalPrice'}, inplace=True)

    combined = pd.concat([hist_df, pred_df])

    fig = px.line(combined, x='Date', y='TotalPrice', color='Type',
                  title=f"Sales Forecast (Next {days} Days)")
    fig.add_vline(x=hist_df['Date'].max(), line_dash="dash", line_color="gray")
    st.plotly_chart(fig, use_container_width=True)

# --- Feedback Analysis ---
elif page == "Feedback Analysis":
    st.title("๐Ÿ’ฌ Sentiment Analysis")

    # Sentiment Distribution
    col1, col2 = st.columns(2)

    with col1:
        counts = reviews_df['Sentiment_Label'].value_counts()
        fig_pie = px.pie(names=counts.index, values=counts.values, title='Sentiment Distribution',
                         color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'gray'})
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        avg_by_prod = reviews_df.groupby(
            'Product')['Sentiment_Score'].mean().sort_values().reset_index()
        fig_bar = px.bar(avg_by_prod, x='Sentiment_Score', y='Product', orientation='h',
                         title='Avg Sentiment by Product', color='Sentiment_Score', color_continuous_scale='RdYlGn')
        st.plotly_chart(fig_bar, use_container_width=True)

    st.subheader("Review Explorer")
    sentiment_filter = st.multiselect(
        "Filter Sentiment", ['Positive', 'Neutral', 'Negative'], default=['Negative'])
    st.dataframe(reviews_df[reviews_df['Sentiment_Label'].isin(
        sentiment_filter)][['Date', 'Product', 'Rating', 'ReviewText']])

# --- Business Q&A ---
elif page == "Business Q&A":
    st.title("๐Ÿค– Business Q&A")
    rag = get_rag_system()

    # Initialize index if not ready
    if rag.vector_store is None:
        with st.spinner("Initializing Knowledge Base (this may take a moment)..."):
            success = rag.build_index()
            if success:
                rag.init_llm()
                st.success("Knowledge Base Ready!")
            else:
                st.error("Could not build index. Please check if data files exist in the 'data' directory.")
            
    query = st.text_input("Ask a question:", placeholder="e.g., What are customers saying about the Pro Laptop?")
    
    if query:
        with st.spinner("Thinking..."):
            try:
                result = rag.query(query)
                st.markdown("### Answer")
                st.write(result['answer'])
                
                with st.expander("View Retrieved Context"):
                    if result['context']:
                        for i, doc in enumerate(result['context']):
                            st.markdown(f"**Source {i+1} ({doc.metadata.get('source', 'unknown')}):**")
                            st.text(doc.page_content)
                    else:
                        st.text("No context retrieved.")
            except Exception as e:
                st.error(f"An error occurred: {e}")