|
|
import streamlit as st
|
|
|
import plotly.express as px
|
|
|
import plotly.graph_objects as go
|
|
|
import pandas as pd
|
|
|
import os
|
|
|
from processor import get_integrated_data, clean_sales_data
|
|
|
from models import perform_customer_segmentation, forecast_sales, analyze_sentiment
|
|
|
from rag_engine import RAGSystem
|
|
|
|
|
|
|
|
|
# Configure the browser tab title, the wide layout and the page icon.
st.set_page_config(
    page_title="Business Analytics System",
    layout="wide",
    # NOTE(review): the original icon glyph was lost to an encoding error
    # (only a stray Thai digit survived); the bar-chart emoji is a stand-in
    # -- confirm the intended icon.
    page_icon="📊",
)
|
|
|
|
|
|
|
|
|
# Inject the stylesheet for the metric-card classes. The literal must contain
# only the <style> element: any stray text inside it would be emitted into the
# page together with the CSS.
st.markdown(
    """
<style>
.metric-card {
    background-color: #f0f2f6;
    padding: 20px;
    border-radius: 10px;
    text-align: center;
}
.metric-title {
    font-size: 16px;
    color: #555;
}
.metric-value {
    font-size: 32px;
    font-weight: bold;
    color: #000;
}
</style>
""",
    unsafe_allow_html=True,
)
|
|
|
|
|
|
|
|
|
@st.cache_data
def load_all_data():
    """Load the integrated datasets and derive the frames the dashboard uses.

    Returns:
        A 4-tuple ``(sales, reviews, logs, rfm)``:

        * ``sales`` -- the ``'sales'`` dataset after ``clean_sales_data``;
        * ``reviews`` -- the ``'reviews'`` dataset after ``analyze_sentiment``;
        * ``logs`` -- the ``'logs'`` dataset exactly as returned by
          ``get_integrated_data``;
        * ``rfm`` -- the result of ``perform_customer_segmentation`` on the
          cleaned sales.
    """
    raw = get_integrated_data()
    cleaned_sales = clean_sales_data(raw['sales'])
    # Tuple elements are evaluated left to right, so sentiment analysis still
    # runs before the segmentation step.
    return (
        cleaned_sales,
        analyze_sentiment(raw['reviews']),
        raw['logs'],
        perform_customer_segmentation(cleaned_sales),
    )
|
|
|
|
|
|
@st.cache_resource
def get_rag_system():
    """Create a ``RAGSystem`` over the ``data`` directory.

    The directory is resolved relative to this file: it is the ``data`` folder
    that sits next to the directory containing this script.

    Returns:
        The newly constructed ``RAGSystem`` instance.
    """
    this_file = os.path.abspath(__file__)
    # Two dirname() hops: script -> its folder -> that folder's parent.
    base_dir = os.path.dirname(os.path.dirname(this_file))
    return RAGSystem(os.path.join(base_dir, 'data'))
|
|
|
|
|
|
|
|
|
|
|
|
# Load every dataset up front; if the underlying files are missing, show an
# error and halt the script instead of rendering a broken page.
try:
    _datasets = load_all_data()
except FileNotFoundError:
    st.error("Data files not found. Please run data generation first.")
    st.stop()
else:
    sales_df, reviews_df, logs_df, rfm_df = _datasets
|
|
|
|
|
|
|
|
|
# Sidebar: title, logo, page selector and a version banner.
# The labels below are the values `page` is compared against further down.
_PAGE_NAMES = [
    "Overview",
    "Sales Performance",
    "Customer Segments",
    "Forecasting",
    "Feedback Analysis",
    "Business Q&A",
]

st.sidebar.title("Analytics Portal")
st.sidebar.image(
    "https://img.icons8.com/color/96/000000/monitor--v1.png",
    width=50,
)
page = st.sidebar.radio("Navigate", _PAGE_NAMES)

st.sidebar.markdown("---")
st.sidebar.info("Integrated Business Analytics System v1.0")
|
|
|
|
|
|
|
|
|
# Overview page: four headline metrics plus the five most recent sales and
# reviews (ordered by their 'Date' column).
if page == "Overview":
    # NOTE(review): the original title glyph was lost to an encoding error;
    # the bar-chart emoji is a stand-in -- confirm the intended icon.
    st.title("📊 Integrated Business Analytics Dashboard")
    st.markdown("Real-time insights on Sales, Customers, and Operations.")

    total_revenue = sales_df['TotalPrice'].sum()
    total_orders = len(sales_df)  # one row of sales_df is counted as one order
    # An empty sales table would otherwise divide by zero (or display "nan").
    avg_order_value = total_revenue / total_orders if total_orders else 0.0
    avg_sentiment = reviews_df['Sentiment_Score'].mean()

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Revenue", f"${total_revenue:,.2f}")
    with col2:
        st.metric("Total Orders", f"{total_orders}")
    with col3:
        st.metric("Avg Order Value", f"${avg_order_value:,.2f}")
    with col4:
        st.metric("Avg Sentiment", f"{avg_sentiment:.2f}", delta_color="normal")

    st.markdown("### Recent Activity")
    c1, c2 = st.columns(2)
    with c1:
        st.subheader("Recent Sales")
        recent_sales = sales_df.sort_values('Date', ascending=False).head(5)
        st.dataframe(recent_sales[['Date', 'Product', 'TotalPrice', 'Region']])
    with c2:
        st.subheader("Recent Reviews")
        recent_reviews = reviews_df.sort_values('Date', ascending=False).head(5)
        st.dataframe(
            recent_reviews[['Date', 'Product', 'Sentiment_Label', 'ReviewText']]
        )
|
|
|
|
|
|
|
|
|
# Sales Performance page: daily revenue trend, revenue by category and a
# product ranking, optionally restricted to one region.
# `page` is a single radio selection, so this guard cannot overlap with any
# other page branch.
if page == "Sales Performance":
    # NOTE(review): the original title glyph was lost to an encoding error;
    # the trend-chart emoji is a stand-in -- confirm the intended icon.
    st.title("📈 Sales Performance")

    region = st.selectbox(
        "Select Region",
        ["All"] + list(sales_df['Region'].unique()),
    )
    if region != "All":
        filtered_sales = sales_df[sales_df['Region'] == region]
    else:
        filtered_sales = sales_df

    col1, col2 = st.columns(2)

    with col1:
        # Sum revenue per calendar day.
        # NOTE(review): resample('D') needs 'Date' to be datetime-like --
        # presumably guaranteed by clean_sales_data; confirm.
        daily = (
            filtered_sales.set_index('Date')
            .resample('D')['TotalPrice']
            .sum()
            .reset_index()
        )
        fig_line = px.line(
            daily, x='Date', y='TotalPrice', title='Daily Revenue Trend'
        )
        st.plotly_chart(fig_line, use_container_width=True)

    with col2:
        cat_sales = (
            filtered_sales.groupby('Category')['TotalPrice'].sum().reset_index()
        )
        fig_bar = px.bar(
            cat_sales,
            x='Category',
            y='TotalPrice',
            title='Revenue by Category',
            color='Category',
        )
        st.plotly_chart(fig_bar, use_container_width=True)

    st.subheader("Top Selling Products")
    # Rank products by total revenue, highest first.
    top_prods = (
        filtered_sales.groupby('Product')
        .agg({'TotalPrice': 'sum', 'Quantity': 'sum'})
        .sort_values('TotalPrice', ascending=False)
        .reset_index()
    )
    st.dataframe(top_prods)
|
|
|
|
|
|
|
|
|
# Customer Segments page: 3-D scatter of the RFM clusters and per-cluster means.
# `page` is a single radio selection, so this guard cannot overlap with any
# other page branch.
if page == "Customer Segments":
    st.title("👥 Customer Segmentation (RFM)")
    st.markdown(
        "Using K-Means clustering on Recency, Frequency, and Monetary value."
    )

    col1, col2 = st.columns([3, 1])

    with col1:
        fig_clust = px.scatter_3d(
            rfm_df.reset_index(),
            x='Recency',
            y='Frequency',
            z='Monetary',
            color='Cluster',
            title="Customer Clusters (RFM)",
            opacity=0.7,
        )
        st.plotly_chart(fig_clust, use_container_width=True)

    with col2:
        st.subheader("Cluster Stats")
        # numeric_only=True keeps the mean from raising on pandas 2.x should
        # rfm_df carry any non-numeric column; with all-numeric columns the
        # result is unchanged.
        cluster_stats = (
            rfm_df.groupby('Cluster').mean(numeric_only=True).reset_index()
        )
        st.dataframe(cluster_stats.round(2))

    st.markdown("### Insights")
    st.info("Clusters allow identifying loyal customers (High Frequency/Monetary) vs Churn risk (High Recency).")
|
|
|
|
|
|
|
|
|
# Forecasting page: historical revenue and the forecast for a user-chosen
# horizon, drawn as two series on one line chart.
# `page` is a single radio selection, so this guard cannot overlap with any
# other page branch.
if page == "Forecasting":
    st.title("🔮 Sales Forecasting")

    days = st.slider("Forecast Days", 7, 90, 30)

    hist_df, pred_df = forecast_sales(sales_df, days)

    # Build labelled copies instead of mutating the frames handed back by
    # forecast_sales; the forecast column is renamed so both frames share the
    # 'TotalPrice' y-axis column.
    hist_df = hist_df.assign(Type='Historical')
    pred_df = pred_df.rename(columns={'Predicted_Sales': 'TotalPrice'}).assign(
        Type='Predicted'
    )

    combined = pd.concat([hist_df, pred_df])

    fig = px.line(
        combined,
        x='Date',
        y='TotalPrice',
        color='Type',
        title=f"Sales Forecast (Next {days} Days)",
    )
    # Dashed marker at the last historical date, i.e. where the forecast starts.
    fig.add_vline(x=hist_df['Date'].max(), line_dash="dash", line_color="gray")
    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
# Feedback Analysis page: sentiment distribution, average sentiment per
# product and a filterable table of reviews.
# `page` is a single radio selection, so this guard cannot overlap with any
# other page branch.
if page == "Feedback Analysis":
    st.title("💬 Sentiment Analysis")

    col1, col2 = st.columns(2)

    with col1:
        counts = reviews_df['Sentiment_Label'].value_counts()
        fig_pie = px.pie(
            names=counts.index,
            values=counts.values,
            # color_discrete_map is only honoured when `color` is supplied;
            # without it the pie falls back to the default colour sequence.
            color=counts.index,
            title='Sentiment Distribution',
            color_discrete_map={
                'Positive': 'green',
                'Negative': 'red',
                'Neutral': 'gray',
            },
        )
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        # Ascending sort of the per-product mean sentiment score.
        avg_by_prod = (
            reviews_df.groupby('Product')['Sentiment_Score']
            .mean()
            .sort_values()
            .reset_index()
        )
        fig_bar = px.bar(
            avg_by_prod,
            x='Sentiment_Score',
            y='Product',
            orientation='h',
            title='Avg Sentiment by Product',
            color='Sentiment_Score',
            color_continuous_scale='RdYlGn',
        )
        st.plotly_chart(fig_bar, use_container_width=True)

    st.subheader("Review Explorer")
    sentiment_filter = st.multiselect(
        "Filter Sentiment",
        ['Positive', 'Neutral', 'Negative'],
        default=['Negative'],
    )
    selected = reviews_df[reviews_df['Sentiment_Label'].isin(sentiment_filter)]
    st.dataframe(selected[['Date', 'Product', 'Rating', 'ReviewText']])
|
|
|
|
|
|
|
|
|
# Business Q&A page: free-text questions answered by the RAG system, with the
# retrieved context shown in an expander.
# `page` is a single radio selection, so this guard cannot overlap with any
# other page branch.
if page == "Business Q&A":
    st.title("🤖 Business Q&A")
    rag = get_rag_system()

    # Build the index (and then initialise the LLM) only while the RAG object
    # has no vector store yet.
    if rag.vector_store is None:
        with st.spinner("Initializing Knowledge Base (this may take a moment)..."):
            success = rag.build_index()
            if success:
                rag.init_llm()
                st.success("Knowledge Base Ready!")
            else:
                st.error("Could not build index. Please check if data files exist in the 'data' directory.")

    query = st.text_input("Ask a question:", placeholder="e.g., What are customers saying about the Pro Laptop?")

    if query:
        with st.spinner("Thinking..."):
            # UI boundary: any failure while answering is reported on the page
            # rather than aborting the script with a traceback.
            try:
                result = rag.query(query)
                st.markdown("### Answer")
                st.write(result['answer'])

                with st.expander("View Retrieved Context"):
                    if result['context']:
                        for i, doc in enumerate(result['context']):
                            st.markdown(f"**Source {i+1} ({doc.metadata.get('source', 'unknown')}):**")
                            st.text(doc.page_content)
                    else:
                        st.text("No context retrieved.")
            except Exception as e:
                st.error(f"An error occurred: {e}")
|
|
|
|