# EdwardSamuel13's picture
# Upload 14 files
# 8f69dec verified
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import os
from processor import get_integrated_data, clean_sales_data
from models import perform_customer_segmentation, forecast_sales, analyze_sentiment
from rag_engine import RAGSystem
# Page configuration: browser-tab title, wide layout, and tab icon.
st.set_page_config(
    page_title="Business Analytics System",
    layout="wide",
    # Bar-chart emoji (U+1F4CA), written as an escape sequence so the icon
    # stays intact regardless of how this file is re-encoded.
    page_icon="\U0001F4CA",
)

# Custom CSS: rules for the .metric-card / .metric-title / .metric-value
# classes, injected into the page as raw HTML.
st.markdown("""
<style>
.metric-card {
background-color: #f0f2f6;
padding: 20px;
border-radius: 10px;
text-align: center;
}
.metric-title {
font-size: 16px;
color: #555;
}
.metric-value {
font-size: 32px;
font-weight: bold;
color: #000;
}
</style>
""", unsafe_allow_html=True)
@st.cache_data
def load_all_data():
    """Load the integrated data and derive the frames the dashboard uses.

    Returns:
        A ``(sales, reviews, logs, rfm)`` tuple: the cleaned sales data, the
        reviews after sentiment analysis, the ``'logs'`` entry as loaded, and
        the customer segmentation computed from the cleaned sales.
    """
    integrated = get_integrated_data()
    cleaned_sales = clean_sales_data(integrated['sales'])
    scored_reviews = analyze_sentiment(integrated['reviews'])
    return (
        cleaned_sales,
        scored_reviews,
        integrated['logs'],
        perform_customer_segmentation(cleaned_sales),
    )
@st.cache_resource
def get_rag_system():
    """Create the RAG system over the project's ``data`` directory.

    The directory is resolved as ``<parent of this file's directory>/data``.
    """
    # Two dirname() hops from the absolute file path: the file's own
    # directory first, then that directory's parent.
    this_file = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(this_file))
    return RAGSystem(os.path.join(project_root, 'data'))
# Load every dataset up front. If the data files are missing, show an error
# and stop this script run instead of rendering a broken page.
try:
    _loaded_frames = load_all_data()
except FileNotFoundError:
    st.error("Data files not found. Please run data generation first.")
    st.stop()
else:
    sales_df, reviews_df, logs_df, rfm_df = _loaded_frames
# Sidebar: portal title, logo, page navigation, and a version note.
_sidebar = st.sidebar
_sidebar.title("Analytics Portal")
_sidebar.image("https://img.icons8.com/color/96/000000/monitor--v1.png", width=50)

_nav_pages = [
    "Overview",
    "Sales Performance",
    "Customer Segments",
    "Forecasting",
    "Feedback Analysis",
    "Business Q&A",
]
# `page` selects which dashboard section is rendered below.
page = _sidebar.radio("Navigate", _nav_pages)

_sidebar.markdown("---")
_sidebar.info("Integrated Business Analytics System v1.0")
# --- Overview ---
# Headline KPIs plus the five most recent sales and reviews.
if page == "Overview":
    # Bar-chart emoji (U+1F4CA) as an escape sequence to keep the source
    # ASCII-safe.
    st.title("\U0001F4CA Integrated Business Analytics Dashboard")
    st.markdown("Real-time insights on Sales, Customers, and Operations.")

    # KPI Row
    total_revenue = sales_df['TotalPrice'].sum()
    total_orders = len(sales_df)
    # With no orders the ratio is undefined; show 0 instead of NaN.
    avg_order_value = total_revenue / total_orders if total_orders else 0.0
    avg_sentiment = reviews_df['Sentiment_Score'].mean()

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Revenue", f"${total_revenue:,.2f}")
    with col2:
        st.metric("Total Orders", f"{total_orders}")
    with col3:
        st.metric("Avg Order Value", f"${avg_order_value:,.2f}")
    with col4:
        st.metric("Avg Sentiment",
                  f"{avg_sentiment:.2f}", delta_color="normal")

    st.markdown("### Recent Activity")
    c1, c2 = st.columns(2)
    with c1:
        st.subheader("Recent Sales")
        # Newest first, top five rows, display columns only.
        recent_sales = sales_df.sort_values('Date', ascending=False).head(5)
        st.dataframe(recent_sales[['Date', 'Product', 'TotalPrice', 'Region']])
    with c2:
        st.subheader("Recent Reviews")
        recent_reviews = reviews_df.sort_values('Date', ascending=False).head(5)
        st.dataframe(
            recent_reviews[['Date', 'Product', 'Sentiment_Label', 'ReviewText']])
# --- Sales Performance ---
# Region-filtered revenue trend, revenue by category, and a product ranking.
# `page` holds exactly one sidebar option, so this standalone `if` behaves
# the same as a branch of an if/elif chain.
if page == "Sales Performance":
    # Chart-increasing emoji (U+1F4C8) as an escape sequence to keep the
    # source ASCII-safe.
    st.title("\U0001F4C8 Sales Performance")

    # Filters
    region = st.selectbox(
        "Select Region", ["All"] + list(sales_df['Region'].unique()))
    # "All" means no filtering; otherwise keep only the chosen region's rows.
    filtered_sales = (
        sales_df if region == "All"
        else sales_df[sales_df['Region'] == region]
    )

    # Charts
    col1, col2 = st.columns(2)
    with col1:
        # Time series: total revenue per calendar day.
        daily = (
            filtered_sales.set_index('Date')
            .resample('D')['TotalPrice']
            .sum()
            .reset_index()
        )
        fig_line = px.line(daily, x='Date', y='TotalPrice',
                           title='Daily Revenue Trend')
        st.plotly_chart(fig_line, use_container_width=True)
    with col2:
        # Category bar: total revenue per category.
        cat_sales = filtered_sales.groupby(
            'Category')['TotalPrice'].sum().reset_index()
        fig_bar = px.bar(cat_sales, x='Category', y='TotalPrice',
                         title='Revenue by Category', color='Category')
        st.plotly_chart(fig_bar, use_container_width=True)

    # Product drilldown: revenue and units per product, highest revenue first.
    st.subheader("Top Selling Products")
    top_prods = (
        filtered_sales.groupby('Product')
        .agg({'TotalPrice': 'sum', 'Quantity': 'sum'})
        .sort_values('TotalPrice', ascending=False)
        .reset_index()
    )
    st.dataframe(top_prods)
# --- Customer Segments ---
# 3D scatter of the RFM clusters next to per-cluster averages.
# `page` holds exactly one sidebar option, so this standalone `if` behaves
# the same as a branch of an if/elif chain.
if page == "Customer Segments":
    # Busts-in-silhouette emoji (U+1F465) as an escape sequence to keep the
    # source ASCII-safe.
    st.title("\U0001F465 Customer Segmentation (RFM)")
    st.markdown(
        "Using K-Means clustering on Recency, Frequency, and Monetary value.")

    col1, col2 = st.columns([3, 1])
    with col1:
        # One point per row of rfm_df, coloured by its cluster.
        fig_clust = px.scatter_3d(
            rfm_df.reset_index(),
            x='Recency', y='Frequency', z='Monetary', color='Cluster',
            title="Customer Clusters (RFM)", opacity=0.7)
        st.plotly_chart(fig_clust, use_container_width=True)
    with col2:
        st.subheader("Cluster Stats")
        # numeric_only=True keeps the mean from raising if rfm_df carries a
        # non-numeric column; all-numeric frames give the same result.
        cluster_stats = (
            rfm_df.groupby('Cluster').mean(numeric_only=True).reset_index()
        )
        st.dataframe(cluster_stats.round(2))

    st.markdown("### Insights")
    st.info("Clusters allow identifying loyal customers (High Frequency/Monetary) vs Churn risk (High Recency).")
# --- Forecasting ---
# Historical sales and the forecast on one line chart, split by series type.
# `page` holds exactly one sidebar option, so this standalone `if` behaves
# the same as a branch of an if/elif chain.
if page == "Forecasting":
    # Crystal-ball emoji (U+1F52E) as an escape sequence to keep the source
    # ASCII-safe.
    st.title("\U0001F52E Sales Forecasting")

    days = st.slider("Forecast Days", 7, 90, 30)
    hist_df, pred_df = forecast_sales(sales_df, days)

    # Build labelled copies for plotting instead of mutating the frames that
    # forecast_sales returned.
    hist_plot = hist_df.assign(Type='Historical')
    pred_plot = pred_df.rename(
        columns={'Predicted_Sales': 'TotalPrice'}).assign(Type='Predicted')
    combined = pd.concat([hist_plot, pred_plot])

    fig = px.line(combined, x='Date', y='TotalPrice', color='Type',
                  title=f"Sales Forecast (Next {days} Days)")
    # Dashed marker at the last historical date, where the forecast begins.
    fig.add_vline(x=hist_df['Date'].max(), line_dash="dash", line_color="gray")
    st.plotly_chart(fig, use_container_width=True)
# --- Feedback Analysis ---
# Sentiment label distribution, average sentiment per product, and a
# filterable review table.
# `page` holds exactly one sidebar option, so this standalone `if` behaves
# the same as a branch of an if/elif chain.
if page == "Feedback Analysis":
    # Speech-balloon emoji (U+1F4AC) as an escape sequence to keep the source
    # ASCII-safe.
    st.title("\U0001F4AC Sentiment Analysis")

    # Sentiment Distribution
    col1, col2 = st.columns(2)
    with col1:
        counts = reviews_df['Sentiment_Label'].value_counts()
        # Explicit two-column frame so the chart can refer to columns by name.
        sentiment_counts = pd.DataFrame(
            {'Sentiment_Label': counts.index, 'Count': counts.values})
        # `color` must name the label column: color_discrete_map is keyed by
        # the values of the `color` column and is ignored without it.
        fig_pie = px.pie(
            sentiment_counts,
            names='Sentiment_Label',
            values='Count',
            color='Sentiment_Label',
            title='Sentiment Distribution',
            color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'gray'})
        st.plotly_chart(fig_pie, use_container_width=True)
    with col2:
        # Mean sentiment score per product, sorted ascending.
        avg_by_prod = reviews_df.groupby(
            'Product')['Sentiment_Score'].mean().sort_values().reset_index()
        fig_bar = px.bar(
            avg_by_prod, x='Sentiment_Score', y='Product', orientation='h',
            title='Avg Sentiment by Product', color='Sentiment_Score',
            color_continuous_scale='RdYlGn')
        st.plotly_chart(fig_bar, use_container_width=True)

    st.subheader("Review Explorer")
    sentiment_filter = st.multiselect(
        "Filter Sentiment", ['Positive', 'Neutral', 'Negative'], default=['Negative'])
    # Show only reviews whose label is among the selected sentiments.
    selected_reviews = reviews_df[
        reviews_df['Sentiment_Label'].isin(sentiment_filter)]
    st.dataframe(selected_reviews[['Date', 'Product', 'Rating', 'ReviewText']])
# --- Business Q&A ---
# Free-text questions answered by the RAG system, with the retrieved context
# available in an expander.
# `page` holds exactly one sidebar option, so this standalone `if` behaves
# the same as a branch of an if/elif chain.
if page == "Business Q&A":
    # Robot-face emoji (U+1F916) as an escape sequence to keep the source
    # ASCII-safe.
    st.title("\U0001F916 Business Q&A")

    rag = get_rag_system()

    # Initialize index if not ready
    if rag.vector_store is None:
        with st.spinner("Initializing Knowledge Base (this may take a moment)..."):
            success = rag.build_index()
            if success:
                rag.init_llm()
                st.success("Knowledge Base Ready!")
            else:
                st.error("Could not build index. Please check if data files exist in the 'data' directory.")

    query = st.text_input("Ask a question:", placeholder="e.g., What are customers saying about the Pro Laptop?")

    if query:
        with st.spinner("Thinking..."):
            try:
                result = rag.query(query)
                st.markdown("### Answer")
                st.write(result['answer'])

                with st.expander("View Retrieved Context"):
                    if result['context']:
                        # Number the sources from 1 for display.
                        for source_no, doc in enumerate(result['context'], start=1):
                            st.markdown(f"**Source {source_no} ({doc.metadata.get('source', 'unknown')}):**")
                            st.text(doc.page_content)
                    else:
                        st.text("No context retrieved.")
            except Exception as e:
                # UI boundary: surface any failure to the user instead of
                # crashing the page.
                st.error(f"An error occurred: {e}")