Upload 17 files

7f90ea0 verified 4 days ago

23.1 kB

	"""
	Demand Prediction System - Streamlit Dashboard

	Interactive dashboard for visualizing sales trends and making demand predictions.
	"""

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import joblib
	import json
	from datetime import datetime, timedelta, date as dt_date
	import os
	import warnings
	warnings.filterwarnings('ignore')

	# Page configuration
	st.set_page_config(
	page_title="Demand Prediction Dashboard",
	page_icon="📊",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Custom CSS for better styling
	st.markdown("""
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: bold;
	color: #1f77b4;
	text-align: center;
	margin-bottom: 2rem;
	}
	.metric-card {
	background-color: #f0f2f6;
	padding: 1rem;
	border-radius: 0.5rem;
	margin: 0.5rem 0;
	}
	.stButton>button {
	width: 100%;
	background-color: #1f77b4;
	color: white;
	}
	</style>
	""", unsafe_allow_html=True)

	# Configuration
	DATA_PATH = 'data/sales.csv'
	MODEL_DIR = 'models'
	MODEL_PATH = f'{MODEL_DIR}/best_model.joblib'
	TS_MODEL_PATH = f'{MODEL_DIR}/best_timeseries_model.joblib'
	PREPROCESSING_PATH = f'{MODEL_DIR}/preprocessing.joblib'
	ALL_MODELS_METADATA_PATH = f'{MODEL_DIR}/all_models_metadata.json'


	@st.cache_data
	def load_data():
	"""Load sales data with caching."""
	if os.path.exists(DATA_PATH):
	df = pd.read_csv(DATA_PATH)
	df['date'] = pd.to_datetime(df['date'])
	return df
	return None


	@st.cache_resource
	def load_models():
	"""Load trained models with caching."""
	models = {
	'ml_model': None,
	'ts_model': None,
	'preprocessing': None,
	'model_name': None,
	'is_timeseries': False,
	'metadata': None
	}

	# Load metadata
	if os.path.exists(ALL_MODELS_METADATA_PATH):
	with open(ALL_MODELS_METADATA_PATH, 'r') as f:
	models['metadata'] = json.load(f)
	models['model_name'] = models['metadata'].get('best_model', 'Unknown')
	models['is_timeseries'] = models['model_name'] in ['ARIMA', 'Prophet']

	# Load ML model
	if os.path.exists(MODEL_PATH):
	try:
	models['ml_model'] = joblib.load(MODEL_PATH)
	except:
	pass

	# Load time-series model
	if os.path.exists(TS_MODEL_PATH):
	try:
	models['ts_model'] = joblib.load(TS_MODEL_PATH)
	except:
	pass

	# Load preprocessing
	if os.path.exists(PREPROCESSING_PATH):
	try:
	models['preprocessing'] = joblib.load(PREPROCESSING_PATH)
	except:
	pass

	return models


	def prepare_features_ml(product_id, date, price, discount, category, preprocessing_data):
	"""Prepare features for ML model prediction."""
	if preprocessing_data is None:
	return None

	# Convert date to pandas Timestamp (handles date, datetime, and string)
	# Handle datetime.date objects explicitly
	if isinstance(date, dt_date):
	date = pd.Timestamp(date)
	elif not isinstance(date, pd.Timestamp):
	date = pd.to_datetime(date)

	# Extract date features
	day = date.day
	month = date.month
	day_of_week = date.weekday()
	weekend = 1 if day_of_week >= 5 else 0
	year = date.year
	quarter = date.quarter

	# Encode categorical variables
	category_encoder = preprocessing_data['encoders']['category']
	product_encoder = preprocessing_data['encoders']['product_id']

	try:
	category_encoded = category_encoder.transform([category])[0]
	except ValueError:
	category_encoded = 0

	try:
	product_id_encoded = product_encoder.transform([product_id])[0]
	except ValueError:
	product_id_encoded = product_encoder.transform([product_encoder.classes_[0]])[0]

	# Create feature dictionary
	feature_dict = {
	'price': price,
	'discount': discount,
	'day': day,
	'month': month,
	'day_of_week': day_of_week,
	'weekend': weekend,
	'year': year,
	'quarter': quarter,
	'category_encoded': category_encoded,
	'product_id_encoded': product_id_encoded
	}

	# Create feature array in the same order as training
	feature_names = preprocessing_data['feature_names']
	features = np.array([[feature_dict[name] for name in feature_names]])

	# Scale features
	scaler = preprocessing_data['scaler']
	features_scaled = scaler.transform(features)

	return features_scaled


	def predict_ml(product_id, date, price, discount, category, model, preprocessing_data):
	"""Make prediction using ML model."""
	features = prepare_features_ml(product_id, date, price, discount, category, preprocessing_data)
	if features is None:
	return None
	prediction = model.predict(features)[0]
	return max(0, prediction)


	def predict_timeseries(date, model, model_name):
	"""Make prediction using time-series model."""
	# Convert date to pandas Timestamp (handles date, datetime, and string)
	if isinstance(date, dt_date):
	date = pd.Timestamp(date)
	elif not isinstance(date, pd.Timestamp):
	date = pd.to_datetime(date)

	if model_name == 'ARIMA':
	try:
	forecast = model.forecast(steps=1)
	prediction = forecast[0] if hasattr(forecast, '__iter__') else forecast
	return max(0, prediction)
	except:
	return None

	elif model_name == 'Prophet':
	try:
	future = pd.DataFrame({'ds': [date]})
	forecast = model.predict(future)
	prediction = forecast['yhat'].iloc[0]
	return max(0, prediction)
	except:
	return None

	return None


	def main():
	"""Main dashboard function."""

	# Header
	st.markdown('<h1 class="main-header">📊 Demand Prediction Dashboard</h1>', unsafe_allow_html=True)

	# Load data
	df = load_data()
	if df is None:
	st.error("❌ Sales data not found. Please run generate_dataset.py first.")
	return

	# Load models
	models = load_models()

	# Sidebar
	with st.sidebar:
	st.header("⚙️ Navigation")
	page = st.radio(
	"Select Page",
	["📈 Sales Trends", "🔮 Demand Prediction", "📊 Model Comparison"],
	index=0
	)

	st.markdown("---")
	st.header("ℹ️ Information")
	if models['metadata']:
	best_model = models['metadata'].get('best_model', 'Unknown')
	st.info(f"Best Model: {best_model}")
	if best_model in models['metadata'].get('all_models', {}):
	metrics = models['metadata']['all_models'][best_model]
	st.metric("R2 Score", f"{metrics.get('r2', 0):.4f}")

	# Main content based on selected page
	if page == "📈 Sales Trends":
	show_sales_trends(df)
	elif page == "🔮 Demand Prediction":
	show_prediction_interface(df, models)
	elif page == "📊 Model Comparison":
	show_model_comparison(models)


	def show_sales_trends(df):
	"""Display sales trends visualizations."""
	st.header("📈 Sales Trends Analysis")

	# Filters
	col1, col2, col3 = st.columns(3)

	with col1:
	categories = ['All'] + sorted(df['category'].unique().tolist())
	selected_category = st.selectbox("Select Category", categories)

	with col2:
	products = ['All'] + sorted(df['product_id'].unique().tolist())
	selected_product = st.selectbox("Select Product", products)

	with col3:
	date_range = st.date_input(
	"Select Date Range",
	value=(df['date'].min(), df['date'].max()),
	min_value=df['date'].min(),
	max_value=df['date'].max()
	)

	# Filter data
	filtered_df = df.copy()

	if selected_category != 'All':
	filtered_df = filtered_df[filtered_df['category'] == selected_category]

	if selected_product != 'All':
	filtered_df = filtered_df[filtered_df['product_id'] == int(selected_product)]

	if isinstance(date_range, tuple) and len(date_range) == 2:
	filtered_df = filtered_df[
	(filtered_df['date'] >= pd.to_datetime(date_range[0])) &
	(filtered_df['date'] <= pd.to_datetime(date_range[1]))
	]

	if len(filtered_df) == 0:
	st.warning("No data available for selected filters.")
	return

	# Visualizations
	tab1, tab2, tab3, tab4 = st.tabs(["📅 Daily Trends", "📆 Monthly Trends", "📦 Category Analysis", "💰 Price vs Demand"])

	with tab1:
	st.subheader("Daily Sales Trends")
	daily_sales = filtered_df.groupby('date')['sales_quantity'].sum().reset_index()

	fig, ax = plt.subplots(figsize=(14, 6))
	ax.plot(daily_sales['date'], daily_sales['sales_quantity'], linewidth=2, alpha=0.7)
	ax.set_title('Total Daily Sales Quantity', fontsize=16, fontweight='bold')
	ax.set_xlabel('Date', fontsize=12)
	ax.set_ylabel('Sales Quantity', fontsize=12)
	ax.grid(True, alpha=0.3)
	plt.xticks(rotation=45)
	plt.tight_layout()
	st.pyplot(fig)

	# Statistics
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Total Sales", f"{daily_sales['sales_quantity'].sum():,.0f}")
	with col2:
	st.metric("Average Daily", f"{daily_sales['sales_quantity'].mean():.1f}")
	with col3:
	st.metric("Max Daily", f"{daily_sales['sales_quantity'].max():,.0f}")
	with col4:
	st.metric("Min Daily", f"{daily_sales['sales_quantity'].min():,.0f}")

	with tab2:
	st.subheader("Monthly Sales Trends")
	filtered_df['month_year'] = filtered_df['date'].dt.to_period('M')
	monthly_sales = filtered_df.groupby('month_year')['sales_quantity'].sum().reset_index()
	monthly_sales['month_year'] = monthly_sales['month_year'].astype(str)

	fig, ax = plt.subplots(figsize=(14, 6))
	ax.bar(range(len(monthly_sales)), monthly_sales['sales_quantity'], alpha=0.7, color='steelblue')
	ax.set_title('Monthly Sales Quantity', fontsize=16, fontweight='bold')
	ax.set_xlabel('Month', fontsize=12)
	ax.set_ylabel('Sales Quantity', fontsize=12)
	ax.set_xticks(range(len(monthly_sales)))
	ax.set_xticklabels(monthly_sales['month_year'], rotation=45, ha='right')
	ax.grid(True, alpha=0.3, axis='y')
	plt.tight_layout()
	st.pyplot(fig)

	with tab3:
	st.subheader("Sales by Category")
	category_sales = filtered_df.groupby('category')['sales_quantity'].sum().sort_values(ascending=False)

	fig, ax = plt.subplots(figsize=(12, 6))
	category_sales.plot(kind='barh', ax=ax, color='coral', alpha=0.7)
	ax.set_title('Total Sales by Category', fontsize=16, fontweight='bold')
	ax.set_xlabel('Total Sales Quantity', fontsize=12)
	ax.set_ylabel('Category', fontsize=12)
	ax.grid(True, alpha=0.3, axis='x')
	plt.tight_layout()
	st.pyplot(fig)

	# Category statistics table
	category_stats = filtered_df.groupby('category').agg({
	'sales_quantity': ['sum', 'mean', 'std', 'min', 'max']
	}).round(2)
	category_stats.columns = ['Total', 'Average', 'Std Dev', 'Min', 'Max']
	st.dataframe(category_stats, use_container_width=True)

	with tab4:
	st.subheader("Price vs Demand Relationship")

	# Scatter plot
	fig, ax = plt.subplots(figsize=(12, 6))
	scatter = ax.scatter(filtered_df['price'], filtered_df['sales_quantity'],
	c=filtered_df['discount'], cmap='viridis', alpha=0.6, s=50)
	ax.set_title('Price vs Sales Quantity (colored by discount)', fontsize=16, fontweight='bold')
	ax.set_xlabel('Price', fontsize=12)
	ax.set_ylabel('Sales Quantity', fontsize=12)
	ax.grid(True, alpha=0.3)
	plt.colorbar(scatter, ax=ax, label='Discount %')
	plt.tight_layout()
	st.pyplot(fig)

	# Correlation
	correlation = filtered_df['price'].corr(filtered_df['sales_quantity'])
	st.metric("Price-Demand Correlation", f"{correlation:.3f}")


	def show_prediction_interface(df, models):
	"""Display interactive prediction interface."""
	st.header("🔮 Demand Prediction")

	# Check if models are available
	if models['ml_model'] is None and models['ts_model'] is None:
	st.error("❌ No trained models found. Please run train_model.py first.")
	return

	# Model selection
	model_type = st.radio(
	"Select Model Type",
	["Auto (Best Model)", "Machine Learning", "Time-Series"],
	horizontal=True
	)

	st.markdown("---")

	if model_type == "Time-Series" or (model_type == "Auto (Best Model)" and models['is_timeseries']):
	# Time-series prediction
	st.subheader("Overall Daily Demand Prediction")

	col1, col2 = st.columns(2)
	with col1:
	prediction_date = st.date_input(
	"Select Date for Prediction",
	value=datetime.now().date() + timedelta(days=30),
	min_value=df['date'].max().date() + timedelta(days=1)
	)

	with col2:
	st.write("") # Spacing
	st.write("") # Spacing

	if st.button("🔮 Predict Demand", type="primary"):
	if models['ts_model'] is None:
	st.error("Time-series model not available.")
	else:
	with st.spinner("Making prediction..."):
	prediction = predict_timeseries(
	prediction_date,
	models['ts_model'],
	models['model_name']
	)

	if prediction is not None:
	st.success(f"✅ Prediction Complete!")

	col1, col2, col3 = st.columns(3)
	with col1:
	st.metric("Predicted Daily Demand", f"{prediction:,.0f} units")
	with col2:
	day_name = pd.to_datetime(prediction_date).strftime('%A')
	st.metric("Day of Week", day_name)
	with col3:
	is_weekend = "Yes" if pd.to_datetime(prediction_date).weekday() >= 5 else "No"
	st.metric("Weekend", is_weekend)

	st.info("💡 This prediction represents the total daily demand across all products.")
	else:
	st.error("Failed to make prediction.")

	else:
	# ML model prediction
	st.subheader("Product-Specific Demand Prediction")

	# Get unique values for dropdowns
	categories = sorted(df['category'].unique().tolist())
	products = sorted(df['product_id'].unique().tolist())

	col1, col2 = st.columns(2)

	with col1:
	selected_category = st.selectbox("Select Category", categories)
	selected_product = st.selectbox("Select Product ID", products)
	prediction_date = st.date_input(
	"Select Date for Prediction",
	value=datetime.now().date() + timedelta(days=30),
	min_value=df['date'].max().date() + timedelta(days=1)
	)

	with col2:
	price = st.number_input(
	"Product Price ($)",
	min_value=0.01,
	value=100.0,
	step=1.0,
	format="%.2f"
	)
	discount = st.slider(
	"Discount (%)",
	min_value=0,
	max_value=100,
	value=0,
	step=5
	)

	# Show product statistics
	product_data = df[df['product_id'] == selected_product]
	if len(product_data) > 0:
	with st.expander("📊 Product Statistics"):
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Avg Price", f"${product_data['price'].mean():.2f}")
	with col2:
	st.metric("Avg Sales", f"{product_data['sales_quantity'].mean():.1f}")
	with col3:
	st.metric("Total Sales", f"{product_data['sales_quantity'].sum():,.0f}")
	with col4:
	st.metric("Category", selected_category)

	if st.button("🔮 Predict Demand", type="primary"):
	if models['ml_model'] is None or models['preprocessing'] is None:
	st.error("ML model or preprocessing not available.")
	else:
	with st.spinner("Making prediction..."):
	prediction = predict_ml(
	selected_product,
	prediction_date,
	price,
	discount,
	selected_category,
	models['ml_model'],
	models['preprocessing']
	)

	if prediction is not None:
	st.success(f"✅ Prediction Complete!")

	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Predicted Demand", f"{prediction:,.0f} units")
	with col2:
	st.metric("Price", f"${price:.2f}")
	with col3:
	st.metric("Discount", f"{discount}%")
	with col4:
	day_name = pd.to_datetime(prediction_date).strftime('%A')
	st.metric("Day", day_name)

	# Additional insights
	st.markdown("### 📈 Prediction Insights")
	date_obj = pd.to_datetime(prediction_date)
	is_weekend = date_obj.weekday() >= 5
	month = date_obj.month

	insights = []
	if is_weekend:
	insights.append("📅 Weekend - typically higher demand")
	if month in [11, 12]:
	insights.append("🎄 Holiday season - peak sales period")
	if discount > 0:
	insights.append(f"💰 {discount}% discount - may increase demand")

	if insights:
	for insight in insights:
	st.info(insight)
	else:
	st.error("Failed to make prediction.")


	def show_model_comparison(models):
	"""Display model comparison."""
	st.header("📊 Model Comparison")

	if models['metadata'] is None:
	st.warning("Model metadata not available. Please run train_model.py first.")
	return

	metadata = models['metadata']
	all_models = metadata.get('all_models', {})
	best_model = metadata.get('best_model', 'Unknown')

	if not all_models:
	st.warning("No model comparison data available.")
	return

	# Model metrics table
	st.subheader("Model Performance Metrics")

	comparison_data = []
	for model_name, metrics in all_models.items():
	comparison_data.append({
	'Model': model_name,
	'Type': 'Time-Series' if model_name in ['ARIMA', 'Prophet'] else 'Machine Learning',
	'MAE': metrics.get('mae', 0),
	'RMSE': metrics.get('rmse', 0),
	'R2 Score': metrics.get('r2', 0)
	})

	comparison_df = pd.DataFrame(comparison_data)

	# Highlight best model
	def highlight_best(row):
	if row['Model'] == best_model:
	return ['background-color: #90EE90'] * len(row)
	return [''] * len(row)

	st.dataframe(
	comparison_df.style.apply(highlight_best, axis=1),
	use_container_width=True
	)

	# Visualizations
	st.subheader("Performance Comparison Charts")

	col1, col2 = st.columns(2)

	with col1:
	fig, ax = plt.subplots(figsize=(10, 6))
	model_names = comparison_df['Model'].tolist()
	mae_scores = comparison_df['MAE'].tolist()

	colors = ['coral' if name in ['ARIMA', 'Prophet'] else 'skyblue' for name in model_names]
	ax.bar(model_names, mae_scores, color=colors, alpha=0.7)
	ax.set_title('MAE Comparison (Lower is Better)', fontsize=14, fontweight='bold')
	ax.set_ylabel('MAE', fontsize=12)
	ax.tick_params(axis='x', rotation=45)
	ax.grid(True, alpha=0.3, axis='y')
	plt.tight_layout()
	st.pyplot(fig)

	with col2:
	fig, ax = plt.subplots(figsize=(10, 6))
	r2_scores = comparison_df['R2 Score'].tolist()

	colors = ['coral' if name in ['ARIMA', 'Prophet'] else 'skyblue' for name in model_names]
	ax.bar(model_names, r2_scores, color=colors, alpha=0.7)
	ax.set_title('R2 Score Comparison (Higher is Better)', fontsize=14, fontweight='bold')
	ax.set_ylabel('R2 Score', fontsize=12)
	ax.tick_params(axis='x', rotation=45)
	ax.grid(True, alpha=0.3, axis='y')
	plt.tight_layout()
	st.pyplot(fig)

	# Best model info
	st.markdown("---")
	st.success(f"🏆 Best Model: {best_model}")
	if best_model in all_models:
	best_metrics = all_models[best_model]
	col1, col2, col3 = st.columns(3)
	with col1:
	st.metric("MAE", f"{best_metrics.get('mae', 0):.2f}")
	with col2:
	st.metric("RMSE", f"{best_metrics.get('rmse', 0):.2f}")
	with col3:
	st.metric("R2 Score", f"{best_metrics.get('r2', 0):.4f}")


	if __name__ == "__main__":
	main()