elizabethmyn commited on
Commit
84548c1
·
1 Parent(s): cc73c46

Add demo for sales forecasting

Browse files
app/core/config.py CHANGED
@@ -9,9 +9,8 @@ class Settings(BaseSettings):
9
 
10
  # Server
11
  HOST: str = "0.0.0.0"
12
- # HOST: str = "127.0.0.1"
13
  PORT: int = 5050
14
- API_PREFIX: str = "/api/v1"
15
 
16
  # Model
17
  MODEL_CHECKPOINT: str = "yainage90/fashion-object-detection"
@@ -19,13 +18,12 @@ class Settings(BaseSettings):
19
 
20
  # Security
21
  SECRET_KEY: str = "xxx"
22
- ALGORITHM: str = "HS256"
23
- ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 30 # 1 month
24
-
25
  API_TOKEN: str = "xxx"
 
 
26
 
27
  class Config:
28
- # env_file = ".env"
29
  case_sensitive = True
30
 
31
  settings = Settings()
 
9
 
10
  # Server
11
  HOST: str = "0.0.0.0"
 
12
  PORT: int = 5050
13
+ API_PREFIX: str = "x"
14
 
15
  # Model
16
  MODEL_CHECKPOINT: str = "yainage90/fashion-object-detection"
 
18
 
19
  # Security
20
  SECRET_KEY: str = "xxx"
 
 
 
21
  API_TOKEN: str = "xxx"
22
+ ALGORITHM: str = ".xxx"
23
+ ACCESS_TOKEN_EXPIRE_MINUTES: int = 30
24
 
25
  class Config:
26
+ env_file = ".env"
27
  case_sensitive = True
28
 
29
  settings = Settings()
app/frontend/dashboard.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+ from datetime import datetime
5
+
6
+ from app.frontend.data_viz import (
7
+ plot_category_distribution,
8
+ plot_day_of_week_pattern,
9
+ plot_sales_distribution,
10
+ plot_sales_time_series,
11
+ plot_store_comparison,
12
+ )
13
+
14
# Minimal stand-in for Streamlit's st.session_state so the ported
# dashboard logic can keep using attribute-style state access.
class SessionState(dict):
    """Dict subclass with attribute access; missing attributes resolve to None."""

    def __getattr__(self, name):
        # dict.get returns None for absent keys instead of raising AttributeError.
        return self.get(name)

    def __setattr__(self, name, value):
        # Attributes are stored as ordinary dict entries.
        self[name] = value

session_state = SessionState()
20
+
21
def configure_filters(data, start_date, end_date, selected_store_input, selected_categories):
    """Filter `data` by date range, store, and categories (UI-free port).

    Mirrors the original Streamlit sidebar logic: the store input is
    interpreted as a store *name* when a `store_name` column exists,
    otherwise as a store id via the `store` column. The resolved
    selection and date bounds are published on `session_state` for the
    downstream display helpers. Returns the filtered DataFrame.
    """
    store_id = "All Stores"
    store_name = "All Stores"
    if "store_name" in data.columns:
        store_name = selected_store_input
    elif "store" in data.columns:
        store_id = selected_store_input

    working = data.copy()

    # Gradio hands dates over as strings; normalise to datetime.date.
    lo = pd.to_datetime(start_date).date()
    hi = pd.to_datetime(end_date).date()
    keep = (working["date"].dt.date >= lo) & (working["date"].dt.date <= hi)

    # Store filter (name takes precedence over id, matching the resolution above).
    if "store_name" in data.columns and store_name != "All Stores":
        keep &= working["store_name"] == store_name
    elif "store" in data.columns and store_id != "All Stores":
        keep &= working["store"] == store_id

    # Category filter only when a non-empty selection was made.
    if selected_categories:
        keep &= working["category"].isin(selected_categories)

    # Share the resolved selection with the other dashboard helpers.
    session_state.selected_store = store_id
    session_state.selected_store_name = store_name
    session_state.start_date = lo
    session_state.end_date = hi

    return working[keep]
61
+
62
def display_kpis(filtered_data):
    """Compute the dashboard KPI values for the filtered data.

    Returns a tuple of raw numbers:
    (total sales, percent change between the two halves of the selected
    date range, average daily sales, total transactions, average
    transaction value).
    """
    total_sales = filtered_data["sales"].sum()
    avg_daily_sales = filtered_data.groupby("date")["sales"].sum().mean()

    # Period-over-period change: split the selected range in half and
    # compare the two halves. Needs at least two distinct dates.
    if len(filtered_data["date"].unique()) >= 2:
        span = session_state.end_date - session_state.start_date
        mid_date = session_state.start_date + span / 2
        first_half = filtered_data[filtered_data["date"].dt.date <= mid_date]
        second_half = filtered_data[filtered_data["date"].dt.date > mid_date]
        first_sales = 0 if first_half.empty else first_half["sales"].sum()
        second_sales = 0 if second_half.empty else second_half["sales"].sum()
        if first_sales > 0:
            sales_change_pct = (second_sales - first_sales) / first_sales * 100
        else:
            sales_change_pct = 0
    else:
        sales_change_pct = 0

    # Fall back to the row count when there is no transactions column.
    if "transactions" in filtered_data.columns:
        total_transactions = filtered_data["transactions"].sum()
    else:
        total_transactions = filtered_data.shape[0]

    avg_transaction_value = total_sales / total_transactions if total_transactions > 0 else 0

    return (
        total_sales,
        sales_change_pct,
        avg_daily_sales,
        total_transactions,
        avg_transaction_value,
    )
92
+
93
def display_sales_trends(filtered_data):
    """Build the trend figures for the dashboard.

    Returns (time_series_fig, day_of_week_fig); the weekday pattern is
    only produced when at least seven distinct dates are present,
    otherwise its slot is None.
    """
    ts_fig = plot_sales_time_series(
        filtered_data,
        session_state.selected_store,
        session_state.selected_store_name,
    )
    dow_fig = (
        plot_day_of_week_pattern(filtered_data)
        if len(filtered_data["date"].unique()) >= 7
        else None
    )
    return ts_fig, dow_fig
106
+
107
def display_performance_breakdown(filtered_data):
    """Build the category and store breakdown artefacts.

    Returns (category_df, category_fig, store_df, store_fig); each
    entry is an empty DataFrame / None when the corresponding breakdown
    does not apply to the current selection.
    """
    cat_table = pd.DataFrame()
    cat_fig = None
    store_table = pd.DataFrame()
    store_fig = None

    # Category split is only meaningful with more than one category.
    if "category" in filtered_data.columns and len(filtered_data["category"].unique()) > 1:
        by_cat = filtered_data.groupby("category")["sales"].sum().sort_values(ascending=False)
        pct = (by_cat / by_cat.sum() * 100).round(1)
        cat_table = pd.DataFrame({"Sales": by_cat, "Percentage": pct}).reset_index()
        cat_table["Sales"] = cat_table["Sales"].apply(lambda v: f"${v:,.2f}")
        cat_table["Percentage"] = cat_table["Percentage"].apply(lambda v: f"{v}%")
        cat_fig = plot_category_distribution(filtered_data)

    # Store comparison only when looking at *all* stores.
    all_stores_selected = (
        session_state.selected_store_name == "All Stores"
        and session_state.selected_store == "All Stores"
    )
    has_store_col = "store_name" in filtered_data.columns or "store" in filtered_data.columns
    if all_stores_selected and has_store_col:
        key = "store_name" if "store_name" in filtered_data.columns else "store"
        by_store = filtered_data.groupby(key)["sales"].sum().sort_values(ascending=False)
        leaders = by_store.head(10)
        store_table = pd.DataFrame({"Store": leaders.index, "Sales": leaders.values})
        store_table["Sales"] = store_table["Sales"].apply(lambda v: f"${v:,.2f}")
        store_fig = plot_store_comparison(filtered_data, key)

    return cat_table, cat_fig, store_table, store_fig
132
+
133
def format_kpi_html(label, value_str, delta_pct=None):
    """Render one KPI metric as an HTML card.

    `delta_pct`, when given and non-zero, is shown under the value as a
    green up-arrow or red down-arrow percentage (Streamlit-metric style).
    """
    delta_html = ""
    if delta_pct is not None and delta_pct != 0:
        # Up = green, down = red.
        if delta_pct > 0:
            color = "color: #38a169;"
            arrow = "▲"
        else:
            color = "color: #e53e3e;"
            arrow = "▼"
        # Example rendering: "▲ 4.2%"
        delta_str = f"{arrow} {abs(delta_pct):.1f}%"
        delta_html = f'<div style="{color} font-size: 14px; font-weight: 500; margin-top: 5px; line-height: 1;">{delta_str}</div>'

    return f"""
    <div style="font-family: Arial, sans-serif; padding: 10px;">
    <div style="font-size: 14px; color: #555; margin-bottom: 5px;">{label}</div>
    <div style="font-size: 30px; font-weight: 600; color: #1a1a1a; line-height: 1;">{value_str}</div>
    {delta_html}
    </div>
    """
158
+
159
def update_kpis_html(total_sales, sales_change_pct, avg_daily_sales, total_transactions, avg_transaction_value):
    """Format the raw KPI numbers into the four HTML metric cards.

    Only the Total Sales card carries a delta (period-over-period
    change); the others are plain value cards.
    """
    return (
        format_kpi_html("💰 Total Sales", f"${total_sales:,.2f}", sales_change_pct),
        format_kpi_html("📊 Avg Daily Sales", f"${avg_daily_sales:,.2f}"),
        format_kpi_html("🛒 Total Transactions", f"{total_transactions:,}"),
        format_kpi_html("💵 Avg Transaction Value", f"${avg_transaction_value:,.2f}"),
    )
184
+
185
def historical_sales_view(data):
    """Build the Gradio Blocks dashboard for historical sales analysis.

    `data` must contain `date` (datetime64) and `sales` columns;
    `store`/`store_name`, `category` and `transactions` are optional and
    enable the matching filters/metrics. Returns the gr.Blocks app.
    """

    def run_dashboard_update(start_date, end_date, store_selection, categories):
        """Recompute every dashboard output for the current filters."""
        # 1. Filter
        filtered_data = configure_filters(data, start_date, end_date, store_selection, categories)

        if filtered_data.empty:
            empty_msg = "⚠️ No data available for the selected filters. Please adjust your selections."
            # BUGFIX: the original returned 10 values here for 12 output
            # components, which crashes Gradio on an empty result. Return
            # exactly one value per output: 4 KPI cards, 2 trend plots,
            # category df/plot, store df/plot, distribution plot, table.
            return (
                empty_msg, empty_msg, empty_msg, empty_msg,
                None, None,
                pd.DataFrame(), None,
                pd.DataFrame(), None,
                None,
                pd.DataFrame(),
            )

        # 2. KPIs
        html1, html2, html3, html4 = update_kpis_html(*display_kpis(filtered_data))

        # 3. Trends
        fig_ts, fig_dow = display_sales_trends(filtered_data)

        # 4. Breakdown
        cat_df, fig_cat, store_df, fig_store = display_performance_breakdown(filtered_data)

        # 5. Distribution
        fig_dist = plot_sales_distribution(filtered_data)

        # 6. Table (most recent first)
        detailed_table = filtered_data.sort_values("date", ascending=False)

        return (
            html1, html2, html3, html4,
            fig_ts, fig_dow,
            cat_df, fig_cat,
            store_df, fig_store,
            fig_dist,
            detailed_table,
        )

    with gr.Blocks(title="Store Sales Dashboard") as demo:
        # Sidebar (docked right) holding all the filters
        with gr.Sidebar(position="right"):
            gr.Markdown("## 🔍 Dashboard Filters")
            gr.Markdown("---")

            # Date filters, defaulting to the full data range
            gr.Markdown("### 📅 Date Range")
            min_date = data["date"].min().date()
            max_date = data["date"].max().date()

            start_in = gr.DateTime(
                label="From",
                value=str(min_date),
                type="string",
                interactive=True
            )
            end_in = gr.DateTime(
                label="To",
                value=str(max_date),
                type="string",
                interactive=True
            )

            gr.Markdown("---")

            # Store filter: prefer human-readable names when available
            gr.Markdown("### 🏬 Store Selection")
            if "store_name" in data.columns:
                opts = ["All Stores"] + sorted(data["store_name"].unique().tolist())
            elif "store" in data.columns:
                opts = ["All Stores"] + sorted(data["store"].unique().tolist())
            else:
                opts = ["All Stores"]

            store_in = gr.Dropdown(
                choices=opts,
                value="All Stores",
                label="Select Store",
                interactive=True
            )

            # Category filter. BUGFIX: the original left cat_in = None when
            # there is no "category" column and then passed None into
            # btn.click(inputs=...), which Gradio rejects. A hidden, empty
            # CheckboxGroup keeps the event wiring valid in both cases.
            if "category" in data.columns:
                gr.Markdown("---")
                gr.Markdown("### 📦 Product Categories")
                cats = sorted(data["category"].unique().tolist())
                cat_in = gr.CheckboxGroup(
                    choices=cats,
                    value=cats,
                    label="Select Categories",
                    interactive=True
                )
            else:
                cat_in = gr.CheckboxGroup(choices=[], value=[], visible=False)

            gr.Markdown("---")
            btn = gr.Button("🔄 Update Dashboard", variant="primary", size="lg")

            gr.Markdown(
                """
                <br>
                💡 **Tip:** Adjust filters and click Update to refresh
                """
            )

        # Main dashboard column
        with gr.Column(scale=1):
            # Header
            gr.Markdown(
                """
                # 📊 Store Sales Dashboard
                ### Comprehensive sales analytics and performance insights
                """
            )
            # KPI cards
            gr.Markdown("## 📈 Key Performance Indicators")
            with gr.Row():
                m1 = gr.HTML(label=None, scale=1, container=True)
                m2 = gr.HTML(label=None, scale=1, container=True)
                m3 = gr.HTML(label=None, scale=1, container=True)
                m4 = gr.HTML(label=None, scale=1, container=True)

            gr.Markdown("---")

            # Trend plots
            gr.Markdown("## 📉 Sales Trends Analysis")
            with gr.Row():
                p_ts = gr.Plot(label="📈 Sales Time Series", container=True, scale=1)
                p_dow = gr.Plot(label="📅 Weekly Patterns", container=True, scale=1)

            gr.Markdown("---")

            gr.Markdown("## 🎯 Performance Breakdown")

            gr.Markdown("### 📦 Category Performance")
            with gr.Row():
                with gr.Column(scale=1):
                    df_cat = gr.DataFrame(label="Category Sales Data", max_height=300)
                with gr.Column(scale=1):
                    p_cat = gr.Plot(label="Sales by Category", container=True)

            gr.Markdown("---")

            gr.Markdown("### 🏪 Store Comparison (Top 10)")
            with gr.Row():
                with gr.Column(scale=1):
                    df_store = gr.DataFrame(label="Top Performing Stores", max_height=300)
                with gr.Column(scale=2):
                    p_store = gr.Plot(label="Top 10 Stores by Sales", container=True)

            gr.Markdown("---")

            gr.Markdown("## 📊 Sales Distribution")
            p_dist = gr.Plot(label="Distribution Analysis", container=True)

            gr.Markdown("---")

            with gr.Accordion("📋 View Detailed Sales Data", open=True):
                gr.Markdown("*Complete transaction history for the selected period*")
                df_detailed = gr.DataFrame(max_height=400)

            # Footer
            gr.Markdown(
                """
                ---
                <div style='text-align: center; color: #666; font-size: 0.9em;'>
                📊 Store Sales Dashboard | Powered by Gradio
                </div>
                """
            )

        # One shared wiring for the Update button and the initial page load.
        inputs = [start_in, end_in, store_in, cat_in]
        outputs = [m1, m2, m3, m4, p_ts, p_dow, df_cat, p_cat, df_store, p_store, p_dist, df_detailed]
        btn.click(run_dashboard_update, inputs=inputs, outputs=outputs)
        demo.load(run_dashboard_update, inputs=inputs, outputs=outputs)

    return demo
375
+
376
+ # Usage:
377
+ # if __name__ == "__main__":
378
+ # df = pd.read_csv("your_data.csv", parse_dates=['date'])
379
+ # app = historical_sales_view(df)
380
+ # app.launch()
app/frontend/data_viz.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
+
6
+
7
def plot_sales_forecast(
    historical_data, prediction_date, prediction_value, store_id=None
):
    """Plot historical sales as a line with the forecast as a red dot.

    When `store_id` is given and the data has a `store` column, only
    that store's history is drawn. Returns the matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    # Restrict to one store when requested and possible.
    if store_id is not None and "store" in historical_data.columns:
        series = historical_data[historical_data["store"] == store_id].copy()
    else:
        series = historical_data.copy()

    # Collapse to one row per date when there are duplicate dates.
    if len(series) > len(series["date"].unique()):
        series = series.groupby("date")["sales"].sum().reset_index()

    series = series.sort_values("date")

    ax.plot(series["date"], series["sales"], label="Historical Sales")
    ax.scatter(
        prediction_date, prediction_value, color="red", s=100, label="Prediction"
    )

    ax.set_xlabel("Date")
    ax.set_ylabel("Sales")
    title = "Sales Forecast" if store_id is None else f"Sales Forecast for Store {store_id}"
    ax.set_title(title)
    ax.legend()
    fig.autofmt_xdate()

    return fig
47
+
48
+
49
def plot_sales_time_series(
    filtered_data, selected_store=None, selected_store_name=None
):
    """Generate a time-series plot of daily sales with a 7-day moving average.

    The store arguments only affect the title; the aggregation itself is
    the same for "all stores" and single-store data because the input is
    expected to be pre-filtered. Returns the matplotlib Figure.

    NOTE(review): the original duplicated this entire body across an
    if/else on the store selection with byte-identical branches; the
    duplication is collapsed here with no behavior change.
    """
    fig, ax = plt.subplots(figsize=(7, 6))

    # Daily totals drive the main trend line.
    sales_by_date = filtered_data.groupby("date")["sales"].sum()
    ax.plot(sales_by_date.index, sales_by_date.values, "b-")

    # Overlay a 7-day moving average when there is enough history.
    if len(sales_by_date) > 7:
        ma = sales_by_date.reset_index()
        ma["MA7"] = ma["sales"].rolling(window=7).mean()
        ax.plot(ma["date"], ma["MA7"], "r--", label="7-Day Moving Avg")
        ax.legend()

    ax.set_xlabel("")
    ax.set_ylabel("Sales ($)")

    # The title reflects the current store selection.
    if "store_name" in filtered_data.columns and selected_store_name != "All Stores":
        ax.set_title(f"Daily Sales - {selected_store_name}")
    elif "store" in filtered_data.columns and selected_store != "All Stores":
        ax.set_title(f"Daily Sales - Store {selected_store}")
    else:
        ax.set_title("Daily Sales - All Stores")

    fig.autofmt_xdate()
    return fig
101
+
102
+
103
def plot_day_of_week_pattern(filtered_data):
    """Generate a bar chart of average sales per weekday.

    The best day is highlighted green, the worst orange, with a dashed
    red line at the overall daily average. Returns the matplotlib Figure.

    BUGFIX: the original wrote a `day_name` column into `filtered_data`
    in place, mutating the caller's DataFrame (and triggering
    SettingWithCopyWarning on slices); the weekday labels are now
    computed locally instead.
    """
    fig, ax = plt.subplots(figsize=(7, 7))

    day_names = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    # Weekday label per row, kept as a local Series (no input mutation).
    day_name = filtered_data["date"].dt.dayofweek.apply(lambda i: day_names[i])

    # Mean sales per weekday, in calendar order.
    day_sales = filtered_data.groupby(day_name)["sales"].mean().reindex(day_names)

    # Reference line at the across-weekday average.
    avg_daily = day_sales.mean()

    bars = ax.bar(day_sales.index, day_sales.values, color="skyblue")
    ax.axhline(y=avg_daily, color="red", linestyle="--", label="Daily Average")

    # Highlight best and worst days.
    best_day = day_sales.idxmax()
    worst_day = day_sales.idxmin()
    for i, (day, _) in enumerate(day_sales.items()):
        if day == best_day:
            bars[i].set_color("green")
        elif day == worst_day:
            bars[i].set_color("orange")

    ax.set_xlabel("")
    ax.set_ylabel("Average Sales ($)")
    ax.set_title("Sales by Day of Week")
    plt.xticks(rotation=45)
    ax.legend()

    return fig
148
+
149
+
150
def plot_category_distribution(filtered_data):
    """Generate a pie chart of sales by category (top 5 plus "Others").

    Returns the matplotlib Figure.

    Improvement: draws via the figure's own Axes instead of the pyplot
    current-axes global state (the original mixed `ax` with `plt.pie` /
    `plt.axis` / `plt.title`), so the chart cannot land on an unrelated
    figure if another one becomes current.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    category_sales = (
        filtered_data.groupby("category")["sales"].sum().sort_values(ascending=False)
    )

    # Keep the 5 largest categories; lump the remainder into "Others".
    top_categories = category_sales.head(5)
    others = category_sales.iloc[5:].sum() if len(category_sales) > 5 else 0

    if others > 0:
        plot_data = pd.concat([top_categories, pd.Series([others], index=["Others"])])
    else:
        plot_data = top_categories

    ax.pie(
        plot_data,
        labels=plot_data.index,
        autopct="%1.1f%%",
        startangle=90,
        shadow=False,
    )
    ax.axis("equal")
    ax.set_title("Sales by Category")

    return fig
177
+
178
+
179
def plot_store_comparison(filtered_data, store_identifier="store"):
    """Horizontal bar chart of the ten best-selling stores.

    `store_identifier` is the column to group by ("store" or
    "store_name"). Returns the matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    # Total sales per store, largest first, trimmed to the top ten.
    top_stores = (
        filtered_data.groupby(store_identifier)["sales"]
        .sum()
        .sort_values(ascending=False)
        .head(10)
    )

    positions = np.arange(len(top_stores))
    ax.barh(positions, top_stores.values, align="center")
    ax.set_yticks(positions)
    ax.set_yticklabels(top_stores.index)
    ax.invert_yaxis()  # largest store reads first, top-to-bottom
    ax.set_xlabel("Sales ($)")
    ax.set_title("Top 10 Stores by Sales")

    return fig
203
+
204
+
205
def plot_sales_distribution(filtered_data):
    """Histogram of per-record sales with a KDE overlay.

    Median (red) and mean (green) are marked with dashed vertical lines.
    Returns the matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(18, 4))

    sns.histplot(filtered_data["sales"], bins=30, kde=True, ax=ax)

    # Mark the two central-tendency statistics.
    median_sales = filtered_data["sales"].median()
    mean_sales = filtered_data["sales"].mean()
    ax.axvline(
        x=median_sales, color="r", linestyle="--", label=f"Median: ${median_sales:.2f}"
    )
    ax.axvline(
        x=mean_sales, color="g", linestyle="--", label=f"Mean: ${mean_sales:.2f}"
    )

    ax.set_xlabel("Sales ($)")
    ax.set_ylabel("Frequency")
    ax.set_title("Sales Distribution")
    ax.legend()

    return fig
app/frontend/gradio_ui.py CHANGED
@@ -1,59 +1,65 @@
1
- import os
2
  import gradio as gr
3
  import requests
4
  from PIL import Image, ImageDraw, ImageFont
5
  import io
6
  from typing import List, Dict, Any
7
- from datetime import datetime, timedelta
8
- from pathlib import Path
9
  import random
10
- from app.core.config import settings
11
- from app.core.security import create_access_token
12
 
13
- # Try to import logger, fallback if not available
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  try:
15
  from app.utils.logger import logger
 
 
16
  except ImportError:
 
 
 
17
  import logging
18
  logger = logging.getLogger(__name__)
19
- logging.basicConfig(level=logging.INFO)
20
 
21
- # Try to import ui_template, fallback if not available
22
- try:
23
- import ui_template as ui
24
- HAS_UI_TEMPLATE = True
25
- except ImportError:
26
- HAS_UI_TEMPLATE = False
27
- logger.warning("ui_template not found, using basic styling")
28
 
29
- # ==================== API Configuration ====================
30
  API_BASE_URL = "http://localhost:5050"
31
  API_VERSION = "v1"
32
  API_ENDPOINT = f"{API_BASE_URL}/api/{API_VERSION}/detect/image"
33
  API_HEALTH_ENDPOINT = f"{API_BASE_URL}/api/{API_VERSION}/health"
34
  API_BATCH_ENDPOINT = f"{API_BASE_URL}/api/{API_VERSION}/detect/batch"
35
-
36
  SHOW_GRADIO_API = "undocumented"
37
 
38
 
39
- # ==================== Fashion Detection Client ====================
40
  class FashionDetectionClient:
41
  """Client for interacting with the Fashion Detection API"""
42
 
43
  def __init__(self, base_url: str = API_BASE_URL, token: str = None):
44
  self.base_url = base_url
45
- if token is None or token == "xxx":
46
- token = generate_test_token()
47
- else:
48
- token = settings.API_TOKEN
49
- self.token = token
50
- self.headers = {"X-Token": token}
51
  self.session = requests.Session()
52
  self.session.headers.update(self.headers)
53
 
54
  def check_health(self) -> Dict[str, Any]:
55
  """Check API health status"""
56
- logger.info(">>> check_health called")
57
  try:
58
  response = self.session.get(API_HEALTH_ENDPOINT, timeout=10)
59
  response.raise_for_status()
@@ -69,7 +75,6 @@ class FashionDetectionClient:
69
 
70
  def detect_single_image(self, image: Image.Image, threshold: float = 0.4) -> Dict[str, Any]:
71
  """Detect objects in a single image"""
72
- logger.info(">>> detect_single_image function")
73
  try:
74
  img_byte_arr = io.BytesIO()
75
  image.save(img_byte_arr, format='PNG')
@@ -77,7 +82,7 @@ class FashionDetectionClient:
77
 
78
  files = {"file": ("image.png", img_byte_arr, "image/png")}
79
  params = {"threshold": threshold} if threshold else {}
80
- logger.info(f">>Sending request to {API_ENDPOINT} with params={params}")
81
  response = self.session.post(
82
  API_ENDPOINT,
83
  files=files,
@@ -85,14 +90,12 @@ class FashionDetectionClient:
85
  timeout=30
86
  )
87
  response.raise_for_status()
88
- logger.info(f">>response {response}")
89
  return response.json()
90
 
91
  except requests.exceptions.RequestException as e:
92
- logger.info(f"Lỗi: {response.status_code}. Chi tiết: {response.json()}, API_TOKEN={self.token}")
93
  return {
94
  "success": False,
95
- "error": f"API request failed: {str(e)}\n",
96
  "details": f"URL: {API_ENDPOINT}"
97
  }
98
  except Exception as e:
@@ -129,7 +132,6 @@ class FashionDetectionClient:
129
  }
130
 
131
 
132
- # ==================== Drawing Functions ====================
133
  def draw_bounding_boxes_pil(image: Image.Image, detections: List[Dict[str, Any]]) -> Image.Image:
134
  """Draw bounding boxes on PIL Image"""
135
  img_with_boxes = image.copy()
@@ -212,62 +214,12 @@ def format_detection_results(result: Dict[str, Any]) -> str:
212
  return result_text
213
 
214
 
215
- # ==================== Helper Functions ====================
216
- def convert_to_pil_images(gradio_files: List) -> List[Image.Image]:
217
- """Convert Gradio NamedString objects (file paths) to PIL Images"""
218
- pil_images = []
219
- for file in gradio_files:
220
- try:
221
- file_path = file.name if hasattr(file, 'name') else file
222
- pil_image = Image.open(file_path)
223
- if pil_image.mode != "RGB":
224
- pil_image = pil_image.convert("RGB")
225
- pil_images.append(pil_image)
226
- except Exception as e:
227
- logger.error(f"Error converting image {file_path}: {str(e)}")
228
- return pil_images
229
-
230
- #
231
- def generate_test_token():
232
- access_token = create_access_token(
233
- data={"sub": "test_user"},
234
- expires_delta=timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
235
- )
236
- print(f"Generated test token: {access_token}")
237
- return access_token
238
-
239
- # ==================== Main Application ====================
240
- def create_app():
241
- """Create the main Gradio application"""
242
-
243
- # Configure UI template if available
244
- if HAS_UI_TEMPLATE:
245
- ui.configure(
246
- project_name="Intelligent Retail Decision Making System",
247
- year="2025",
248
- about="AI-powered fashion detection and retail analytics",
249
- description="An integrated platform for fashion item detection and sales forecasting.",
250
- colors={
251
- "primary": "#0F6CBD",
252
- "accent": "#C4314B",
253
- "success": "#2E7D32",
254
- "bg1": "#F0F7FF",
255
- "bg2": "#E8F0FA",
256
- "bg3": "#DDE7F8"
257
- },
258
- meta_items=[
259
- ("Model", "Fashion Detection & Sales Forecasting"),
260
- ("Features", "Object Detection & Predictive Analytics"),
261
- ]
262
- )
263
-
264
- # Initialize API client
265
- api_client = FashionDetectionClient()
266
 
267
- # ==================== Prediction Functions ====================
268
  def predict_single_image(image: Image.Image, threshold: float) -> tuple:
269
  """Predict objects in a single image"""
270
- logger.info(">>> predict_single_image called")
271
  try:
272
  health_status = api_client.check_health()
273
  if not health_status.get('success', False):
@@ -275,20 +227,20 @@ def create_app():
275
 
276
  result = api_client.detect_single_image(image, threshold)
277
  result_text = format_detection_results(result)
278
- logger.info(f">>> predict_single_image result_text: {result_text}")
279
  if result.get('success', False) and result.get('detections'):
280
  image_with_boxes = draw_bounding_boxes_pil(image, result['detections'])
281
  return image_with_boxes, result_text
282
  else:
283
  return image, result_text
284
- logger.info(">>> predict_single_image completed")
285
  except Exception as e:
286
  error_msg = f"❌ Prediction error: {str(e)}"
287
  return image, error_msg
288
 
289
  def predict_batch_images(images: List[Image.Image], threshold: float):
290
  """Predict objects in multiple images"""
291
- logger.info(">>> predict_batch_images called")
292
  try:
293
  if not images:
294
  return [], "Please upload at least one image."
@@ -329,21 +281,32 @@ def create_app():
329
  except Exception as e:
330
  return [], f"❌ Batch prediction error: {str(e)}"
331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  def check_api_health():
333
  """Check and display API health status"""
334
- logger.info(">>> check_api_health called")
335
  health_status = api_client.check_health()
 
336
 
337
- if health_status.get('success', False):
338
- status_emoji = ""
339
- status_text = "Healthy"
340
- else:
341
- status_emoji = "❌"
342
- status_text = "Unhealthy"
343
 
344
  health_info = f"{status_emoji} API Status: {status_text}\n\n"
345
  health_info += f"📡 Endpoint: {API_BASE_URL}\n"
346
- health_info += f"🕒 Checked: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
347
 
348
  if health_status.get('success', False):
349
  health_info += f"🚀 Version: {health_status.get('version', 'N/A')}\n"
@@ -355,193 +318,171 @@ def create_app():
355
 
356
  return health_info
357
 
358
- def load_historical():
359
- """Load historical sales analysis"""
360
- try:
361
- return "<div style='padding: 20px;'>Historical sales analysis would be displayed here.</div>"
362
- except Exception as e:
363
- return f"<div style='color: red; padding: 20px;'>Error loading analysis: {str(e)}</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
- def make_prediction(date_str, horizon):
366
- """Make sales prediction"""
367
- try:
368
- return f"<div style='padding: 20px;'>Prediction for {date_str} with horizon {horizon} days would be displayed here.</div>"
369
- except Exception as e:
370
- return f"<div style='color: red; padding: 20px;'>Error generating prediction: {str(e)}</div>"
371
 
372
- # ==================== Create Gradio Interface ====================
373
- demo = gr.Blocks(title="💡 Intelligent Retail Decision Making System")
 
 
 
 
374
 
375
- with demo:
376
- # Header
377
- if HAS_UI_TEMPLATE:
378
- ui.create_header(logo_path="static/intelligent_retail.png")
379
- else:
380
- gr.Markdown("#💡 Intelligent Retail Decision Making System")
381
 
382
- gr.Markdown("AI-powered fashion detection and retail analytics platform")
383
 
384
- # Info card
385
- if HAS_UI_TEMPLATE:
386
- gr.HTML(ui.render_info_card(
387
- icon="📈",
388
- title="About this Application"
389
- ))
390
 
391
- # Main tabs
392
- with gr.Tabs():
393
- # Tab 1: Fashion Detection
394
- with gr.Tab("👔 Fashion Detection"):
395
- gr.Markdown("## Fashion Item Detection")
396
-
397
- # API Health Section
398
- with gr.Row():
399
- with gr.Column():
400
- gr.Markdown("### 📊 API Status")
401
- health_btn = gr.Button("Check API Health", variant="secondary")
402
- health_output = gr.Textbox(label="API Health Status", lines=6, interactive=False)
403
-
404
- # Single Image Detection
405
- with gr.Row():
406
- with gr.Column():
407
- gr.Markdown("### 📷 Single Image Detection")
408
- single_image = gr.Image(type="pil", label="Upload Fashion Image")
409
- threshold_slider = gr.Slider(
410
- minimum=0.1, maximum=0.9, value=0.4, step=0.05,
411
- label="Detection Confidence Threshold"
412
- )
413
- single_btn = gr.Button("Detect Objects", variant="primary")
414
-
415
- with gr.Column():
416
- single_output_image = gr.Image(label="Detection Results", interactive=False)
417
- single_output_text = gr.Textbox(label="Detection Results", lines=12)
418
-
419
- # Batch Image Detection
420
- with gr.Row():
421
- with gr.Column():
422
- gr.Markdown("### 📦 Batch Image Detection")
423
- batch_images = gr.File(
424
- label="Upload Multiple Images",
425
- file_count="multiple",
426
- file_types=["image"]
427
- )
428
- batch_threshold = gr.Slider(
429
- minimum=0.1, maximum=0.9, value=0.4, step=0.05,
430
- label="Detection Confidence Threshold"
431
- )
432
- batch_btn = gr.Button("Process Batch", variant="primary")
433
-
434
- with gr.Column():
435
- batch_output_images = gr.Gallery(
436
- label="Detection Results",
437
- columns=3,
438
- height="auto",
439
- interactive=False
440
- )
441
- batch_output_text = gr.Textbox(label="Batch Results", lines=15)
442
-
443
- # Examples
444
- if os.path.exists("static/examples"):
445
- gr.Examples(
446
- examples=[
447
- ["static/examples/image1.png"],
448
- ["static/examples/image2.png"],
449
- ["static/examples/image3.png"]
450
- ],
451
- inputs=single_image,
452
- label="Try these example images"
453
- )
454
-
455
- # Event handlers
456
- health_btn.click(
457
- fn=check_api_health,
458
- outputs=health_output,
459
- api_visibility=SHOW_GRADIO_API
460
- )
461
 
462
- single_btn.click(
463
- fn=predict_single_image,
464
- inputs=[single_image, threshold_slider],
465
- outputs=[single_output_image, single_output_text],
466
- api_visibility=SHOW_GRADIO_API
467
- )
 
468
 
469
- batch_btn.click(
470
- fn=lambda images, threshold: predict_batch_images(convert_to_pil_images(images), threshold),
471
- inputs=[batch_images, batch_threshold],
472
- outputs=[batch_output_images, batch_output_text],
473
- api_visibility=SHOW_GRADIO_API
474
- )
 
 
 
475
 
476
- # Tab 2: Historical Sales Analysis
477
- with gr.Tab("📊 Historical Sales Analysis"):
478
- gr.Markdown("### Explore and visualize historical sales data")
479
 
480
- with gr.Row():
481
- analyze_btn = gr.Button(
482
- "Load Historical Analysis",
483
- variant="primary",
484
- size="lg"
485
- )
486
 
487
- historical_output = gr.HTML(label="Analysis Results")
 
488
 
489
- analyze_btn.click(
490
- fn=load_historical,
491
- outputs=historical_output
492
- )
493
 
494
- # Tab 3: Sales Prediction
495
- with gr.Tab("🔮 Sales Prediction"):
496
- gr.Markdown("### Generate sales forecasts using machine learning")
497
-
498
- with gr.Row():
499
- with gr.Column(scale=1):
500
- gr.Markdown("#### Input Parameters")
501
-
502
- date_input = gr.Textbox(
503
- label="Prediction Date",
504
- placeholder="YYYY-MM-DD",
505
- info="Enter the date for prediction",
506
- value=""
507
- )
508
-
509
- forecast_horizon = gr.Slider(
510
- minimum=1,
511
- maximum=30,
512
- value=7,
513
- step=1,
514
- label="Forecast Horizon (days)",
515
- info="Number of days to forecast"
516
- )
517
-
518
- predict_btn = gr.Button(
519
- "Generate Prediction",
520
- variant="primary",
521
- size="lg"
522
- )
523
-
524
- with gr.Column(scale=2):
525
- gr.Markdown("#### Prediction Results")
526
- prediction_output = gr.HTML(label="Forecast")
527
-
528
- predict_btn.click(
529
- fn=make_prediction,
530
- inputs=[date_input, forecast_horizon],
531
- outputs=prediction_output
532
- )
533
 
534
  # Footer
535
- if HAS_UI_TEMPLATE:
536
- ui.create_footer(
537
- logo_path="static/intelligent_retail.png",
538
- creator_name="Thi-Diem-My Le",
539
- creator_link="https://beacons.ai/elizabethmyn",
540
- org_name="AI VIET NAM",
541
- org_link="https://aivietnam.edu.vn/"
542
- )
543
- else:
544
- gr.Markdown("---")
 
 
 
 
545
  gr.Markdown("© 2025 Intelligent Retail System. All rights reserved.")
546
 
547
  return demo
@@ -550,7 +491,7 @@ def create_app():
550
  # ==================== Application Entry Point ====================
551
  def main():
552
  """Main entry point"""
553
- demo = create_app()
554
 
555
  # Custom CSS
556
  custom_css = """
@@ -559,8 +500,7 @@ def main():
559
  .error {color: red; font-weight: bold;}
560
  """
561
 
562
- if HAS_UI_TEMPLATE:
563
- custom_css = ui.get_custom_css() + custom_css
564
 
565
  demo.launch(
566
  server_name="0.0.0.0",
 
 
1
  import gradio as gr
2
  import requests
3
  from PIL import Image, ImageDraw, ImageFont
4
  import io
5
  from typing import List, Dict, Any
6
+ from datetime import datetime
 
7
  import random
 
 
8
 
9
+ # Import modules from sales forecasting app
10
+ try:
11
+ import app.frontend.ui_template as ui
12
+ from app.utils.data_loader import (
13
+ load_data,
14
+ load_feature_engineered_data,
15
+ load_feature_stats,
16
+ load_model,
17
+ )
18
+ from app.frontend.dashboard import historical_sales_view
19
+ from app.services.prediction import sales_prediction_view
20
+ SALES_MODULE_AVAILABLE = True
21
+ except ImportError:
22
+ SALES_MODULE_AVAILABLE = False
23
+ print("Warning: Sales forecasting modules not available")
24
+
25
+ # Import fashion detection modules
26
  try:
27
  from app.utils.logger import logger
28
+ from app.core.config import settings
29
+ FASHION_MODULE_AVAILABLE = True
30
  except ImportError:
31
+ FASHION_MODULE_AVAILABLE = False
32
+ print("Warning: Fashion detection modules not available")
33
+ # Fallback logger
34
  import logging
35
  logger = logging.getLogger(__name__)
 
36
 
37
+ # Fallback settings
38
+ class Settings:
39
+ API_TOKEN = "your-api-token-here"
40
+ settings = Settings()
 
 
 
41
 
42
+ # Configuration for Fashion Detection API
43
  API_BASE_URL = "http://localhost:5050"
44
  API_VERSION = "v1"
45
  API_ENDPOINT = f"{API_BASE_URL}/api/{API_VERSION}/detect/image"
46
  API_HEALTH_ENDPOINT = f"{API_BASE_URL}/api/{API_VERSION}/health"
47
  API_BATCH_ENDPOINT = f"{API_BASE_URL}/api/{API_VERSION}/detect/batch"
 
48
  SHOW_GRADIO_API = "undocumented"
49
 
50
 
 
51
  class FashionDetectionClient:
52
  """Client for interacting with the Fashion Detection API"""
53
 
54
  def __init__(self, base_url: str = API_BASE_URL, token: str = None):
55
  self.base_url = base_url
56
+ self.token = token or (settings.API_TOKEN if FASHION_MODULE_AVAILABLE else "default-token")
57
+ self.headers = {"X-Token": self.token}
 
 
 
 
58
  self.session = requests.Session()
59
  self.session.headers.update(self.headers)
60
 
61
  def check_health(self) -> Dict[str, Any]:
62
  """Check API health status"""
 
63
  try:
64
  response = self.session.get(API_HEALTH_ENDPOINT, timeout=10)
65
  response.raise_for_status()
 
75
 
76
  def detect_single_image(self, image: Image.Image, threshold: float = 0.4) -> Dict[str, Any]:
77
  """Detect objects in a single image"""
 
78
  try:
79
  img_byte_arr = io.BytesIO()
80
  image.save(img_byte_arr, format='PNG')
 
82
 
83
  files = {"file": ("image.png", img_byte_arr, "image/png")}
84
  params = {"threshold": threshold} if threshold else {}
85
+
86
  response = self.session.post(
87
  API_ENDPOINT,
88
  files=files,
 
90
  timeout=30
91
  )
92
  response.raise_for_status()
 
93
  return response.json()
94
 
95
  except requests.exceptions.RequestException as e:
 
96
  return {
97
  "success": False,
98
+ "error": f"API request failed: {str(e)}",
99
  "details": f"URL: {API_ENDPOINT}"
100
  }
101
  except Exception as e:
 
132
  }
133
 
134
 
 
135
  def draw_bounding_boxes_pil(image: Image.Image, detections: List[Dict[str, Any]]) -> Image.Image:
136
  """Draw bounding boxes on PIL Image"""
137
  img_with_boxes = image.copy()
 
214
  return result_text
215
 
216
 
217
+ def create_fashion_detection_tab(api_client: FashionDetectionClient):
218
+ """Create the Fashion Detection tab"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
 
220
  def predict_single_image(image: Image.Image, threshold: float) -> tuple:
221
  """Predict objects in a single image"""
222
+ logger.info(">>> predict_single_image ping clicked")
223
  try:
224
  health_status = api_client.check_health()
225
  if not health_status.get('success', False):
 
227
 
228
  result = api_client.detect_single_image(image, threshold)
229
  result_text = format_detection_results(result)
230
+
231
  if result.get('success', False) and result.get('detections'):
232
  image_with_boxes = draw_bounding_boxes_pil(image, result['detections'])
233
  return image_with_boxes, result_text
234
  else:
235
  return image, result_text
236
+
237
  except Exception as e:
238
  error_msg = f"❌ Prediction error: {str(e)}"
239
  return image, error_msg
240
 
241
  def predict_batch_images(images: List[Image.Image], threshold: float):
242
  """Predict objects in multiple images"""
243
+ logger.info(">>> predict_batch_images ping clicked")
244
  try:
245
  if not images:
246
  return [], "Please upload at least one image."
 
281
  except Exception as e:
282
  return [], f"❌ Batch prediction error: {str(e)}"
283
 
284
+ def convert_to_pil_images(gradio_files: List) -> List[Image.Image]:
285
+ """Convert Gradio file objects to PIL Images"""
286
+ pil_images = []
287
+ for file in gradio_files:
288
+ try:
289
+ file_path = file.name if hasattr(file, 'name') else file
290
+ pil_image = Image.open(file_path)
291
+ if pil_image.mode != "RGB":
292
+ pil_image = pil_image.convert("RGB")
293
+ pil_images.append(pil_image)
294
+ except Exception as e:
295
+ logger.error(f"Error converting image {file_path}: {str(e)}")
296
+ return pil_images
297
+
298
  def check_api_health():
299
  """Check and display API health status"""
300
+ logger.info(">>> check_api_health ping clicked")
301
  health_status = api_client.check_health()
302
+ logger.info(health_status)
303
 
304
+ status_emoji = "✅" if health_status.get('success', False) else "❌"
305
+ status_text = "Healthy" if health_status.get('success', False) else "Unhealthy"
 
 
 
 
306
 
307
  health_info = f"{status_emoji} API Status: {status_text}\n\n"
308
  health_info += f"📡 Endpoint: {API_BASE_URL}\n"
309
+ health_info += f"🕐 Checked: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
310
 
311
  if health_status.get('success', False):
312
  health_info += f"🚀 Version: {health_status.get('version', 'N/A')}\n"
 
318
 
319
  return health_info
320
 
321
+ with gr.Column():
322
+ gr.Markdown("# 👔 Fashion Detection System")
323
+ gr.Markdown("Upload images to detect fashion items using our AI-powered API")
324
+
325
+ # API Health Section
326
+ with gr.Row():
327
+ with gr.Column():
328
+ gr.Markdown("## 📊 API Status")
329
+ health_btn = gr.Button("Check API Health", variant="secondary")
330
+ health_output = gr.Textbox(label="API Health Status", lines=6, interactive=False)
331
+
332
+ # Single Image Detection
333
+ with gr.Row():
334
+ with gr.Column():
335
+ gr.Markdown("## 📷 Single Image Detection")
336
+ single_image = gr.Image(type="pil", label="Upload Fashion Image")
337
+ threshold_slider = gr.Slider(
338
+ minimum=0.1, maximum=0.9, value=0.4, step=0.05,
339
+ label="Detection Confidence Threshold"
340
+ )
341
+ single_btn = gr.Button("Detect Objects", variant="primary")
342
+
343
+ with gr.Column():
344
+ single_output_image = gr.Image(label="Detection Results", interactive=False)
345
+ single_output_text = gr.Textbox(label="Detection Results", lines=12)
346
+
347
+ # Batch Image Detection
348
+ with gr.Row():
349
+ with gr.Column():
350
+ gr.Markdown("## 📦 Batch Image Detection")
351
+ batch_images = gr.File(
352
+ label="Upload Multiple Images",
353
+ file_count="multiple",
354
+ file_types=["image"]
355
+ )
356
+ batch_threshold = gr.Slider(
357
+ minimum=0.1, maximum=0.9, value=0.4, step=0.05,
358
+ label="Detection Confidence Threshold"
359
+ )
360
+ batch_btn = gr.Button("Process Batch", variant="primary")
361
+
362
+ with gr.Column():
363
+ batch_output_images = gr.Gallery(
364
+ label="Detection Results",
365
+ columns=3,
366
+ height="auto",
367
+ interactive=False
368
+ )
369
+ batch_output_text = gr.Textbox(label="Batch Results", lines=15)
370
+
371
+ # Examples
372
+ gr.Examples(
373
+ examples=[
374
+ ["static/examples/image1.png"],
375
+ ["static/examples/image2.png"],
376
+ ["static/examples/image3.png"]
377
+ ],
378
+ inputs=single_image,
379
+ label="Try these example images"
380
+ )
381
 
382
+ # Event handlers
383
+ health_btn.click(
384
+ fn=check_api_health,
385
+ outputs=health_output,
386
+ api_visibility=SHOW_GRADIO_API
387
+ )
388
 
389
+ single_btn.click(
390
+ fn=predict_single_image,
391
+ inputs=[single_image, threshold_slider],
392
+ outputs=[single_output_image, single_output_text],
393
+ api_visibility=SHOW_GRADIO_API
394
+ )
395
 
396
+ batch_btn.click(
397
+ fn=lambda images, threshold: predict_batch_images(convert_to_pil_images(images), threshold),
398
+ inputs=[batch_images, batch_threshold],
399
+ outputs=[batch_output_images, batch_output_text],
400
+ api_visibility=SHOW_GRADIO_API
401
+ )
402
 
 
403
 
404
+ def create_sales_forecasting_tab(data, model, feature_stats):
405
+ """Create the Sales Forecasting tab"""
 
 
 
 
406
 
407
+ with gr.Column():
408
+ gr.Markdown("# 📈 Sales Forecasting System")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
+ # Page selector for sales forecasting sub-sections
411
+ page_selector = gr.Dropdown(
412
+ choices=["Historical Sales Analysis", "Sales Prediction"],
413
+ value="Historical Sales Analysis",
414
+ label="Choose a view",
415
+ interactive=True
416
+ )
417
 
418
+ # Render content based on selection
419
+ @gr.render(inputs=page_selector)
420
+ def render_sales_content(page):
421
+ if page == "Historical Sales Analysis":
422
+ historical_sales_view(data)
423
+ else:
424
+ print("Loading feature engineered data for prediction...")
425
+ feature_engineered_data = load_feature_engineered_data()
426
+ sales_prediction_view(data, model, feature_stats, feature_engineered_data)
427
 
 
 
 
428
 
429
+ def create_gradio_interface():
430
+ """Create the Gradio application"""
 
 
 
 
431
 
432
+ # Initialize API client for fashion detection
433
+ api_client = FashionDetectionClient()
434
 
435
+ # Load sales forecasting data if available
436
+ sales_data = None
437
+ sales_model = None
438
+ sales_feature_stats = None
439
 
440
+ if SALES_MODULE_AVAILABLE:
441
+ try:
442
+ sales_data = load_data()
443
+ sales_model = load_model()
444
+ sales_feature_stats = load_feature_stats()
445
+ except Exception as e:
446
+ print(f"Warning: Could not load sales forecasting data: {e}")
447
+
448
+ # Create main interface
449
+ with gr.Blocks(
450
+ title="💡 Intelligent Retail Decision Making System",
451
+ ) as demo:
452
+
453
+ gr.Markdown("# 💡 Intelligent Retail Decision Making System")
454
+ gr.Markdown("### Comprehensive AI-powered solution for retail analytics and product detection")
455
+
456
+ # Main navigation tabs
457
+ with gr.Tabs():
458
+ # Fashion Detection Tab
459
+ with gr.Tab("👔 Fashion Detection"):
460
+ create_fashion_detection_tab(api_client)
461
+
462
+ # Sales Forecasting Tab
463
+ if SALES_MODULE_AVAILABLE and sales_data is not None:
464
+ with gr.Tab("📈 Sales Forecasting"):
465
+ create_sales_forecasting_tab(sales_data, sales_model, sales_feature_stats)
466
+ else:
467
+ with gr.Tab("📈 Sales Forecasting"):
468
+ gr.Markdown("## ⚠️ Sales Forecasting Module Not Available")
469
+ gr.Markdown("Please ensure all required dependencies are installed.")
 
 
 
 
 
 
 
 
 
470
 
471
  # Footer
472
+ try:
473
+ if SALES_MODULE_AVAILABLE:
474
+ ui.create_footer(
475
+ logo_path="static/intelligent_retail.png",
476
+ creator_name="Thi-Diem-My Le",
477
+ creator_link="https://beacons.ai/elizabethmyn",
478
+ org_name="AI VIET NAM",
479
+ org_link="https://aivietnam.edu.vn/"
480
+ )
481
+ else:
482
+ gr.Markdown("---")
483
+ gr.Markdown("### Created by Thi-Diem-My Le | AI VIET NAM")
484
+ except Exception as e:
485
+ print(f"Warning: Could not create footer: {e}")
486
  gr.Markdown("© 2025 Intelligent Retail System. All rights reserved.")
487
 
488
  return demo
 
491
  # ==================== Application Entry Point ====================
492
  def main():
493
  """Main entry point"""
494
+ demo = create_gradio_interface()
495
 
496
  # Custom CSS
497
  custom_css = """
 
500
  .error {color: red; font-weight: bold;}
501
  """
502
 
503
+ custom_css = ui.get_custom_css() + custom_css
 
504
 
505
  demo.launch(
506
  server_name="0.0.0.0",
app/frontend/ui_template.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ from pathlib import Path
4
+ from typing import Optional, Dict, List, Tuple
5
+ import gradio as gr
6
+
7
+
8
+ class ThemeConfig:
9
+ """Centralized theme configuration with validation."""
10
+
11
+ def __init__(self):
12
+ # Default color palette
13
+ self.primary_color = "#0F6CBD"
14
+ self.accent_color = "#C4314B"
15
+ self.success_color = "#2E7D32"
16
+ self.bg1 = "#F0F7FF"
17
+ self.bg2 = "#E8F0FA"
18
+ self.bg3 = "#DDE7F8"
19
+ self.font_family = (
20
+ "'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', "
21
+ "Roboto, 'Helvetica Neue', Arial, sans-serif"
22
+ )
23
+
24
+ # Metadata
25
+ self.project_name = "Heart Project"
26
+ self.year = "2025"
27
+ self.about = ""
28
+ self.description = ""
29
+ self.meta_items: List[Tuple[str, str]] = []
30
+
31
+ # Cache for CSS
32
+ self._css_cache: Optional[str] = None
33
+
34
+ def update_colors(self, **kwargs) -> None:
35
+ """Update color scheme with validation."""
36
+ valid_keys = {'primary', 'accent', 'success', 'bg1', 'bg2', 'bg3'}
37
+ for key, value in kwargs.items():
38
+ if key not in valid_keys or value is None:
39
+ continue
40
+ if not self._is_valid_color(value):
41
+ raise ValueError(f"Invalid color format for {key}: {value}")
42
+ setattr(self, f"{key}_color" if not key.startswith('bg') else key, value)
43
+ self._invalidate_cache()
44
+
45
+ def update_font(self, font_family: str) -> None:
46
+ """Update font family."""
47
+ if font_family and isinstance(font_family, str):
48
+ self.font_family = font_family
49
+ self._invalidate_cache()
50
+
51
+ def update_meta(self, project_name: Optional[str] = None,
52
+ year: Optional[str] = None,
53
+ about: Optional[str] = None,
54
+ description: Optional[str] = None,
55
+ meta_items: Optional[List[Tuple[str, str]]] = None) -> None:
56
+ """Update metadata."""
57
+ if project_name is not None:
58
+ self.project_name = project_name
59
+ if year is not None:
60
+ self.year = year
61
+ if about is not None:
62
+ self.about = about
63
+ if description is not None:
64
+ self.description = description
65
+ if meta_items is not None:
66
+ self.meta_items = meta_items
67
+
68
+ @staticmethod
69
+ def _is_valid_color(color: str) -> bool:
70
+ """Validate hex color format."""
71
+ return isinstance(color, str) and (
72
+ color.startswith('#') and len(color) in (4, 7, 9)
73
+ )
74
+
75
+ def _invalidate_cache(self) -> None:
76
+ """Clear CSS cache when theme changes."""
77
+ self._css_cache = None
78
+
79
+ def get_css(self) -> str:
80
+ """Get or generate CSS with caching."""
81
+ if self._css_cache is None:
82
+ self._css_cache = self._build_css()
83
+ return self._css_cache
84
+
85
+ def _build_css(self) -> str:
86
+ """Build the complete CSS string."""
87
+ return f"""
88
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
89
+
90
+ .gradio-container {{
91
+ min-height: 100vh !important;
92
+ width: 100vw !important;
93
+ margin: 0 !important;
94
+ padding: 0px !important;
95
+ background: linear-gradient(135deg, {self.bg1} 0%, {self.bg2} 50%, {self.bg3} 100%);
96
+ background-size: 600% 600%;
97
+ animation: gradientBG 7s ease infinite;
98
+ }}
99
+
100
+ /* Global font setup */
101
+ body, .gradio-container, .gr-block, .gr-markdown, .gr-button, .gr-input,
102
+ .gr-dropdown, .gr-number, .gr-plot, .gr-dataframe, .gr-accordion, .gr-form,
103
+ .gr-textbox, .gr-html, table, th, td, label, h1, h2, h3, h4, h5, h6, p, span, div {{
104
+ font-family: {self.font_family} !important;
105
+ }}
106
+
107
+ @keyframes gradientBG {{
108
+ 0% {{background-position: 0% 50%;}}
109
+ 50% {{background-position: 100% 50%;}}
110
+ 100% {{background-position: 0% 50%;}}
111
+ }}
112
+
113
+ /* Minimize spacing and padding */
114
+ .content-wrap {{
115
+ padding: 2px !important;
116
+ margin: 0 !important;
117
+ }}
118
+
119
+ /* Reduce component spacing */
120
+ .gr-row {{
121
+ gap: 5px !important;
122
+ margin: 2px 0 !important;
123
+ }}
124
+
125
+ .gr-column {{
126
+ gap: 4px !important;
127
+ padding: 4px !important;
128
+ }}
129
+
130
+ /* Accordion optimization */
131
+ .gr-accordion {{
132
+ margin: 4px 0 !important;
133
+ }}
134
+
135
+ .gr-accordion .gr-accordion-content {{
136
+ padding: 2px !important;
137
+ }}
138
+
139
+ /* Form elements spacing */
140
+ .gr-form {{
141
+ gap: 2px !important;
142
+ }}
143
+
144
+ /* Button styling */
145
+ .gr-button {{
146
+ margin: 2px 0 !important;
147
+ }}
148
+
149
+ /* DataFrame optimization */
150
+ .gr-dataframe {{
151
+ margin: 4px 0 !important;
152
+ }}
153
+
154
+ /* Remove horizontal scroll from data preview */
155
+ .gr-dataframe .wrap {{
156
+ overflow-x: auto !important;
157
+ max-width: 100% !important;
158
+ }}
159
+
160
+ /* Plot optimization */
161
+ .gr-plot {{
162
+ margin: 4px 0 !important;
163
+ }}
164
+
165
+ /* Reduce markdown margins */
166
+ .gr-markdown {{
167
+ margin: 2px 0 !important;
168
+ }}
169
+
170
+ /* Footer positioning */
171
+ .sticky-footer {{
172
+ position: fixed;
173
+ bottom: 0px;
174
+ left: 0;
175
+ width: 100%;
176
+ background: {self.bg1};
177
+ padding: 6px !important;
178
+ box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
179
+ z-index: 1000;
180
+ }}
181
+ """
182
+
183
+
184
+ # Global theme instance
185
+ _theme = ThemeConfig()
186
+
187
+
188
+ def configure(project_name: Optional[str] = None,
189
+ year: Optional[str] = None,
190
+ about: Optional[str] = None,
191
+ description: Optional[str] = None,
192
+ colors: Optional[Dict[str, str]] = None,
193
+ font_family: Optional[str] = None,
194
+ meta_items: Optional[List[Tuple[str, str]]] = None) -> None:
195
+ """
196
+ One-call configuration for the entire theme.
197
+
198
+ Args:
199
+ project_name: Name of the project
200
+ year: Project year
201
+ about: About project
202
+ description: Project description
203
+ colors: Dict with keys: primary, accent, success, bg1, bg2, bg3
204
+ font_family: CSS font family string
205
+ meta_items: List of (label, value) tuples for metadata
206
+ """
207
+ if colors:
208
+ _theme.update_colors(**colors)
209
+ if font_family:
210
+ _theme.update_font(font_family)
211
+ _theme.update_meta(project_name, year, about, description, meta_items)
212
+
213
+
214
def get_custom_css() -> str:
    """Return the CSS generated from the active theme configuration."""
    return _theme.get_css()
217
+
218
+
219
+ def _image_to_base64(image_path: str) -> str:
220
+ """
221
+ Convert image to base64 string with better error handling.
222
+
223
+ Args:
224
+ image_path: Relative path to image file
225
+
226
+ Returns:
227
+ Base64 encoded string
228
+
229
+ Raises:
230
+ FileNotFoundError: If image file doesn't exist
231
+ """
232
+ current_dir = Path(__file__).parent
233
+ full_path = current_dir / image_path
234
+
235
+ if not full_path.exists():
236
+ raise FileNotFoundError(f"Image not found: {full_path}")
237
+
238
+ with open(full_path, "rb") as f:
239
+ return base64.b64encode(f.read()).decode("utf-8")
240
+
241
+
242
+ def create_header(logo_path: str = "static/intelligent_retail.png") -> None:
243
+ """
244
+ Create a header with logo and project name.
245
+
246
+ Args:
247
+ logo_path: Path to logo image
248
+ """
249
+ with gr.Row():
250
+ with gr.Column(scale=2):
251
+ try:
252
+ logo_base64 = _image_to_base64(logo_path)
253
+ gr.HTML(
254
+ f"""<img src="data:image/png;base64,{logo_base64}"
255
+ alt="Logo"
256
+ style="height:100px;width:auto;margin:0 auto;margin-bottom:18px;display:block;">"""
257
+ )
258
+ except FileNotFoundError:
259
+ gr.HTML("<div style='text-align:center;color:#999;'>Logo not found</div>")
260
+
261
+ with gr.Column(scale=2):
262
+ gr.HTML(f"""
263
+ <div style="display:flex;justify-content:flex-start;align-items:center;gap:30px;">
264
+ <div>
265
+ <h1 style="margin-bottom:0;color:{_theme.primary_color};font-size:2.32em;font-weight:bold;">
266
+ {_theme.project_name}
267
+ </h1>
268
+ <p style="margin-top:4px;font-size:1.1em;color:#555;">{_theme.about}</p>
269
+ </div>
270
+ </div>
271
+ """)
272
+
273
+
274
+ def create_footer(logo_path: str = "static/intelligent_retail.png",
275
+ creator_name: str = "Thi-Diem-My Le",
276
+ creator_link: str = "https://beacons.ai/elizabethmyn",
277
+ org_name: str = "AI VIET NAM",
278
+ org_link: str = "https://aivietnam.edu.vn/") -> gr.HTML:
279
+ """
280
+ Create a sticky footer with creator information.
281
+
282
+ Args:
283
+ logo_path: Path to logo image
284
+ creator_name: Name of creator
285
+ creator_link: Link to creator profile
286
+ org_name: Organization name
287
+ org_link: Link to organization
288
+
289
+ Returns:
290
+ Gradio HTML component
291
+ """
292
+ try:
293
+ logo_base64 = _image_to_base64(logo_path)
294
+ logo_html = f'<img src="data:image/png;base64,{logo_base64}" alt="Logo" style="height:0px;width:auto;">'
295
+ except FileNotFoundError:
296
+ logo_html = ""
297
+
298
+ footer_html = f"""
299
+ <style>
300
+ .sticky-footer{{
301
+ position:fixed;
302
+ bottom:0px;
303
+ left:0;
304
+ width:100%;
305
+ background:#E8F5E8;
306
+ padding:10px;
307
+ box-shadow:0 -2px 10px rgba(0,0,0,0.1);
308
+ z-index:1000;
309
+ }}
310
+ .content-wrap{{padding-bottom:60px;}}
311
+ </style>
312
+ <div class="sticky-footer">
313
+ <div style="text-align:center;font-size:18px;color:#888">
314
+ Created by
315
+ <a href="{creator_link}" target="_blank"
316
+ style="color:#465C88;text-decoration:none;font-weight:bold;display:inline-flex;align-items:center;">
317
+ {creator_name}
318
+ {logo_html}
319
+ </a>
320
+ from
321
+ <a href="{org_link}" target="_blank"
322
+ style="color:#355724;text-decoration:none;font-weight:bold;">
323
+ {org_name}
324
+ </a>
325
+ </div>
326
+ </div>
327
+ """
328
+ return gr.HTML(footer_html)
329
+
330
+
331
+ def render_info_card(description: Optional[str] = None,
332
+ meta_items: Optional[List[Tuple[str, str]]] = None,
333
+ icon: str = "🧠",
334
+ title: str = "About this demo") -> str:
335
+ """
336
+ Render an informational card.
337
+
338
+ Args:
339
+ description: Card description text
340
+ meta_items: List of (label, value) tuples
341
+ icon: Emoji or icon for the card
342
+ title: Card title
343
+
344
+ Returns:
345
+ HTML string for the card
346
+ """
347
+ desc = description if description is not None else _theme.description
348
+ items = meta_items if meta_items is not None else _theme.meta_items
349
+
350
+ meta_html = ""
351
+ if items:
352
+ meta_html = "".join([f"<span><strong>{k}</strong>: {v}</span><br>" for k, v in items])
353
+
354
+ return f"""
355
+ <div style="margin:8px 0 8px 0;">
356
+ <div style="background:#F5F9FF;border-left:6px solid {_theme.primary_color};
357
+ padding:14px 16px;border-radius:10px;box-shadow:0 1px 3px rgba(0,0,0,0.06);">
358
+ <div style="display:flex;gap:14px;align-items:flex-start;">
359
+ <div style="font-size:22px;">{icon}</div>
360
+ <div>
361
+ <div style="font-weight:700;color:{_theme.primary_color};margin-bottom:4px;">{title}</div>
362
+ <div style="color:#000;font-size:14px;line-height:1.5;">{desc}</div>
363
+ {f'<div style="margin-top:8px;color:#000;font-size:13px;">{meta_html}</div>' if meta_html else ''}
364
+ </div>
365
+ </div>
366
+ </div>
367
+ </div>
368
+ """
369
+
370
+
371
def render_disclaimer(text: str,
                      icon: str = "⚠️",
                      title: str = "Educational Use Only") -> str:
    """Build the HTML snippet for a disclaimer/warning card.

    Args:
        text: Body text of the warning.
        icon: Emoji or icon shown beside the message.
        title: Bold heading of the card.

    Returns:
        An HTML string styled with the theme's accent color.
    """
    accent = _theme.accent_color
    return f"""
    <div style="margin:8px 0 6px 0;">
      <div style="background:#FFF4F4;border-left:6px solid {accent};
                  padding:12px 16px;border-radius:8px;box-shadow:0 1px 3px rgba(0,0,0,0.06);">
        <div style="display:flex;gap:10px;align-items:flex-start;color:#000;">
          <span style="font-size:20px">{icon}</span>
          <div>
            <div style="font-weight:700;margin-bottom:4px;">{title}</div>
            <div style="font-size:14px;line-height:1.4;">{text}</div>
          </div>
        </div>
      </div>
    </div>
    """
399
+
400
+
401
# Backward compatibility - expose old function names
def set_colors(**kwargs):
    """Legacy function - use configure() instead."""
    _theme.update_colors(**kwargs)


def set_font(font_family: str):
    """Legacy function - use configure() instead."""
    _theme.update_font(font_family)


def set_meta(**kwargs):
    """Legacy function - use configure() instead."""
    _theme.update_meta(**kwargs)
415
+
416
+
417
+ # Expose custom_css as a property for backward compatibility
418
+ @property
419
+ def custom_css():
420
+ return _theme.get_css()
app/main.py CHANGED
@@ -7,6 +7,9 @@ from app.core.config import settings
7
  from app.api.routes import detection, health
8
  from app.utils.logger import logger
9
 
 
 
 
10
  # Create FastAPI application
11
  app = FastAPI(
12
  title=settings.APP_NAME,
@@ -17,6 +20,12 @@ app = FastAPI(
17
  openapi_url="/api/openapi.json"
18
  )
19
 
 
 
 
 
 
 
20
  # Add CORS middleware
21
  app.add_middleware(
22
  CORSMiddleware,
 
7
  from app.api.routes import detection, health
8
  from app.utils.logger import logger
9
 
10
+ from datetime import timedelta
11
+ from app.core.security import create_access_token
12
+
13
  # Create FastAPI application
14
  app = FastAPI(
15
  title=settings.APP_NAME,
 
20
  openapi_url="/api/openapi.json"
21
  )
22
 
23
+ access_token = create_access_token(
24
+ data={"sub": "test_user"},
25
+ expires_delta=timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
26
+ )
27
+ print(f"Generated test token: {access_token}")
28
+
29
  # Add CORS middleware
30
  app.add_middleware(
31
  CORSMiddleware,
app/services/prediction.py ADDED
@@ -0,0 +1,731 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ import pandas as pd
6
+ import seaborn as sns
7
+ import gradio as gr
8
+
9
+
10
def sales_prediction_view(data, model, feature_stats, feature_engineered_data):
    """Display the sales prediction tool interface.

    Builds the full Gradio Blocks dashboard: store/product selection,
    prediction parameters (date, events, weather, competition, supply),
    and the result panel (text summary + three plots).

    Args:
        data: Raw sales data (not used directly in this view).
        model: Fitted model used for prediction; must expose ``predict``.
        feature_stats: Feature statistics (not used directly in this view).
        feature_engineered_data: DataFrame with a store column (``store_id``
            or ``store``), an item column (``item_id`` or ``item``), a
            ``date`` column, and optionally ``store_name`` / ``item_name``.

    Returns:
        A Gradio interface: a minimal error placeholder when the model or
        data is missing, otherwise the assembled ``gr.Blocks`` app.
    """

    # Fail fast with a minimal error interface when prerequisites are missing.
    if model is None:
        return gr.Interface(
            fn=lambda: "Model not loaded. Please check if the model file exists.",
            inputs=[],
            outputs=gr.Textbox(label="Error"),
            title="Sales Prediction Tool"
        )

    if feature_engineered_data.empty:
        return gr.Interface(
            fn=lambda: "Feature engineered data not loaded.",
            inputs=[],
            outputs=gr.Textbox(label="Error"),
            title="Sales Prediction Tool"
        )

    # Determine store and item column names (the schema varies between datasets).
    store_col = "store_id" if "store_id" in feature_engineered_data.columns else "store"
    item_col = "item_id" if "item_id" in feature_engineered_data.columns else "item"

    # Check for optional human-readable store/item name columns.
    has_store_names = "store_name" in feature_engineered_data.columns
    has_item_names = "item_name" in feature_engineered_data.columns

    # Create ID -> display-name mapping dictionaries when names are available.
    store_names, item_names = create_name_mappings(
        feature_engineered_data, store_col, item_col, has_store_names, has_item_names
    )

    # Get unique store list for the store dropdown.
    stores = sorted(feature_engineered_data[store_col].unique())

    # Dropdown labels use the "<id> - <name>" format when names exist.
    if has_store_names:
        store_options = [f"{store_id} - {store_names[store_id]}" for store_id in stores]
    else:
        store_options = stores

    def update_items(store_selection):
        """Update item dropdown based on selected store."""
        # Parse the numeric store id back out of the "<id> - <name>" label.
        if has_store_names:
            store_id = int(store_selection.split(" - ")[0])
        else:
            store_id = store_selection

        store_items = feature_engineered_data[feature_engineered_data[store_col] == store_id][item_col].unique()

        if has_item_names:
            item_options = [
                f"{item_id} - {item_names[item_id]}"
                for item_id in store_items
                if item_id in item_names
            ]
        else:
            item_options = sorted(store_items)

        return gr.Dropdown(choices=item_options)

    def predict_sales(store_selection, item_selection, prediction_date, is_holiday,
                      special_event, promotion_impact, event_impact, clearance_impact,
                      launch_impact, temperature, weather_condition, humidity,
                      competition_level, supply_chain):
        """Wrapper function for prediction with all inputs.

        Parses the widget values, normalizes them into the model-input dict,
        and returns (result_text, plot1, plot2, plot3) for the output panel.
        """

        # Parse store and item IDs from the "<id> - <name>" dropdown labels.
        if has_store_names:
            store_id = int(store_selection.split(" - ")[0])
        else:
            store_id = store_selection

        if has_item_names:
            item_id = int(item_selection.split(" - ")[0])
        else:
            item_id = item_selection

        # Normalize raw widget values into the prediction-input dict.
        prediction_inputs = collect_prediction_inputs_from_values(
            prediction_date, is_holiday, special_event, promotion_impact,
            event_impact, clearance_impact, launch_impact, temperature,
            weather_condition, humidity, competition_level, supply_chain
        )

        # Generate prediction and return results.
        return generate_prediction(
            feature_engineered_data,
            model,
            store_id,
            item_id,
            store_col,
            item_col,
            prediction_inputs,
            has_store_names,
            has_item_names,
            store_names,
            item_names,
        )

    # Pre-compute the item choices for the first store so the item dropdown
    # is populated before any interaction happens.
    initial_store = store_options[0] if store_options else None
    if initial_store:
        if has_store_names:
            initial_store_id = int(initial_store.split(" - ")[0])
        else:
            initial_store_id = initial_store

        initial_items = feature_engineered_data[feature_engineered_data[store_col] == initial_store_id][item_col].unique()

        if has_item_names:
            initial_item_options = [
                f"{item_id} - {item_names[item_id]}"
                for item_id in initial_items
                if item_id in item_names
            ]
        else:
            initial_item_options = sorted(initial_items)
    else:
        initial_item_options = []

    # Build Gradio interface.
    with gr.Blocks(title="Sales Prediction Tool") as demo:
        gr.Markdown("# Sales Prediction Tool")

        with gr.Row():
            # Left column - Product Selection
            with gr.Column(scale=1):
                gr.Markdown("## Product Selection")
                store_dropdown = gr.Dropdown(
                    choices=store_options,
                    label="Select Store",
                    value=initial_store,
                    interactive=True,
                    allow_custom_value=False
                )
                item_dropdown = gr.Dropdown(
                    choices=initial_item_options,
                    label="Select Product",
                    value=initial_item_options[0] if initial_item_options else None,
                    interactive=True,
                    allow_custom_value=False
                )

                # Update item choices whenever the store selection changes.
                store_dropdown.change(
                    fn=update_items,
                    inputs=[store_dropdown],
                    outputs=[item_dropdown]
                )

            # Right column - Prediction Parameters
            with gr.Column(scale=2):
                gr.Markdown("## Prediction Parameters")

                with gr.Row():
                    with gr.Column():
                        # Default to tomorrow's date.
                        prediction_date = gr.Textbox(
                            label="Prediction Date (YYYY-MM-DD)",
                            value=(datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d"),
                            interactive=True
                        )
                        is_holiday = gr.Checkbox(label="Holiday", value=False, interactive=True)
                        special_event = gr.Dropdown(
                            choices=["None", "Sale/Promotion", "Local Event",
                                     "Inventory Clearance", "New Product Launch"],
                            label="Special Event",
                            value="None",
                            interactive=True
                        )
                        # Per-event impact sliders; only the one matching the
                        # chosen special event is applied downstream.
                        promotion_impact = gr.Slider(-50, 100, value=20, label="Promotion Impact (%)", interactive=True)
                        event_impact = gr.Slider(-20, 50, value=10, label="Event Impact (%)", interactive=True)
                        clearance_impact = gr.Slider(-70, 30, value=-10, label="Clearance Impact (%)", interactive=True)
                        launch_impact = gr.Slider(0, 200, value=50, label="Launch Impact (%)", interactive=True)

                    with gr.Column():
                        temperature = gr.Slider(-10.0, 40.0, value=20.0, label="Temperature (°C)", interactive=True)
                        weather_condition = gr.Dropdown(
                            choices=["Clear", "Cloudy", "Rainy", "Snowy", "Stormy"],
                            label="Weather Condition",
                            value="Clear",
                            interactive=True
                        )
                        gr.Markdown("*Note: Weather impacts vary by product category*")

                    with gr.Column():
                        humidity = gr.Slider(0, 100, value=50, label="Humidity (%)", interactive=True)
                        competition_level = gr.Radio(
                            choices=["Low", "Medium", "High"],
                            label="Competition Level",
                            value="Medium",
                            interactive=True
                        )
                        supply_chain = gr.Radio(
                            choices=["Constrained", "Normal", "Abundant"],
                            label="Supply Chain Status",
                            value="Normal",
                            interactive=True
                        )

        predict_btn = gr.Button("Predict Sales", variant="primary")

        # Output section
        gr.Markdown("## Prediction Results")
        with gr.Row():
            result_text = gr.Textbox(label="Results", lines=10)
            result_plot1 = gr.Plot(label="Sales History")

        with gr.Row():
            result_plot2 = gr.Plot(label="Weekly Pattern")
            result_plot3 = gr.Plot(label="Feature Importance")

        # Connect button to prediction function.
        predict_btn.click(
            fn=predict_sales,
            inputs=[
                store_dropdown, item_dropdown, prediction_date, is_holiday,
                special_event, promotion_impact, event_impact, clearance_impact,
                launch_impact, temperature, weather_condition, humidity,
                competition_level, supply_chain
            ],
            outputs=[result_text, result_plot1, result_plot2, result_plot3]
        )

    return demo
235
+
236
+
237
def create_name_mappings(df, store_col, item_col, has_store_names, has_item_names):
    """Create mapping dictionaries for store and item names.

    Args:
        df: DataFrame containing the ID columns and, when the flags are set,
            the ``store_name`` / ``item_name`` columns.
        store_col: Name of the store ID column.
        item_col: Name of the item ID column.
        has_store_names: Whether ``df`` carries a ``store_name`` column.
        has_item_names: Whether ``df`` carries an ``item_name`` column.

    Returns:
        Tuple ``(store_names, item_names)`` of ``{id: name}`` dicts; either
        dict is empty when the corresponding name column is absent.
    """
    store_names = {}
    item_names = {}

    # dict(zip(...)) over the de-duplicated pairs replaces the original
    # per-row iterrows() loops: same result (later duplicates still win,
    # since dict insertion overwrites), but vectorized column access.
    if has_store_names:
        pairs = df[[store_col, "store_name"]].drop_duplicates()
        store_names = dict(zip(pairs[store_col], pairs["store_name"]))

    if has_item_names:
        pairs = df[[item_col, "item_name"]].drop_duplicates()
        item_names = dict(zip(pairs[item_col], pairs["item_name"]))

    return store_names, item_names
254
+
255
+
256
def create_product_selection_sidebar(
    df,
    stores,
    store_col,
    item_col,
    has_store_names,
    has_item_names,
    store_names,
    item_names,
):
    """No-op placeholder kept so older imports continue to work.

    The store/product selection UI now lives inside
    ``sales_prediction_view`` in the Gradio version of the app, so this
    function intentionally does nothing and returns ``None``.
    """
    return None
270
+
271
+
272
def collect_prediction_inputs():
    """No-op placeholder kept so older imports continue to work.

    The Gradio version collects widget values through
    ``collect_prediction_inputs_from_values`` instead; this function
    intentionally does nothing and returns ``None``.
    """
    return None
277
+
278
+
279
def collect_prediction_inputs_from_values(
    prediction_date_str, is_holiday, special_event, promotion_impact,
    event_impact, clearance_impact, launch_impact, temperature,
    weather_condition, humidity, competition_level, supply_chain
):
    """Normalize raw UI widget values into the prediction-input dict.

    Parses the date, derives calendar fields (season, quarter, weekday,
    weekend flag), bins temperature and humidity into categories, and folds
    the event/weather/competition/supply/weekend effects into a single
    ``adjustment_factor`` multiplier.

    Returns:
        Dict of normalized inputs consumed by ``prepare_prediction_input``
        and ``generate_prediction``.
    """
    prediction_date = datetime.strptime(prediction_date_str, "%Y-%m-%d").date()

    # Each special event maps to the user-supplied impact percentage for it;
    # "None" (or anything unknown) leaves the factor at 1.0.
    impact_by_event = {
        "Sale/Promotion": promotion_impact,
        "Local Event": event_impact,
        "Inventory Clearance": clearance_impact,
        "New Product Launch": launch_impact,
    }
    if special_event in impact_by_event:
        special_event_factor = impact_by_event[special_event] / 100 + 1.0
    else:
        special_event_factor = 1.0

    # Bin continuous weather readings into the categorical levels the
    # one-hot feature columns expect.
    temp_category = "Cool" if temperature < 15 else ("Warm" if temperature < 25 else "Hot")
    humidity_level = "Low" if humidity < 40 else ("Medium" if humidity < 70 else "High")

    # Calendar-derived fields; month index 1..12 maps onto the season table.
    month = prediction_date.month
    season_table = ["winter", "winter", "spring", "spring", "spring",
                    "summer", "summer", "summer", "fall", "fall", "fall",
                    "winter"]
    season = season_table[month - 1]

    quarter = (month - 1) // 3 + 1
    day_of_week = prediction_date.weekday()
    is_weekend = int(day_of_week >= 5)

    # Multipliers for the remaining conditions; unknown values fall back to
    # a neutral 1.0.
    weather_mult = {
        "Clear": 1.0,
        "Cloudy": 0.95,
        "Rainy": 0.9,
        "Snowy": 0.8,
        "Stormy": 0.7,
    }.get(weather_condition, 1.0)
    competition_mult = {"Low": 1.1, "Medium": 1.0, "High": 0.9}.get(competition_level, 1.0)
    supply_mult = {"Constrained": 0.9, "Normal": 1.0, "Abundant": 1.05}.get(supply_chain, 1.0)
    weekend_mult = 1.15 if is_weekend else 1.0

    # Combined adjustment factor (multiplication order matches the original
    # implementation for bit-identical float results).
    adjustment_factor = (
        special_event_factor
        * weather_mult
        * competition_mult
        * supply_mult
        * weekend_mult
    )

    return {
        "date": prediction_date,
        "is_holiday": is_holiday,
        "temperature": temperature,
        "temp_category": temp_category,
        "humidity": humidity,
        "humidity_level": humidity_level,
        "season": season,
        "quarter": quarter,
        "day_of_week": day_of_week,
        "is_weekend": is_weekend,
        "special_event": special_event,
        "weather_condition": weather_condition,
        "competition_level": competition_level,
        "supply_chain": supply_chain,
        "adjustment_factor": adjustment_factor,
    }
370
+
371
+
372
def generate_prediction(
    feature_engineered_data,
    model,
    store_id,
    item_id,
    store_col,
    item_col,
    prediction_inputs,
    has_store_names,
    has_item_names,
    store_names,
    item_names,
):
    """Generate sales prediction and display results.

    Builds one model-input row from the most recent history for the
    (store_id, item_id) pair, runs ``model.predict`` on it, scales the raw
    output by the variation and user-adjustment factors, and formats the
    outcome for the UI.

    Returns:
        Tuple ``(result_text, plot1, plot2, plot3)``; the plots are ``None``
        (with an explanatory message in ``result_text``) when no history
        exists or an exception occurs.
    """

    try:
        # Newest 5 rows for this store/item combination, most recent first.
        recent_samples = (
            feature_engineered_data[
                (feature_engineered_data[store_col] == store_id)
                & (feature_engineered_data[item_col] == item_id)
            ]
            .sort_values("date", ascending=False)
            .head(5)
        )

        if recent_samples.empty:
            return "No historical data found for this product-store combination.", None, None, None

        # Clone the newest observation and overwrite it with the user inputs.
        input_row = prepare_prediction_input(recent_samples, prediction_inputs)

        # Single-row frame: the model expects 2-D input.
        input_df = pd.DataFrame([input_row])

        # Prefer the feature list stored on the fitted model
        # (``feature_name_`` — presumably a LightGBM-style estimator; TODO
        # confirm against the training code). Otherwise fall back to every
        # column except the known non-feature ones.
        if hasattr(model, "feature_name_"):
            model_features = model.feature_name_
        else:
            model_features = [
                col
                for col in input_df.columns
                if col
                not in ["sales", "date", "variation_factor", "adjustment_factor"]
            ]

        # Select only the features used by the model.
        X_pred = input_df[model_features]

        # Raw model output before any scaling.
        base_prediction = model.predict(X_pred)[0]

        # Apply adjustment factors on top of the raw prediction.
        adjusted_prediction = base_prediction

        # Small random variation injected by prepare_prediction_input.
        if "variation_factor" in input_row:
            adjusted_prediction *= input_row["variation_factor"]

        # Combined event/weather/competition/supply/weekend multiplier from
        # the user's inputs.
        if "adjustment_factor" in prediction_inputs:
            adjusted_prediction *= prediction_inputs["adjustment_factor"]

        # Format the text summary and the three figures for the UI.
        result_text, plot1, plot2, plot3 = display_prediction_results(
            adjusted_prediction,
            base_prediction,
            store_id,
            item_id,
            prediction_inputs,
            feature_engineered_data,
            store_col,
            item_col,
            has_store_names,
            has_item_names,
            store_names,
            item_names,
            model,
            model_features,
        )

        return result_text, plot1, plot2, plot3

    except Exception as e:
        # Surface the full traceback in the UI instead of crashing the app.
        import traceback
        error_msg = f"Error making prediction: {str(e)}\n\n{traceback.format_exc()}"
        return error_msg, None, None, None
459
+
460
+
461
def prepare_prediction_input(recent_samples, prediction_inputs):
    """Prepare input row for prediction based on recent sample and user inputs.

    Args:
        recent_samples: DataFrame of historical rows for one store/item pair,
            sorted newest-first (row 0 is used as the template, so lag and
            rolling features carry over from the latest observation).
        prediction_inputs: Dict produced by
            ``collect_prediction_inputs_from_values``.

    Returns:
        A pandas Series with calendar/weather fields overwritten from the
        user's inputs, plus a random ``variation_factor`` entry.
    """

    # Start from the most recent observation as the template row.
    input_row = recent_samples.iloc[0].copy()

    # Overwrite calendar fields with the requested prediction date.
    input_row["date"] = pd.to_datetime(prediction_inputs["date"])
    input_row["day"] = prediction_inputs["date"].day
    input_row["month"] = prediction_inputs["date"].month
    input_row["year"] = prediction_inputs["date"].year
    input_row["quarter"] = prediction_inputs["quarter"]
    input_row["is_holiday"] = int(prediction_inputs["is_holiday"])

    # Derived day-of-week information.
    input_row["day_of_week"] = input_row["date"].dayofweek
    input_row["day_of_month"] = input_row["date"].day
    input_row["is_weekend"] = 1 if input_row["day_of_week"] >= 5 else 0

    # Raw weather readings, only when the dataset carries these columns.
    if "temperature" in input_row:
        input_row["temperature"] = prediction_inputs["temperature"]

    if "humidity" in input_row:
        input_row["humidity"] = prediction_inputs["humidity"]

    # One-hot temperature/humidity category columns (if present in the row).
    for category in ["Cool", "Warm", "Hot"]:
        if f"temp_category_{category}" in input_row:
            input_row[f"temp_category_{category}"] = (
                1 if category == prediction_inputs["temp_category"] else 0
            )

    for level in ["Low", "Medium", "High"]:
        if f"humidity_level_{level}" in input_row:
            input_row[f"humidity_level_{level}"] = (
                1 if level == prediction_inputs["humidity_level"] else 0
            )

    # One-hot season columns; "wet" presumably exists in some datasets'
    # schemas — TODO confirm against the feature-engineering step.
    for s in ["spring", "summer", "fall", "winter", "wet"]:
        if f"season_{s}" in input_row:
            input_row[f"season_{s}"] = 1 if s == prediction_inputs["season"] else 0

    # Inject a small random +/-2% variation so repeated predictions differ
    # slightly. NOTE(review): non-deterministic unless numpy's global seed
    # is fixed by the caller.
    variation_factor = 1.0 + np.random.uniform(-0.02, 0.02)
    input_row["variation_factor"] = variation_factor

    return input_row
510
+
511
+
512
def display_prediction_results(
    prediction_value,
    base_prediction,
    store_id,
    item_id,
    prediction_inputs,
    historical_data,
    store_col,
    item_col,
    has_store_names,
    has_item_names,
    store_names,
    item_names,
    model,
    model_features,
):
    """Display prediction results with visualizations.

    Formats the prediction, the adjustment breakdown, and historical context
    into a text report, then builds the three companion figures.

    Args:
        prediction_value: Final (adjusted) predicted sales value.
        base_prediction: Raw model output before adjustment factors.
        prediction_inputs: Dict from ``collect_prediction_inputs_from_values``.
        historical_data: Full feature-engineered DataFrame (filtered here to
            the selected store/item).
        model / model_features: Fitted model and its feature list, used for
            the feature-importance plot.

    Returns:
        Tuple ``(result_text, plot1, plot2, plot3)`` where the plots may be
        ``None`` when there is not enough data to draw them.
    """

    # Build the plain-text report line by line.
    result_lines = []
    result_lines.append("=" * 50)
    result_lines.append("PREDICTION RESULTS")
    result_lines.append("=" * 50)
    result_lines.append(f"\nPredicted Sales: ${prediction_value:,.2f}")

    # Prefer human-readable names when available.
    if has_store_names:
        result_lines.append(f"Store: {store_names[store_id]}")
    else:
        result_lines.append(f"Store ID: {store_id}")

    if has_item_names:
        result_lines.append(f"Product: {item_names[item_id]}")
    else:
        result_lines.append(f"Product ID: {item_id}")

    result_lines.append(f"Date: {prediction_inputs['date'].strftime('%B %d, %Y')}")
    result_lines.append(f"Season: {prediction_inputs['season'].capitalize()}")
    if prediction_inputs["is_holiday"]:
        result_lines.append("Holiday: Yes")

    # Adjustment details: how the base prediction was scaled.
    result_lines.append(f"\n{'='*50}")
    result_lines.append("ADJUSTMENT DETAILS")
    result_lines.append("="*50)
    result_lines.append(f"Base prediction: ${base_prediction:.2f}")
    result_lines.append(f"Final prediction: ${prediction_value:.2f}")
    result_lines.append(f"Total adjustment: {prediction_inputs['adjustment_factor']:.2f}x")
    result_lines.append(f"\nEvent: {prediction_inputs['special_event']}")
    result_lines.append(f"Weather: {prediction_inputs['weather_condition']}")
    result_lines.append(f"Competition: {prediction_inputs['competition_level']}")
    result_lines.append(f"Supply: {prediction_inputs['supply_chain']}")
    result_lines.append(f"Weekend: {'Yes' if prediction_inputs['is_weekend'] else 'No'}")
    result_lines.append(f"Holiday: {'Yes' if prediction_inputs['is_holiday'] else 'No'}")

    # Historical context for the selected store/item, oldest first.
    historical = historical_data[
        (historical_data[store_col] == store_id)
        & (historical_data[item_col] == item_id)
    ].sort_values("date")

    if "sales" in historical.columns and len(historical) > 0:
        last_value = historical["sales"].iloc[-1]
        last_date = historical["date"].iloc[-1]
        avg_sales = historical["sales"].mean()
        max_sales = historical["sales"].max()
        max_date = historical.loc[historical["sales"].idxmax(), "date"]

        result_lines.append(f"\n{'='*50}")
        result_lines.append("HISTORICAL CONTEXT")
        result_lines.append("="*50)
        result_lines.append(f"Historical Average: ${avg_sales:,.2f}")
        result_lines.append(f"Period: {historical['date'].min().strftime('%b %d, %Y')} to {historical['date'].max().strftime('%b %d, %Y')}")
        result_lines.append(f"\nLast Recorded Sales: ${last_value:,.2f}")
        result_lines.append(f"Date: {last_date.strftime('%b %d, %Y')}")
        result_lines.append(f"\nHistorical Maximum: ${max_sales:,.2f}")
        result_lines.append(f"Date: {max_date.strftime('%b %d, %Y')}")

    result_text = "\n".join(result_lines)

    # Companion figures: recent history, weekly pattern, feature importances.
    plot1 = display_historical_context(historical, prediction_inputs["date"], prediction_value)
    plot2 = display_weekly_pattern(historical, prediction_inputs["date"])
    plot3 = display_feature_importance(model, model_features)

    return result_text, plot1, plot2, plot3
597
+
598
+
599
def display_historical_context(historical_data, prediction_date, prediction_value):
    """Plot the last 60 days of sales with the new prediction highlighted.

    Args:
        historical_data: Sales history for one store/item, with ``date`` and
            ``sales`` columns.
        prediction_date: Date of the new prediction (plotted as a red dot).
        prediction_value: Predicted sales value for that date.

    Returns:
        A matplotlib Figure, or ``None`` when there is no usable history.
    """
    # Nothing to draw without a sales column or any rows at all.
    if historical_data.empty or "sales" not in historical_data.columns:
        return None

    # Restrict the view to the trailing two months of data.
    cutoff = historical_data["date"].max() - pd.Timedelta(days=60)
    window = historical_data[historical_data["date"] >= cutoff].copy()
    if window.empty:
        return None

    fig, ax = plt.subplots(figsize=(6, 2.5))

    # Historical sales line plus the prediction as a standalone red marker.
    ax.plot(window["date"], window["sales"], "b-", label="Sales")
    ax.scatter(prediction_date, prediction_value, color="red", s=60, label="Prediction")

    # Overlay a 7-day moving average once there is enough data for one.
    if len(window) > 7:
        window["MA7"] = window["sales"].rolling(window=7).mean()
        ax.plot(window["date"], window["MA7"], "g--", label="7-Day Avg")

    ax.set_xlabel("")
    ax.set_ylabel("Sales ($)")
    ax.set_title("Last 60 Days Sales History")
    ax.legend(loc="upper left", fontsize="x-small")
    fig.autofmt_xdate(rotation=45)
    fig.tight_layout()

    return fig
651
+
652
+
653
def display_weekly_pattern(recent_history, prediction_date):
    """Bar-chart the average sales per weekday, highlighting the prediction day.

    Args:
        recent_history: Sales history with ``date`` and ``sales`` columns.
        prediction_date: Date being predicted; its weekday's bar turns red.

    Returns:
        A matplotlib Figure, or ``None`` with fewer than 7 rows of history.
    """
    if len(recent_history) < 7:
        return None

    # Work on a copy so the caller's frame is untouched.
    frame = recent_history.copy()
    frame["day_of_week"] = frame["date"].dt.dayofweek

    weekday_labels = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    # Average sales per weekday, kept in Monday-to-Sunday order and limited
    # to the weekdays that actually occur in the data.
    mean_by_day = frame.groupby("day_of_week")["sales"].mean()
    present_days = [d for d in range(7) if d in mean_by_day.index]
    day_sales_df = pd.DataFrame(
        {
            "day_name": [weekday_labels[d] for d in present_days],
            "sales": [mean_by_day[d] for d in present_days],
        }
    )

    fig, ax = plt.subplots(figsize=(6, 2.5))
    sns.barplot(x="day_name", y="sales", data=day_sales_df, ax=ax)

    # Paint the bar for the prediction's weekday red.
    target_label = weekday_labels[prediction_date.weekday()]
    for idx, bar in enumerate(ax.patches):
        if day_sales_df.iloc[idx]["day_name"] == target_label:
            bar.set_facecolor("red")

    ax.set_xlabel("")
    ax.set_ylabel("Avg Sales ($)")
    ax.set_title("Sales by Day of Week")
    plt.xticks(rotation=45, fontsize=8)
    fig.tight_layout()

    return fig
700
+
701
+
702
def display_feature_importance(model, model_features):
    """Plot the top-8 feature importances of the fitted model.

    Args:
        model: Fitted estimator; must expose ``feature_importances_``.
        model_features: Feature names aligned with the importances.

    Returns:
        A matplotlib Figure, or ``None`` when the model carries no
        ``feature_importances_`` attribute.
    """
    if not hasattr(model, "feature_importances_"):
        return None

    # Pair names with importances, keep only the eight strongest.
    top = (
        pd.DataFrame(
            {"Feature": model_features, "Importance": model.feature_importances_}
        )
        .sort_values("Importance", ascending=False)
        .head(8)
    )

    # Prettify snake_case feature names for display.
    top["Feature"] = top["Feature"].apply(lambda name: name.replace("_", " ").title())

    fig, ax = plt.subplots(figsize=(6, 2.5))
    sns.barplot(x="Importance", y="Feature", data=top, ax=ax)
    ax.set_title("Top Factors Influencing Sales Prediction")
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)
    fig.tight_layout()

    return fig
app/utils/data_generator.py ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime, timedelta
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ # Set random seed for reproducibility
8
+ np.random.seed(2025)
9
+
10
+
11
def generate_store_data():
    """Return the static demo catalog of provinces and stores.

    Returns:
        Tuple ``(provinces, stores)`` where ``provinces`` is the list of
        province names and ``stores`` is a list of dicts with ``id``,
        ``name`` and ``province`` keys (ids 1-5 Hanoi, 6-10 Ho Chi Minh City).
    """
    provinces = ["Hanoi", "Ho Chi Minh City"]

    hanoi_names = [
        "Hoan Kiem Market",
        "Ba Dinh Supermarket",
        "Dong Da Mall",
        "Tay Ho Store",
        "Long Bien Shop",
    ]
    hcmc_names = [
        "District 1 Market",
        "Ben Thanh Store",
        "Saigon Supermarket",
        "Phu Nhuan Shop",
        "Binh Thanh Market",
    ]

    # Build the store records per province; ids run 1..5 then 6..10.
    stores = [
        {"id": idx + 1, "name": name, "province": "Hanoi"}
        for idx, name in enumerate(hanoi_names)
    ]
    stores += [
        {"id": idx + 6, "name": name, "province": "Ho Chi Minh City"}
        for idx, name in enumerate(hcmc_names)
    ]

    return provinces, stores
33
+
34
+
35
+ def generate_item_data():
36
+ """Generate item data"""
37
+
38
+ # Define categories and items
39
+ categories = [
40
+ "Staples",
41
+ "Dairy & Frozen",
42
+ "Beverages & Snacks",
43
+ "Household & Personal Care",
44
+ "Baby & Health",
45
+ ]
46
+
47
+ items = [
48
+ # Staples
49
+ {
50
+ "id": 1,
51
+ "name": "Rice",
52
+ "category": "Staples",
53
+ "base_price": 20.0,
54
+ "base_sales": 15,
55
+ "volatility": 0.3,
56
+ },
57
+ {
58
+ "id": 2,
59
+ "name": "Noodles",
60
+ "category": "Staples",
61
+ "base_price": 15.0,
62
+ "base_sales": 12,
63
+ "volatility": 0.25,
64
+ },
65
+ {
66
+ "id": 3,
67
+ "name": "Bread",
68
+ "category": "Staples",
69
+ "base_price": 10.0,
70
+ "base_sales": 20,
71
+ "volatility": 0.4,
72
+ },
73
+ {
74
+ "id": 4,
75
+ "name": "Flour",
76
+ "category": "Staples",
77
+ "base_price": 12.0,
78
+ "base_sales": 8,
79
+ "volatility": 0.2,
80
+ },
81
+ {
82
+ "id": 5,
83
+ "name": "Cooking Oil",
84
+ "category": "Staples",
85
+ "base_price": 25.0,
86
+ "base_sales": 10,
87
+ "volatility": 0.15,
88
+ },
89
+ {
90
+ "id": 6,
91
+ "name": "Sugar",
92
+ "category": "Staples",
93
+ "base_price": 8.0,
94
+ "base_sales": 7,
95
+ "volatility": 0.1,
96
+ },
97
+ # Dairy & Frozen
98
+ {
99
+ "id": 7,
100
+ "name": "Milk",
101
+ "category": "Dairy & Frozen",
102
+ "base_price": 18.0,
103
+ "base_sales": 30,
104
+ "volatility": 0.35,
105
+ },
106
+ {
107
+ "id": 8,
108
+ "name": "Cheese",
109
+ "category": "Dairy & Frozen",
110
+ "base_price": 35.0,
111
+ "base_sales": 12,
112
+ "volatility": 0.3,
113
+ },
114
+ {
115
+ "id": 9,
116
+ "name": "Yogurt",
117
+ "category": "Dairy & Frozen",
118
+ "base_price": 12.0,
119
+ "base_sales": 25,
120
+ "volatility": 0.4,
121
+ },
122
+ {
123
+ "id": 10,
124
+ "name": "Ice Cream",
125
+ "category": "Dairy & Frozen",
126
+ "base_price": 30.0,
127
+ "base_sales": 15,
128
+ "volatility": 0.5,
129
+ },
130
+ {
131
+ "id": 11,
132
+ "name": "Frozen Vegetables",
133
+ "category": "Dairy & Frozen",
134
+ "base_price": 22.0,
135
+ "base_sales": 10,
136
+ "volatility": 0.25,
137
+ },
138
+ # Beverages & Snacks
139
+ {
140
+ "id": 12,
141
+ "name": "Soda",
142
+ "category": "Beverages & Snacks",
143
+ "base_price": 15.0,
144
+ "base_sales": 40,
145
+ "volatility": 0.45,
146
+ },
147
+ {
148
+ "id": 13,
149
+ "name": "Juice",
150
+ "category": "Beverages & Snacks",
151
+ "base_price": 20.0,
152
+ "base_sales": 30,
153
+ "volatility": 0.4,
154
+ },
155
+ {
156
+ "id": 14,
157
+ "name": "Water",
158
+ "category": "Beverages & Snacks",
159
+ "base_price": 10.0,
160
+ "base_sales": 50,
161
+ "volatility": 0.3,
162
+ },
163
+ {
164
+ "id": 15,
165
+ "name": "Coffee",
166
+ "category": "Beverages & Snacks",
167
+ "base_price": 45.0,
168
+ "base_sales": 20,
169
+ "volatility": 0.25,
170
+ },
171
+ {
172
+ "id": 16,
173
+ "name": "Tea",
174
+ "category": "Beverages & Snacks",
175
+ "base_price": 35.0,
176
+ "base_sales": 15,
177
+ "volatility": 0.2,
178
+ },
179
+ {
180
+ "id": 17,
181
+ "name": "Chips",
182
+ "category": "Beverages & Snacks",
183
+ "base_price": 12.0,
184
+ "base_sales": 35,
185
+ "volatility": 0.45,
186
+ },
187
+ {
188
+ "id": 18,
189
+ "name": "Cookies",
190
+ "category": "Beverages & Snacks",
191
+ "base_price": 18.0,
192
+ "base_sales": 30,
193
+ "volatility": 0.4,
194
+ },
195
+ {
196
+ "id": 19,
197
+ "name": "Chocolate",
198
+ "category": "Beverages & Snacks",
199
+ "base_price": 22.0,
200
+ "base_sales": 25,
201
+ "volatility": 0.35,
202
+ },
203
+ # Household & Personal Care
204
+ {
205
+ "id": 20,
206
+ "name": "Soap",
207
+ "category": "Household & Personal Care",
208
+ "base_price": 8.0,
209
+ "base_sales": 20,
210
+ "volatility": 0.2,
211
+ },
212
+ {
213
+ "id": 21,
214
+ "name": "Shampoo",
215
+ "category": "Household & Personal Care",
216
+ "base_price": 25.0,
217
+ "base_sales": 15,
218
+ "volatility": 0.25,
219
+ },
220
+ {
221
+ "id": 22,
222
+ "name": "Toothpaste",
223
+ "category": "Household & Personal Care",
224
+ "base_price": 15.0,
225
+ "base_sales": 18,
226
+ "volatility": 0.15,
227
+ },
228
+ {
229
+ "id": 23,
230
+ "name": "Laundry Detergent",
231
+ "category": "Household & Personal Care",
232
+ "base_price": 40.0,
233
+ "base_sales": 12,
234
+ "volatility": 0.2,
235
+ },
236
+ {
237
+ "id": 24,
238
+ "name": "Paper Towels",
239
+ "category": "Household & Personal Care",
240
+ "base_price": 20.0,
241
+ "base_sales": 14,
242
+ "volatility": 0.3,
243
+ },
244
+ {
245
+ "id": 25,
246
+ "name": "Toilet Paper",
247
+ "category": "Household & Personal Care",
248
+ "base_price": 25.0,
249
+ "base_sales": 16,
250
+ "volatility": 0.25,
251
+ },
252
+ {
253
+ "id": 26,
254
+ "name": "Trash Bags",
255
+ "category": "Household & Personal Care",
256
+ "base_price": 18.0,
257
+ "base_sales": 10,
258
+ "volatility": 0.15,
259
+ },
260
+ {
261
+ "id": 27,
262
+ "name": "Dishwashing Liquid",
263
+ "category": "Household & Personal Care",
264
+ "base_price": 15.0,
265
+ "base_sales": 11,
266
+ "volatility": 0.2,
267
+ },
268
+ {
269
+ "id": 28,
270
+ "name": "All-Purpose Cleaner",
271
+ "category": "Household & Personal Care",
272
+ "base_price": 22.0,
273
+ "base_sales": 9,
274
+ "volatility": 0.15,
275
+ },
276
+ # Baby & Health
277
+ {
278
+ "id": 29,
279
+ "name": "Diapers",
280
+ "category": "Baby & Health",
281
+ "base_price": 45.0,
282
+ "base_sales": 25,
283
+ "volatility": 0.3,
284
+ },
285
+ {
286
+ "id": 30,
287
+ "name": "Baby Food",
288
+ "category": "Baby & Health",
289
+ "base_price": 20.0,
290
+ "base_sales": 15,
291
+ "volatility": 0.25,
292
+ },
293
+ {
294
+ "id": 31,
295
+ "name": "Baby Wipes",
296
+ "category": "Baby & Health",
297
+ "base_price": 15.0,
298
+ "base_sales": 20,
299
+ "volatility": 0.2,
300
+ },
301
+ {
302
+ "id": 32,
303
+ "name": "Pain Relievers",
304
+ "category": "Baby & Health",
305
+ "base_price": 30.0,
306
+ "base_sales": 10,
307
+ "volatility": 0.15,
308
+ },
309
+ {
310
+ "id": 33,
311
+ "name": "Vitamins",
312
+ "category": "Baby & Health",
313
+ "base_price": 40.0,
314
+ "base_sales": 8,
315
+ "volatility": 0.2,
316
+ },
317
+ {
318
+ "id": 34,
319
+ "name": "Cold & Flu Medicine",
320
+ "category": "Baby & Health",
321
+ "base_price": 35.0,
322
+ "base_sales": 7,
323
+ "volatility": 0.4,
324
+ },
325
+ {
326
+ "id": 35,
327
+ "name": "First Aid Kit",
328
+ "category": "Baby & Health",
329
+ "base_price": 50.0,
330
+ "base_sales": 5,
331
+ "volatility": 0.1,
332
+ },
333
+ ]
334
+
335
+ return categories, items
336
+
337
+
338
def calculate_daily_sales(date, store, item, weather_data=None):
    """
    Simulate the number of units sold for one (date, store, item) combination.

    The item's base demand is scaled by multiplicative factors (store size,
    weekday/weekend, month, quarter, holidays, weather, year-over-year
    growth) plus Gaussian noise, and floored at one unit.

    Args:
        date: datetime of the sales day.
        store: dict with at least "id" and "province".
        item: dict with "category", "base_sales" and "volatility".
        weather_data: optional mapping of (date string, province) ->
            {"temperature", "humidity", ...}; ignored when None.

    Returns:
        int: simulated sales quantity (>= 1).
    """
    demand = item["base_sales"]

    # Store size effect: ids map cyclically onto a 0.8 .. 1.7 multiplier.
    store_factor = 0.8 + (store["id"] % 10) / 10

    # Weekend uplift (Saturday=5, Sunday=6).
    weekday_factor = 1.3 if date.weekday() >= 5 else 1.0

    # Seasonal month effect: December boost, slight February dip.
    month = date.month
    month_factor = 1.0 + 0.3 * (month == 12) - 0.1 * (month == 2)

    # Mild quarterly business cycle (later quarters slightly higher).
    quarter = (month - 1) // 3 + 1
    quarter_factor = 1.0 + 0.05 * (quarter - 2.5)

    # Holiday uplifts (Vietnamese calendar).
    if (month == 1 and date.day >= 27) or (month == 2 and date.day <= 5):
        holiday_factor = 1.5  # Tet (Lunar New Year) window
    elif month == 9 and date.day == 2:
        holiday_factor = 1.3  # National Day
    elif month == 12 and date.day >= 20:
        holiday_factor = 1.4  # year-end shopping
    else:
        holiday_factor = 1.0

    # Weather-driven adjustment, only when a lookup table was supplied.
    weather_factor = 1.0
    if weather_data is not None:
        key = (date.strftime("%Y-%m-%d"), store["province"])
        day_weather = weather_data.get(key)
        if day_weather:
            temp = day_weather["temperature"]
            humidity = day_weather["humidity"]
            category = item["category"]

            # Hot days push drinks and frozen goods up; cold days down.
            if category == "Beverages & Snacks":
                if temp > 28:
                    weather_factor *= 1.3
                elif temp < 18:
                    weather_factor *= 0.9
            elif category == "Dairy & Frozen":
                if temp > 28:
                    weather_factor *= 1.4
                elif temp < 18:
                    weather_factor *= 0.8

            # High humidity approximates rain: indoor categories sell more.
            if humidity > 80 and category in [
                "Beverages & Snacks",
                "Household & Personal Care",
            ]:
                weather_factor *= 1.2

    # Category-specific year-over-year growth, applied to 2017 only.
    yoy_growth = 1.0
    if date.year == 2017:
        yoy_growth = {
            "Staples": 1.03,
            "Dairy & Frozen": 1.05,
            "Beverages & Snacks": 1.08,
            "Household & Personal Care": 1.05,
            "Baby & Health": 1.07,
        }.get(item["category"], 1.05)

    # Gaussian noise scaled by the item's volatility.
    random_factor = np.random.normal(1.0, item["volatility"])

    # Combine all multipliers.
    sales = (
        demand
        * store_factor
        * weekday_factor
        * month_factor
        * quarter_factor
        * holiday_factor
        * weather_factor
        * yoy_growth
        * random_factor
    )

    # Round to whole units and never report less than one sale.
    return max(1, int(round(sales)))
446
+
447
+
448
def generate_weather_data(start_date, end_date, provinces):
    """
    Create a synthetic daily weather table for the given provinces.

    For every day in [start_date, end_date] and every province, a temperature
    and a humidity value are drawn uniformly around month-specific baselines.

    Args:
        start_date: first day (inclusive), as a datetime.
        end_date: last day (inclusive), as a datetime.
        provinces: iterable of province names; must be keys of the climate
            table below ("Hanoi", "Ho Chi Minh City").

    Returns:
        tuple: (DataFrame with columns city/date/temperature/humidity/season,
                dict keyed by (date string, province) for fast lookups while
                generating sales).
    """
    # Month-indexed climate baselines per province (1 = January .. 12 = December).
    climate = {
        "Hanoi": {
            "base_temp": {1: 16, 2: 17, 3: 20, 4: 24, 5: 28, 6: 30,
                          7: 30, 8: 29, 9: 28, 10: 25, 11: 21, 12: 18},
            "temp_variation": 3.5,
            "base_humidity": {1: 80, 2: 83, 3: 85, 4: 85, 5: 80, 6: 80,
                              7: 83, 8: 85, 9: 83, 10: 78, 11: 75, 12: 77},
            "humidity_variation": 10,
            "seasons": {1: "winter", 2: "winter", 3: "spring", 4: "spring",
                        5: "summer", 6: "summer", 7: "summer", 8: "summer",
                        9: "fall", 10: "fall", 11: "fall", 12: "winter"},
        },
        "Ho Chi Minh City": {
            "base_temp": {1: 26, 2: 27, 3: 28, 4: 29, 5: 29, 6: 28,
                          7: 28, 8: 28, 9: 28, 10: 27, 11: 27, 12: 26},
            "temp_variation": 2.0,
            "base_humidity": {1: 70, 2: 70, 3: 70, 4: 75, 5: 80, 6: 83,
                              7: 85, 8: 85, 9: 88, 10: 85, 11: 80, 12: 75},
            "humidity_variation": 8,
            "seasons": {1: "dry", 2: "dry", 3: "dry", 4: "dry",
                        5: "wet", 6: "wet", 7: "wet", 8: "wet",
                        9: "wet", 10: "wet", 11: "wet", 12: "dry"},
        },
    }

    records = []  # rows for the returned DataFrame
    lookup = {}   # (date string, province) -> weather dict, used by sales generation

    day = start_date
    while day <= end_date:
        month = day.month
        date_str = day.strftime("%Y-%m-%d")
        for province in provinces:
            profile = climate[province]

            # Uniform noise around the monthly baseline, rounded to 0.1.
            temperature = round(
                profile["base_temp"][month]
                + np.random.uniform(
                    -profile["temp_variation"], profile["temp_variation"]
                ),
                1,
            )
            humidity = round(
                profile["base_humidity"][month]
                + np.random.uniform(
                    -profile["humidity_variation"], profile["humidity_variation"]
                ),
                1,
            )
            # Clamp humidity to a realistic range (applied after rounding).
            humidity = max(40, min(95, humidity))

            season = profile["seasons"][month]
            records.append(
                {
                    "city": province,
                    "date": date_str,
                    "temperature": temperature,
                    "humidity": humidity,
                    "season": season,
                }
            )
            lookup[(date_str, province)] = {
                "temperature": temperature,
                "humidity": humidity,
                "season": season,
            }
        day += timedelta(days=1)

    return pd.DataFrame(records), lookup
600
+
601
+
602
def generate_sales_data(start_date, end_date, stores, items, weather_dict):
    """
    Generate a synthetic daily sales table for every store/item pairing.

    Each store carries a deterministic subset of the items (roughly 80%,
    seeded by the store id), and each carried item gets one sales row per day
    computed by calculate_daily_sales().

    Args:
        start_date: first day (inclusive), as a datetime.
        end_date: last day (inclusive), as a datetime.
        stores: list of store dicts with "id", "name", "province".
        items: list of item dicts with "id", "name", "category", etc.
        weather_dict: (date string, province) -> weather dict, or None.

    Returns:
        pd.DataFrame: one row per (date, store, carried item).
    """
    # Build the date range once.
    dates = []
    current = start_date
    while current <= end_date:
        dates.append(current)
        current += timedelta(days=1)

    # Decide each store's assortment once rather than once per day: the
    # selection is seeded by the store id, so it was identical on every date
    # anyway. Hoisting this loop-invariant work avoids re-seeding and
    # re-drawing len(items) randoms for every single day.
    assortments = {}
    for store in stores:
        np.random.seed(store["id"] * 10)
        assortments[store["id"]] = [
            item for item in items if np.random.random() < 0.8  # 80% carry rate
        ]
    # Restore non-deterministic randomness for the sales noise below.
    np.random.seed(None)

    rows = []
    for date in dates:
        date_str = date.strftime("%Y-%m-%d")
        for store in stores:
            for item in assortments[store["id"]]:
                rows.append(
                    {
                        "date": date_str,
                        "province": store["province"],
                        "store_id": store["id"],
                        "store_name": store["name"],
                        "category": item["category"],
                        "item_id": item["id"],
                        "item_name": item["name"],
                        "sales": calculate_daily_sales(
                            date, store, item, weather_dict
                        ),
                    }
                )

    return pd.DataFrame(rows)
653
+
654
+
655
def add_outliers_and_nans(data, outlier_percentage=0.01, nan_percentage=0.1):
    """
    Return a copy of *data* with synthetic outliers and missing values
    injected into the 'sales' column.

    Args:
        data: input DataFrame containing a 'sales' column.
        outlier_percentage: share of rows (in percent) whose sales values
            are tripled to simulate outliers.
        nan_percentage: share of rows (in percent) whose sales values are
            blanked out to NaN.

    Returns:
        pd.DataFrame: modified copy; the input frame is left untouched.
    """
    noisy = data.copy()

    total = len(noisy)
    n_outliers = int(total * outlier_percentage / 100)
    n_nans = int(total * nan_percentage / 100)

    # Fixed seed so the injected noise is reproducible across runs.
    np.random.seed(2025)

    # Outliers first: triple the sales of a random row subset.
    outlier_rows = np.random.choice(total, n_outliers, replace=False)
    noisy.loc[outlier_rows, "sales"] *= 3

    # Then blank out a (possibly overlapping) random row subset.
    nan_rows = np.random.choice(total, n_nans, replace=False)
    noisy.loc[nan_rows, "sales"] = np.nan

    return noisy
677
+
678
+
679
def check_missing_values(df):
    """
    Summarise missing values per column.

    Returns a DataFrame indexed by column name with the absolute NaN count
    ('counts') and its share of all rows in percent ('ratio (%)'), the
    latter rounded to two decimal places.
    """
    nan_counts = df.isna().sum()
    nan_ratio = np.round(nan_counts / df.shape[0], 4) * 100
    return pd.DataFrame({"counts": nan_counts, "ratio (%)": nan_ratio})
688
+
689
+
690
def main():
    """Generate and save all synthetic datasets for the project.

    Produces stores/items in memory, then writes three CSV files under
    data/: weather_data.csv (both years), 2016_sales.csv and 2017_sales.csv
    (each with injected outliers and NaNs), printing progress and summary
    statistics along the way.
    """
    print("Generating synthetic data for Sales Forecasting with XAI project...")

    # Create output directory if it doesn't exist
    os.makedirs("data", exist_ok=True)

    # Generate store and item data (in-memory reference tables)
    provinces, stores = generate_store_data()
    categories, items = generate_item_data()

    print(
        f"Created {len(stores)} stores and {len(items)} items across {len(categories)} categories"
    )

    # Define date ranges: two full calendar years
    start_date_2016 = datetime(2016, 1, 1)
    end_date_2016 = datetime(2016, 12, 31)

    start_date_2017 = datetime(2017, 1, 1)
    end_date_2017 = datetime(2017, 12, 31)

    # Generate weather data once for the whole 2016-2017 span; the dict form
    # is used for lookups during sales generation.
    print("Generating weather data...")
    weather_df, weather_dict = generate_weather_data(
        start_date_2016, end_date_2017, provinces
    )

    # Save weather data
    weather_df.to_csv("data/weather_data.csv", index=False)
    print(f"Saved weather data with {len(weather_df)} records")

    # Generate 2016 sales data
    print("Generating 2016 sales data...")
    sales_2016 = generate_sales_data(
        start_date_2016, end_date_2016, stores, items, weather_dict
    )

    # Inject synthetic noise: 0.5% outliers and 1% missing values
    sales_2016 = add_outliers_and_nans(
        sales_2016, outlier_percentage=0.5, nan_percentage=1
    )

    # Save 2016 sales data
    sales_2016.to_csv("data/2016_sales.csv", index=False)
    print(f"Saved 2016 sales data with {len(sales_2016)} records")

    # Generate 2017 sales data
    print("Generating 2017 sales data...")
    sales_2017 = generate_sales_data(
        start_date_2017, end_date_2017, stores, items, weather_dict
    )

    # Same noise injection as for 2016
    sales_2017 = add_outliers_and_nans(
        sales_2017, outlier_percentage=0.5, nan_percentage=1
    )

    # Save 2017 sales data
    sales_2017.to_csv("data/2017_sales.csv", index=False)
    print(f"Saved 2017 sales data with {len(sales_2017)} records")

    # Print statistics
    print("\nData Generation Complete!")
    print(f"Total weather records: {len(weather_df)}")
    print(f"Total 2016 sales records: {len(sales_2016)}")
    print(f"Total 2017 sales records: {len(sales_2017)}")
    print(
        f"Total combined records: {len(weather_df) + len(sales_2016) + len(sales_2017)}"
    )

    print("\nSales Statistics:")
    print(f"2016 Average Sales: {sales_2016['sales'].mean():.2f} units")
    print(f"2016 Max Sales: {sales_2016['sales'].max()} units")
    print(f"2017 Average Sales: {sales_2017['sales'].mean():.2f} units")
    print(f"2017 Max Sales: {sales_2017['sales'].max()} units")
    print(f"Missing values: {check_missing_values(sales_2016)}")
    print(f"Missing values: {check_missing_values(sales_2017)}")

    print("\nFiles saved to data/ directory:")
    print("- data/weather_data.csv")
    print("- data/2016_sales.csv")
    print("- data/2017_sales.csv")
771
+
772
+
773
# Script entry point: generate all synthetic datasets when run directly.
if __name__ == "__main__":
    main()
app/utils/data_loader.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pickle
3
+ import pandas as pd
4
+ import gradio as gr
5
+ import pyarrow.feather as feather
6
+ from functools import lru_cache
7
+
8
+ # --- Data & Model Loading Logic ---
9
+
10
def load_model():
    """Load the trained sales forecast model from its pickle file.

    Returns:
        The unpickled model object, or None when the file is missing.
    """
    try:
        with open("models/sales_forecast_model.pkl", "rb") as fh:
            return pickle.load(fh)
    except FileNotFoundError:
        # Keep startup resilient: report to stdout instead of raising so the
        # UI can still come up and surface the problem later.
        print("Error: 'models/sales_forecast_model.pkl' not found.")
        return None
21
+
22
def load_feature_stats():
    """Load the feature statistics (means/stds) used for normalization.

    Returns:
        dict: parsed JSON statistics, or an empty dict when the file is missing.
    """
    try:
        with open("models/feature_stats.json", "r") as fh:
            return json.load(fh)
    except FileNotFoundError:
        print("Error: 'models/feature_stats.json' not found.")
        return {}
31
+
32
@lru_cache(maxsize=1)
def load_data():
    """Load the preprocessed sales CSV, cached for the process lifetime.

    lru_cache plays the role Streamlit's @st.cache_data played before the
    Gradio port.

    Returns:
        pd.DataFrame: sales data with a parsed 'date' column, or an empty
        frame with the expected columns when the file is missing.
    """
    try:
        frame = pd.read_csv("data/sales_data_preprocessed.csv")
    except FileNotFoundError:
        print("Error: 'data/sales_data_preprocessed.csv' not found.")
        return pd.DataFrame(columns=["date", "store", "sales"])
    if "date" in frame.columns:
        frame["date"] = pd.to_datetime(frame["date"])
    return frame
44
def load_feature_engineered_data():
    """Load the 55-feature engineered dataset from its Feather file.

    Returns:
        pd.DataFrame: the feature table, or an empty frame on any failure.
    """
    try:
        return feather.read_feather(
            "data/feature_engineered_data_55_features.feather"
        )
    except Exception as exc:  # broad on purpose: missing file, bad schema, ...
        print(f"Error loading feature engineered data: {str(exc)}")
        return pd.DataFrame()
54
+
55
+ # --- Processing Logic ---
56
+
57
def preprocess_data(df, feature_stats=None):
    """Prepare raw sales rows for prediction (simplified pipeline).

    Adds calendar features derived from the 'date' column and z-score
    normalizes any column that has mean/std entries in *feature_stats*.

    Args:
        df: input DataFrame; may contain a 'date' column, either already
            datetime64 or as parseable date strings.
        feature_stats: optional mapping column -> {"mean": .., "std": ..}.

    Returns:
        pd.DataFrame: a new frame; the input is not modified.
    """
    processed_df = df.copy()

    if "date" in processed_df.columns:
        # Robustness: accept string dates as well as datetime64 columns —
        # the .dt accessor below would raise on a plain object column.
        if not pd.api.types.is_datetime64_any_dtype(processed_df["date"]):
            processed_df["date"] = pd.to_datetime(processed_df["date"])
        processed_df["day_of_week"] = processed_df["date"].dt.dayofweek
        processed_df["day_of_month"] = processed_df["date"].dt.day
        processed_df["month"] = processed_df["date"].dt.month
        processed_df["year"] = processed_df["date"].dt.year
        # Vectorized instead of a per-row apply (Saturday=5, Sunday=6).
        processed_df["is_weekend"] = (processed_df["day_of_week"] >= 5).astype(int)

    # Z-score normalization using precomputed training statistics.
    if feature_stats:
        for feature, stats in feature_stats.items():
            if feature in processed_df.columns and "mean" in stats and "std" in stats:
                processed_df[feature] = (
                    processed_df[feature] - stats["mean"]
                ) / stats["std"]

    return processed_df
81
+
82
+ # --- Gradio UI Implementation ---
83
+
84
# Load resources once at import time so every Gradio callback reuses them.
# Both loaders degrade gracefully (None / {}) when their files are absent.
model = load_model()
stats = load_feature_stats()
87
+
88
def predict_sales_ui(store_id):
    """Gradio callback: preview preprocessed rows for one store.

    Raises:
        gr.Error: shown in the UI when the model failed to load at startup.
    """
    if model is None:
        raise gr.Error("Model not loaded. Check server logs.")

    frame = load_data()
    prepared = preprocess_data(frame, stats)

    # Keep only the requested store.
    selected = prepared[prepared['store'] == store_id]

    # Placeholder: a real implementation would run model.predict here.
    return selected.head()
102
+
103
# Simple Gradio Interface
# Minimal demo UI: enter a store id, click Predict, and inspect the
# preprocessed rows that would be fed to the model.
with gr.Blocks() as demo:
    gr.Markdown("# Sales Forecast Prediction")
    store_input = gr.Number(label="Enter Store ID")
    output_table = gr.DataFrame(label="Preprocessed Data Preview")
    btn = gr.Button("Predict")

    btn.click(fn=predict_sales_ui, inputs=store_input, outputs=output_table)

# Launch the app only when executed directly (not on import).
if __name__ == "__main__":
    demo.launch()
app/utils/plots.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
+
6
+
7
def plot_sales(df, store_id=1, item_id=1):
    """Plot the sales series of one store/item pair and mark rows whose
    sales were missing (NaN) in the input with a red dash.

    Args:
        df: DataFrame with columns date, sales, store_id, item_id,
            store_name, item_name.
        store_id: store to plot.
        item_id: item to plot.
    """
    df_2plot = df.query("(store_id==@store_id)&(item_id==@item_id)")
    store_name = df_2plot["store_name"].iloc[-1]
    item_name = df_2plot["item_name"].iloc[-1]

    fig, ax = plt.subplots(figsize=(6, 3))
    df_2plot[["date", "sales"]].plot(x="date", y="sales", ax=ax, legend=False)

    # Remember where the gaps were before filling them, so the markers can
    # be placed on the filled curve.
    nan_indices = df_2plot[df_2plot["sales"].isna()].index

    if len(nan_indices) >= 1:
        # .ffill() replaces the deprecated fillna(method="ffill") call
        # (the method= keyword is removed in pandas 3.0).
        df_2plot = df_2plot.assign(sales=lambda df: df["sales"].ffill())
        # Draw a red dash at each originally-missing point.
        nan_dates = df_2plot.loc[nan_indices, "date"]
        nan_sales = df_2plot.loc[nan_indices, "sales"]
        for date, sales in zip(nan_dates, nan_sales):
            ax.annotate(
                "-",
                xy=(date, sales),
                color="red",  # Set text color to red
                size=20,
            )

    # Set plot labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Sales")
    ax.set_title(f"Store: {store_name} - Item: {item_name}")
    # NOTE(review): the line was drawn with legend=False and carries no
    # label, so this legend call has nothing to show — confirm intent.
    ax.legend()
    plt.show()
39
+
40
+
41
def plot_forecast_single(flat_df, store_item):
    """
    Plot actual vs predicted sales, with the confidence band, for a single
    store-item combination from flattened Prophet predictions.

    Args:
        flat_df: DataFrame with columns ds, y, yhat, yhat_lower, yhat_upper
            and store_item.
        store_item: the store-item key to plot.
    """
    subset = flat_df[flat_df["store_item"] == store_item].copy()

    if subset.empty:
        print(f"No data found for: {store_item}")
        return

    plt.figure(figsize=(12, 6))
    sns.lineplot(data=subset, x="ds", y="y", label="Actual", color="black")
    sns.lineplot(data=subset, x="ds", y="yhat", label="Forecast", color="blue")
    # Shade the uncertainty interval around the forecast.
    plt.fill_between(
        subset["ds"],
        subset["yhat_lower"],
        subset["yhat_upper"],
        color="blue",
        alpha=0.2,
        label="Confidence Interval",
    )
    plt.title(f"Forecast vs Actual for {store_item}")
    plt.xlabel("Date")
    plt.ylabel("Sales")
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()
70
+
71
+
72
def plot_sales_predictions(
    df_prediction, store_id=1, nrows=6, ncols=5, figsize=(20, 20)
):
    """
    Plots actual vs predicted sales for items in a given store, one subplot
    per item in an nrows x ncols grid (unused cells are hidden).

    Parameters:
        df_prediction (DataFrame): Must include ['store_id', 'item_id', 'date', 'sales', 'prediction']
        store_id (int): Store to filter on
        nrows (int): Rows of subplots
        ncols (int): Columns of subplots
        figsize (tuple): Size of the full figure
    """
    df_sample = df_prediction[df_prediction["store_id"] == store_id]
    # Store name taken from the last row of the filtered frame.
    store_name = df_sample["store_name"].iloc[-1]

    fig, axes = plt.subplots(nrows, ncols, figsize=figsize)
    # Flatten the 2-D axes grid so items can be assigned sequentially.
    axes = axes.flatten()

    item_ids = sorted(df_sample["item_id"].unique())

    for i, ax in enumerate(axes):
        if i >= len(item_ids):
            ax.axis("off")  # Hide unused subplots
            continue

        item_id = item_ids[i]
        df2plot = df_sample[df_sample["item_id"] == item_id]
        item_name = df2plot["item_name"].iloc[-1]

        if df2plot.empty:
            ax.axis("off")
            continue

        # Plot actual and predicted sales
        ax.plot(df2plot["date"], df2plot["sales"], label="Actual", color="blue")
        ax.plot(
            df2plot["date"],
            df2plot["prediction"],
            label="Forecast",
            color="red",
            linestyle="--",
            marker=".",
        )

        ax.set_title(f"Item: {item_name}")
        ax.set_xlabel("")
        ax.set_ylabel("Sales")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(True)

    # Only add legend to the first subplot
    # (one shared legend for the whole figure, taken from subplot 0).
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="upper center", ncol=2, fontsize=12)

    plt.tight_layout(rect=[0, 0, 1, 0.97])  # Leave space for the legend
    fig.suptitle(
        f"Sales Forecast vs Actual - Store {store_name}", fontsize=16, fontweight="bold"
    )
    plt.show()
app/utils/utils.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+
3
+ import lightgbm as lgbm
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ import pandas as pd
7
+ import seaborn as sns
8
+
9
+
10
def fill_misisng_values(df):
    """Fill NaN values in the 'sales' column with the mean of non-NaN values.

    Args:
        df: DataFrame with a 'sales' column.

    Returns:
        pd.DataFrame: filled copy; the input frame is not modified.
    """
    df_filled = df.copy()
    df_filled["sales"] = df_filled["sales"].fillna(df_filled["sales"].mean())
    return df_filled


# Correctly spelled alias; the original (misspelled) name above is kept so
# existing callers keep working.
fill_missing_values = fill_misisng_values
15
+
16
+
17
def correct_outliers(df, factor=3):
    """Replace outliers in the 'sales' column with the column mean.

    A row counts as an outlier when the absolute z-score of its sales value
    exceeds *factor*.

    Args:
        df: DataFrame with a 'sales' column.
        factor: z-score threshold (default 3).

    Returns:
        pd.DataFrame: corrected copy; the input frame is not modified.
    """
    corrected = df.copy()

    sales = corrected["sales"]
    z_scores = (sales - sales.mean()) / sales.std()
    is_outlier = np.abs(z_scores) > factor

    # The replacement mean is computed from the still-uncorrected values.
    corrected.loc[is_outlier, "sales"] = sales.mean()

    return corrected
30
+
31
+
32
def get_sample_stores(df: pd.DataFrame, store_id: int = 1) -> pd.DataFrame:
    """Return the rows belonging to a single store.

    Args:
        df: DataFrame with a 'store_id' column.
        store_id: id of the store to extract.

    Returns:
        pd.DataFrame: the group for *store_id* (raises KeyError if absent).
    """
    by_store = df.groupby("store_id")
    return by_store.get_group(store_id)
37
+
38
+
39
def save_data(df, file_path, file_format="feather"):
    """
    Persist a DataFrame to disk in Feather or CSV format.

    Args:
        df (pd.DataFrame): frame to write.
        file_path (str): destination path.
        file_format (str): 'feather' (default) or 'csv', case-insensitive;
            any other value is reported as an error without raising.

    Example:
        ```python
        save_data(df, 'output_data.feather', file_format='feather')
        ```

    Note:
        The Feather path requires pyarrow / feather-format to be installed.
    """
    fmt = file_format.lower()
    if fmt == "feather":
        df.to_feather(file_path)
        print(f"DataFrame saved to {file_path} in Feather format.")
    elif fmt == "csv":
        df.to_csv(file_path, index=False)
        print(f"DataFrame saved to {file_path} in CSV format.")
    else:
        print(
            f"Error: Unsupported file format '{file_format}'. Supported formats: 'feather', 'csv'."
        )
69
+
70
+
71
def flatten_prophet_predictions(predictions_dict):
    """Concatenate per-combination Prophet prediction frames into one table.

    Args:
        predictions_dict: mapping of store-item key -> prediction DataFrame.

    Returns:
        pd.DataFrame: all frames stacked, with a 'store_item' column added
        recording which key each row came from.
    """
    frames = [
        frame.assign(store_item=key) for key, frame in predictions_dict.items()
    ]
    return pd.concat(frames, ignore_index=True)
80
+
81
+
82
def load_model(file_path):
    """
    Load a serialized model, trying pickle first and LightGBM second.

    Parameters:
        file_path: path to the saved model file.

    Returns:
        The deserialized model object (pickled sklearn-style model, or a
        LightGBM Booster when the file is a native LightGBM dump).
    """
    try:
        with open(file_path, "rb") as fh:
            loaded = pickle.load(fh)
        print(f"Sklearn model loaded from {file_path}")
    except (pickle.UnpicklingError, FileNotFoundError):
        # Not a pickle (or not found under that name): fall back to loading
        # a native LightGBM booster file.
        loaded = lgbm.Booster(model_file=file_path)
        print(f"LightGBM (scikit-learn API) model loaded from {file_path}")

    return loaded
104
+
105
+
106
+ # Function to calculate WAPE (Weighted Absolute Percentage Error)
107
def weighted_absolute_percentage_error(y_true, y_pred):
    """
    Calculate Weighted Absolute Percentage Error (WAPE).

    WAPE = 100 * sum(|actual - predicted|) / sum(|actual|), which weights
    each error by the magnitude of the actual value.

    Args:
        y_true: Actual values
        y_pred: Predicted values

    Returns:
        WAPE value (percentage)
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    total_error = np.sum(np.abs(actual - predicted))
    return 100 * total_error / np.sum(np.abs(actual))
app/utils/visualization_code.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.ticker as ticker
5
+ import numpy as np
6
+ import pandas as pd
7
+ import seaborn as sns
8
+ from matplotlib.dates import DateFormatter
9
+
10
+ # Set up plotting style
11
+ plt.style.use("seaborn-v0_8-whitegrid")
12
+ sns.set_palette("deep")
13
+ plt.rcParams["figure.figsize"] = (14, 8)
14
+ plt.rcParams["font.size"] = 12
15
+
16
+
17
+ def visualize_predictions_by_store_item(test_results, output_dir="visualizations"):
18
+ """
19
+ Create visualizations of actual vs predicted values for each store-item combination.
20
+
21
+ Args:
22
+ test_results: DataFrame containing test results with columns:
23
+ 'date', 'store_name', 'item_name', 'sales', 'prediction'
24
+ output_dir: Directory to save the visualizations
25
+ """
26
+ # Create output directory if it doesn't exist
27
+ os.makedirs(output_dir, exist_ok=True)
28
+
29
+ # Create a time series plot for each store-item combination
30
+ store_items = test_results.groupby(["store_name", "item_name"])
31
+
32
+ # Get total number of combinations for progress tracking
33
+ total_combinations = len(store_items)
34
+ print(
35
+ f"Creating visualizations for {total_combinations} store-item combinations..."
36
+ )
37
+
38
+ # Counter for progress tracking
39
+ counter = 0
40
+
41
+ # For each store-item combination, create a plot
42
+ for (store, item), group in store_items:
43
+ # Sort by date to ensure proper time series order
44
+ group = group.sort_values("date")
45
+
46
+ # Convert date to datetime if it's not already
47
+ if not pd.api.types.is_datetime64_any_dtype(group["date"]):
48
+ group["date"] = pd.to_datetime(group["date"])
49
+
50
+ # Create the plot
51
+ fig, ax = plt.subplots(figsize=(14, 6))
52
+
53
+ # Plot actual and predicted values
54
+ ax.plot(
55
+ group["date"], group["sales"], "o-", label="Actual", alpha=0.7, linewidth=2
56
+ )
57
+ ax.plot(
58
+ group["date"],
59
+ group["prediction"],
60
+ "s--",
61
+ label="Predicted",
62
+ alpha=0.7,
63
+ linewidth=2,
64
+ )
65
+
66
+ # Calculate error metrics for this store-item
67
+ mae = np.mean(np.abs(group["sales"] - group["prediction"]))
68
+ mape = (
69
+ np.mean(np.abs((group["sales"] - group["prediction"]) / group["sales"]))
70
+ * 100
71
+ )
72
+
73
+ # Add title and labels
74
+ ax.set_title(f"Store: {store}, Item: {item}\nMAE: {mae:.2f}, MAPE: {mape:.2f}%")
75
+ ax.set_xlabel("Date")
76
+ ax.set_ylabel("Sales")
77
+
78
+ # Format x-axis dates
79
+ date_formatter = DateFormatter("%Y-%m-%d")
80
+ ax.xaxis.set_major_formatter(date_formatter)
81
+ # Rotate date labels for better readability
82
+ plt.xticks(rotation=45)
83
+
84
+ # Add grid for easier reading
85
+ ax.grid(True, linestyle="--", alpha=0.7)
86
+
87
+ # Add legend
88
+ ax.legend()
89
+
90
+ # Adjust layout
91
+ plt.tight_layout()
92
+
93
+ # Save the figure
94
+ safe_store = store.replace(" ", "_").replace("/", "_")
95
+ safe_item = item.replace(" ", "_").replace("/", "_")
96
+ filename = f"{safe_store}_{safe_item}.png"
97
+ plt.savefig(os.path.join(output_dir, filename))
98
+
99
+ # Close the figure to free memory
100
+ plt.close(fig)
101
+
102
+ # Update progress
103
+ counter += 1
104
+ if counter % 10 == 0:
105
+ print(f"Processed {counter}/{total_combinations} combinations")
106
+
107
+ print(f"All visualizations saved to {output_dir}/")
108
+
109
+
110
def _plot_actual_vs_predicted_totals(data, title, filepath):
    """Plot daily actual vs predicted totals from *data* and save to *filepath*."""
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(
        data["date"], data["sales"], "o-", label="Actual", alpha=0.7, linewidth=2
    )
    ax.plot(
        data["date"],
        data["prediction"],
        "s--",
        label="Predicted",
        alpha=0.7,
        linewidth=2,
    )
    ax.set_title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Total Sales")
    # Format x-axis dates and rotate labels for readability.
    ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))
    plt.xticks(rotation=45)
    ax.grid(True, linestyle="--", alpha=0.7)
    ax.legend()
    plt.tight_layout()
    plt.savefig(filepath)
    plt.close(fig)


def visualize_aggregated_predictions(test_results, output_dir="visualizations"):
    """
    Create aggregated visualizations of actual vs predicted values by store,
    item, and date, saving one PNG per aggregation level.

    Args:
        test_results: DataFrame containing test results (needs 'date',
            'store_name', 'item_name', 'sales', 'prediction' columns).
        output_dir: Directory to save the visualizations (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Normalize the date column WITHOUT mutating the caller's DataFrame
    # (the original assigned back into test_results in place).
    if not pd.api.types.is_datetime64_any_dtype(test_results["date"]):
        test_results = test_results.assign(date=pd.to_datetime(test_results["date"]))

    # 1. Total sales per day across all stores and items.
    daily_results = (
        test_results.groupby("date")
        .agg({"sales": "sum", "prediction": "sum"})
        .reset_index()
    )
    _plot_actual_vs_predicted_totals(
        daily_results,
        "Total Daily Sales: Actual vs Predicted",
        os.path.join(output_dir, "total_daily_sales.png"),
    )

    # 2. Daily totals per store.
    store_results = (
        test_results.groupby(["store_name", "date"])
        .agg({"sales": "sum", "prediction": "sum"})
        .reset_index()
    )
    for store in store_results["store_name"].unique():
        store_data = store_results[store_results["store_name"] == store]
        # Sanitize the store name so it is safe to use in a filename.
        safe_store = store.replace(" ", "_").replace("/", "_")
        _plot_actual_vs_predicted_totals(
            store_data,
            f"Store: {store} - Total Daily Sales",
            os.path.join(output_dir, f"store_{safe_store}_total.png"),
        )

    # 3. Daily totals per item.
    item_results = (
        test_results.groupby(["item_name", "date"])
        .agg({"sales": "sum", "prediction": "sum"})
        .reset_index()
    )
    for item in item_results["item_name"].unique():
        item_data = item_results[item_results["item_name"] == item]
        safe_item = item.replace(" ", "_").replace("/", "_")
        _plot_actual_vs_predicted_totals(
            item_data,
            f"Item: {item} - Total Daily Sales",
            os.path.join(output_dir, f"item_{safe_item}_total.png"),
        )

    print(f"Aggregated visualizations saved to {output_dir}/")
269
+
270
+
271
def create_interactive_dashboard(test_results, output_dir="visualizations"):
    """
    Create interactive HTML charts (overall performance + MAPE heatmap) for
    all store-item combinations. Requires the Plotly library; if it is not
    installed, prints install instructions instead of raising.

    Args:
        test_results: DataFrame containing test results (needs 'date',
            'store_name', 'item_name', 'sales', 'prediction' columns).
        output_dir: Directory to save the dashboard HTML files.
    """
    try:
        import plotly.express as px
        import plotly.graph_objects as go

        print("Creating interactive dashboard...")

        os.makedirs(output_dir, exist_ok=True)

        # Normalize the date column WITHOUT mutating the caller's DataFrame
        # (the original assigned back into test_results in place).
        if not pd.api.types.is_datetime64_any_dtype(test_results["date"]):
            test_results = test_results.assign(
                date=pd.to_datetime(test_results["date"])
            )

        # Overall performance: total actual vs predicted sales per day.
        daily_results = (
            test_results.groupby("date")
            .agg({"sales": "sum", "prediction": "sum"})
            .reset_index()
        )

        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=daily_results["date"],
                y=daily_results["sales"],
                mode="lines+markers",
                name="Actual",
                line=dict(color="blue"),
            )
        )
        fig.add_trace(
            go.Scatter(
                x=daily_results["date"],
                y=daily_results["prediction"],
                mode="lines+markers",
                name="Predicted",
                line=dict(color="red", dash="dash"),
            )
        )

        fig.update_layout(
            title="Overall Sales Performance: Actual vs Predicted",
            xaxis_title="Date",
            yaxis_title="Total Sales",
            legend_title="Series",
            height=600,
        )

        fig.write_html(os.path.join(output_dir, "overall_performance.html"))

        def _safe_mape(x):
            # Guard against zero actual sales: compute MAPE over nonzero-sales
            # rows only (the original divided by zero, producing inf).
            mask = x["sales"] != 0
            if not mask.any():
                return np.nan
            actual = x.loc[mask, "sales"]
            predicted = x.loc[mask, "prediction"]
            return np.mean(np.abs((actual - predicted) / actual)) * 100

        # Error heatmap: MAPE per store-item combination.
        store_item_error = (
            test_results.groupby(["store_name", "item_name"])
            .apply(_safe_mape)
            .reset_index()
        )
        store_item_error.columns = ["store_name", "item_name", "mape"]

        # Pivot the data for the heatmap (stores on rows, items on columns).
        heatmap_data = store_item_error.pivot(
            index="store_name", columns="item_name", values="mape"
        )

        heatmap_fig = px.imshow(
            heatmap_data,
            labels=dict(x="Item", y="Store", color="MAPE (%)"),
            x=heatmap_data.columns,
            y=heatmap_data.index,
            color_continuous_scale="RdBu_r",
            title="Mean Absolute Percentage Error by Store and Item",
        )

        heatmap_fig.update_layout(height=800, width=1200)

        heatmap_fig.write_html(os.path.join(output_dir, "error_heatmap.html"))

        print(f"Interactive dashboard elements saved to {output_dir}/")

    except ImportError:
        print("Could not create interactive dashboard. Plotly library is required.")
        print("Install it with: pip install plotly dash")
368
+
369
+
370
def visualize_error_distribution(test_results, output_dir="visualizations"):
    """
    Visualize the distribution and patterns of prediction errors:
    histogram, error vs actual sales, error over time, error by day of week,
    and (if a 'category' column exists) error by category.

    Args:
        test_results: DataFrame containing test results (needs 'date',
            'sales', 'prediction' columns; 'category' optional).
        output_dir: Directory to save the visualizations (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Work on a copy: the original added error/day columns to the caller's
    # DataFrame in place, a surprising side effect.
    test_results = test_results.copy()

    # Derived error columns.
    test_results["error"] = test_results["sales"] - test_results["prediction"]
    test_results["abs_error"] = np.abs(test_results["error"])
    # Percentage error is undefined for zero actual sales; use NaN there
    # instead of dividing by zero (NaN rows are excluded by .mean() below).
    test_results["pct_error"] = np.where(
        test_results["sales"] != 0,
        (test_results["error"] / test_results["sales"]) * 100,
        np.nan,
    )

    # 1. Error distribution histogram
    plt.figure(figsize=(12, 6))
    sns.histplot(test_results["error"], kde=True, bins=50)
    plt.axvline(x=0, color="red", linestyle="--")
    plt.title("Distribution of Prediction Errors")
    plt.xlabel("Error (Actual - Predicted)")
    plt.ylabel("Frequency")
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "error_distribution.png"))
    plt.close()

    # 2. Error vs Actual Sales
    plt.figure(figsize=(12, 6))
    plt.scatter(test_results["sales"], test_results["error"], alpha=0.5)
    plt.axhline(y=0, color="red", linestyle="--")
    plt.title("Prediction Error vs Actual Sales")
    plt.xlabel("Actual Sales")
    plt.ylabel("Error (Actual - Predicted)")
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "error_vs_sales.png"))
    plt.close()

    # 3. Error over time
    plt.figure(figsize=(14, 6))
    if not pd.api.types.is_datetime64_any_dtype(test_results["date"]):
        test_results["date"] = pd.to_datetime(test_results["date"])

    # Group by date to see overall error trend
    daily_error = test_results.groupby("date")["error"].mean().reset_index()
    plt.plot(daily_error["date"], daily_error["error"], "o-")
    plt.axhline(y=0, color="red", linestyle="--")
    plt.title("Mean Prediction Error Over Time")
    plt.xlabel("Date")
    plt.ylabel("Mean Error")
    date_formatter = DateFormatter("%Y-%m-%d")
    plt.gca().xaxis.set_major_formatter(date_formatter)
    plt.xticks(rotation=45)
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "error_over_time.png"))
    plt.close()

    # 4. Error by day of week
    test_results["day_of_week"] = test_results["date"].dt.dayofweek
    test_results["day_name"] = test_results["date"].dt.day_name()

    plt.figure(figsize=(12, 6))
    day_error = (
        test_results.groupby("day_name")["pct_error"]
        .mean()
        .reindex(
            [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
        )
    )
    sns.barplot(x=day_error.index, y=day_error.values)
    plt.title("Mean Percentage Error by Day of Week")
    plt.xlabel("Day of Week")
    plt.ylabel("Mean Percentage Error (%)")
    plt.axhline(y=0, color="red", linestyle="--")
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "error_by_day_of_week.png"))
    plt.close()

    # 5. Error by category - only if 'category' column exists
    if "category" in test_results.columns:
        plt.figure(figsize=(12, 6))
        cat_error = test_results.groupby("category")["pct_error"].mean().sort_values()
        sns.barplot(x=cat_error.index, y=cat_error.values)
        plt.title("Mean Percentage Error by Category")
        plt.xlabel("Category")
        plt.ylabel("Mean Percentage Error (%)")
        plt.axhline(y=0, color="red", linestyle="--")
        plt.xticks(rotation=45)
        plt.grid(True, linestyle="--", alpha=0.7)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "error_by_category.png"))
        plt.close()

    print(f"Error analysis visualizations saved to {output_dir}/")
477
+
478
+
479
def create_forecast_dashboard(
    model, X_test, y_test, test_results, data, output_dir="visualizations"
):
    """
    Build the full set of forecast visualizations in one call.

    Runs, in order: per-combination plots for the 20 highest-volume
    store-item pairs, aggregated plots, error analysis, and (when Plotly is
    available) the interactive dashboard.

    Args:
        model: Trained model (currently unused; kept for interface stability).
        X_test: Test features (currently unused).
        y_test: Test target values (currently unused).
        test_results: DataFrame with test results.
        data: Original data with date, store, item info (currently unused).
        output_dir: Directory to save visualizations.
    """
    print("Creating forecast visualizations...")

    # Rank store-item pairs by total sales volume and keep the top 20 so we
    # don't generate an unbounded number of individual plots.
    volume_by_pair = (
        test_results.groupby(["store_name", "item_name"])["sales"].sum().reset_index()
    )
    leaders = volume_by_pair.sort_values("sales", ascending=False).head(20)

    # Restrict the detailed per-pair plots to those leading combinations.
    leader_rows = pd.merge(
        test_results,
        leaders[["store_name", "item_name"]],
        on=["store_name", "item_name"],
    )
    visualize_predictions_by_store_item(leader_rows, output_dir)

    # Aggregate-level views use the full result set.
    visualize_aggregated_predictions(test_results, output_dir)
    visualize_error_distribution(test_results, output_dir)

    # Interactive dashboard is best-effort (no-op message if Plotly missing).
    create_interactive_dashboard(test_results, output_dir)

    print("Forecast visualization dashboard created successfully!")