# Spaces: BeyzaTopbas / Store-Sales-Forecasting (status: Sleeping)
# File size: 4,484 Bytes

import streamlit as st
import pandas as pd
import numpy as np
import joblib
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# ================= SETTINGS =================
# When True, model outputs (and the stored test targets) are passed through
# np.expm1 before being shown -- presumably the model was fitted on
# log1p(sales); confirm against the training code.
USE_LOG_TARGET = True

st.set_page_config(layout="wide", page_title="Store Sales Forecasting")

# All artifacts are resolved relative to the directory holding this script.
BASE_DIR = os.path.dirname(__file__)


def _load_artifact(filename):
    """Load a joblib-serialised object stored next to this script.

    NOTE: joblib.load unpickles arbitrary Python objects, so only trusted
    artifact files should be placed in BASE_DIR.
    """
    artifact_path = os.path.join(BASE_DIR, filename)
    return joblib.load(artifact_path)


model = _load_artifact("model.pkl")
feature_names = _load_artifact("features.pkl")

# ================= LOAD TEST DATA =================
# Optional hold-out arrays stored next to the script; they are used only to
# report an RMSE figure in the "Model Insights" tab.
X_test_path = os.path.join(BASE_DIR, "X_test.npy")
y_test_path = os.path.join(BASE_DIR, "y_test.npy")

# rmse stays None unless BOTH arrays are present. Checking only X_test.npy
# would crash the whole app at start-up when y_test.npy is missing.
rmse = None

if os.path.exists(X_test_path) and os.path.exists(y_test_path):
    X_test = np.load(X_test_path)
    y_test = np.load(y_test_path)

    y_pred_test = model.predict(X_test)

    if USE_LOG_TARGET:
        # Undo the log1p transform on both sides so the RMSE is reported on
        # the same scale as the predictions shown to the user.
        y_pred_test = np.expm1(y_pred_test)
        y_test = np.expm1(y_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))

# ================= TITLE =================
st.title("🛒 Store Sales Forecasting")
st.markdown("Predict daily store sales using Machine Learning.")

# Two tabs: the first hosts the interactive prediction form, the second the
# model diagnostics.
tab_labels = ["🔮 Prediction", "📊 Model Insights"]
tab1, tab2 = st.tabs(tab_labels)

# ================= PREDICTION TAB =================
# Prefix of the one-hot product-family columns in ``feature_names``; the
# selected family is written back as f"{FAMILY_PREFIX}{family}".
FAMILY_PREFIX = "family_"

# Session-state keys of the input widgets, so the example loader can pre-fill
# them and the values survive subsequent reruns.
DATE_KEY = "input_date"
STORE_KEY = "input_store_nbr"
PROMO_KEY = "input_onpromotion"
FAMILY_KEY = "input_family"


def _load_example_inputs(example_family=None):
    """Button callback: pre-fill the input widgets with a fixed example.

    The values are written to ``st.session_state`` under the widgets' keys, so
    the keyed widgets pick them up on the following script run and keep them
    on later reruns (e.g. the one triggered by "Predict Sales"). Assigning
    plain local variables instead would lose the example on the next rerun.

    Args:
        example_family: Product family to select, or ``None`` to leave the
            selectbox untouched (used when no family features exist).
    """
    st.session_state[DATE_KEY] = pd.to_datetime("2017-08-15").date()
    st.session_state[STORE_KEY] = 1
    st.session_state[PROMO_KEY] = 5
    if example_family is not None:
        st.session_state[FAMILY_KEY] = example_family


with tab1:

    st.subheader("Input Features")

    # Match on the prefix only, so every listed family maps back to an
    # existing f"family_{family}" column when the input row is built.
    families = [
        c[len(FAMILY_PREFIX):] for c in feature_names if c.startswith(FAMILY_PREFIX)
    ]

    st.button(
        "🎲 Load Example",
        on_click=_load_example_inputs,
        # Guard against an empty family list instead of indexing blindly.
        args=(families[0] if families else None,),
    )

    date = st.date_input("Date", key=DATE_KEY)

    col1, col2 = st.columns(2)

    with col1:
        # Second positional argument is min_value.
        store_nbr = st.number_input("Store Number", 1, key=STORE_KEY)
        onpromotion = st.number_input("On Promotion", 0, key=PROMO_KEY)

    with col2:
        family = st.selectbox("Product Family", families, key=FAMILY_KEY)

    year = date.year
    month = date.month
    day = date.day
    dayofweek = date.weekday()

    # Start from an all-zero row so every unselected one-hot column is 0.
    input_dict = dict.fromkeys(feature_names, 0)

    input_dict["store_nbr"] = store_nbr
    input_dict["onpromotion"] = onpromotion
    input_dict["year"] = year
    input_dict["month"] = month
    input_dict["day"] = day
    input_dict["dayofweek"] = dayofweek
    if family is not None:
        # selectbox returns None when there are no options to choose from.
        input_dict[f"{FAMILY_PREFIX}{family}"] = 1

    # Keep exactly the model's columns, in the model's order: any key above
    # that is not a known feature is dropped rather than passed to predict().
    features = pd.DataFrame([input_dict]).reindex(
        columns=list(feature_names), fill_value=0
    )

    # ================= PREDICT =================
    if st.button("Predict Sales"):

        with st.spinner("Making prediction..."):

            pred = model.predict(features)[0]

            if USE_LOG_TARGET:
                # Map the prediction back from log1p space.
                pred = np.expm1(pred)

        st.markdown("## 📈 Predicted Sales")

        col1, col2 = st.columns(2)

        with col1:
            st.metric("💰 Sales", f"{pred:,.2f}")

        with col2:
            st.metric("🏪 Store", store_nbr)

        # download
        result_df = pd.DataFrame({
            "store_nbr": [store_nbr],
            "family": [family],
            "prediction": [pred]
        })

        st.download_button(
            "⬇ Download prediction",
            result_df.to_csv(index=False),
            "prediction.csv",
            "text/csv"
        )

# ================= MODEL INSIGHTS =================
with tab2:

    st.subheader("Model Performance")

    # Compare against None explicitly: an RMSE of exactly 0.0 is falsy but is
    # still a valid value that should be displayed.
    if rmse is not None:
        st.metric("RMSE", f"{rmse:,.2f}")
    else:
        st.info("Upload X_test.npy & y_test.npy to display RMSE.")

    # ================= FEATURE IMPORTANCE =================
    # Only models exposing ``feature_importances_`` get the importance charts.
    if hasattr(model, "feature_importances_"):

        st.subheader("Top Feature Importances")

        importance = pd.Series(
            model.feature_importances_,
            index=feature_names
        )

        top = importance.sort_values(ascending=False).head(15)

        fig, ax = plt.subplots()
        # Re-sort ascending so the largest bar ends up at the top of the chart.
        top.sort_values().plot(kind="barh", ax=ax)
        st.pyplot(fig)
        # The script reruns on every interaction; close the figure so pyplot's
        # global registry does not accumulate one figure per rerun.
        plt.close(fig)

        # grouped importance: all one-hot family columns collapsed into one bar
        st.subheader("Grouped Importance")

        # Family columns are named f"family_{family}", so match on the prefix
        # and compute the mask once.
        is_family = importance.index.str.startswith("family_")
        family_imp = importance[is_family].sum()
        other_imp = importance[~is_family]

        grouped = pd.concat([
            pd.Series({"family_total": family_imp}),
            other_imp
        ]).sort_values(ascending=False).head(10)

        fig2, ax2 = plt.subplots()
        grouped.sort_values().plot(kind="barh", ax=ax2)
        st.pyplot(fig2)
        plt.close(fig2)

    # ================= MODEL INFO =================
    st.subheader("Model Info")

    st.info(f"""
Model type: **{type(model).__name__}**

Features used: **{len(feature_names)}**

Log target: **{USE_LOG_TARGET}**
""")