Spaces:

BeyzaTopbas
/

Store-Sales-Forecasting

Sleeping

App Files Files Community

BeyzaTopbas commited on Feb 25

Commit

6582ce8

verified ·

1 Parent(s): c695e1a

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +90 -23

src/streamlit_app.py CHANGED Viewed

@@ -6,7 +6,9 @@ import os
 import matplotlib.pyplot as plt
 from sklearn.metrics import mean_squared_error
-# ================= CONFIG =================
 st.set_page_config(page_title="Store Sales Forecasting", layout="wide")
 BASE_DIR = os.path.dirname(__file__)
@@ -14,17 +16,23 @@ BASE_DIR = os.path.dirname(__file__)
 model = joblib.load(os.path.join(BASE_DIR, "model.pkl"))
 feature_names = joblib.load(os.path.join(BASE_DIR, "features.pkl"))
-# test data (optioneel voor insights)
 X_test_path = os.path.join(BASE_DIR, "X_test.npy")
 y_test_path = os.path.join(BASE_DIR, "y_test.npy")
 if os.path.exists(X_test_path):
     X_test = np.load(X_test_path)
     y_test = np.load(y_test_path)
     y_pred_test = model.predict(X_test)
     rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
 else:
-    X_test, y_test, rmse = None, None, None
 # ================= TITLE =================
 st.title("🛒 Store Sales Forecasting")
@@ -39,22 +47,28 @@ with tab1:
     families = [c.replace("family_", "") for c in feature_names if "family_" in c]
-    col1, col2 = st.columns(2)
-    with col1:
-        store_nbr = st.number_input("Store Number", 1)
-        onpromotion = st.number_input("On Promotion", 0)
-        family = st.selectbox("Product Family", families)
-    with col2:
-        date = st.date_input("Date")
-        year = date.year
-        month = date.month
-        day = date.day
-        dayofweek = date.weekday()
-    # -------- One-hot encoding in background --------
     input_dict = dict.fromkeys(feature_names, 0)
     input_dict["store_nbr"] = store_nbr
@@ -63,7 +77,6 @@ with tab1:
     input_dict["month"] = month
     input_dict["day"] = day
     input_dict["dayofweek"] = dayofweek
     input_dict[f"family_{family}"] = 1
     features = pd.DataFrame([input_dict])
@@ -71,22 +84,48 @@ with tab1:
     # ================= PREDICT =================
     if st.button("Predict Sales"):
-        prediction = model.predict(features)[0]
         st.markdown("## 📈 Predicted Sales")
-        st.success(f"💰 {prediction:,.2f}")
 # ================= MODEL INSIGHTS =================
 with tab2:
     st.subheader("Model Performance")
-    if rmse is not None:
         st.metric("RMSE", f"{rmse:,.2f}")
     else:
         st.info("Upload X_test.npy & y_test.npy to display RMSE.")
-    # -------- Feature Importance --------
     if hasattr(model, "feature_importances_"):
         st.subheader("Top Feature Importances")
@@ -94,8 +133,36 @@ with tab2:
         importance = pd.Series(
             model.feature_importances_,
             index=feature_names
-        ).sort_values(ascending=False).head(15)
         fig, ax = plt.subplots()
-        importance.sort_values().plot(kind="barh", ax=ax)
-        st.pyplot(fig)

 import matplotlib.pyplot as plt
 from sklearn.metrics import mean_squared_error
+# ================= SETTINGS =================
+USE_LOG_TARGET = True
 st.set_page_config(page_title="Store Sales Forecasting", layout="wide")
 BASE_DIR = os.path.dirname(__file__)
 model = joblib.load(os.path.join(BASE_DIR, "model.pkl"))
 feature_names = joblib.load(os.path.join(BASE_DIR, "features.pkl"))
+# ================= LOAD TEST DATA =================
 X_test_path = os.path.join(BASE_DIR, "X_test.npy")
 y_test_path = os.path.join(BASE_DIR, "y_test.npy")
 if os.path.exists(X_test_path):
     X_test = np.load(X_test_path)
     y_test = np.load(y_test_path)
     y_pred_test = model.predict(X_test)
+    if USE_LOG_TARGET:
+        y_pred_test = np.expm1(y_pred_test)
+        y_test = np.expm1(y_test)
     rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
 else:
+    rmse = None
 # ================= TITLE =================
 st.title("🛒 Store Sales Forecasting")
     families = [c.replace("family_", "") for c in feature_names if "family_" in c]
+    if st.button("🎲 Load Example"):
+        store_nbr = 1
+        onpromotion = 5
+        date = pd.to_datetime("2017-08-15")
+        family = families[0]
+    else:
+        date = st.date_input("Date")
+        col1, col2 = st.columns(2)
+        with col1:
+            store_nbr = st.number_input("Store Number", 1)
+            onpromotion = st.number_input("On Promotion", 0)
+        with col2:
+            family = st.selectbox("Product Family", families)
+    year = date.year
+    month = date.month
+    day = date.day
+    dayofweek = date.weekday()
     input_dict = dict.fromkeys(feature_names, 0)
     input_dict["store_nbr"] = store_nbr
     input_dict["month"] = month
     input_dict["day"] = day
     input_dict["dayofweek"] = dayofweek
     input_dict[f"family_{family}"] = 1
     features = pd.DataFrame([input_dict])
     # ================= PREDICT =================
     if st.button("Predict Sales"):
+        with st.spinner("Making prediction..."):
+            pred = model.predict(features)[0]
+            if USE_LOG_TARGET:
+                pred = np.expm1(pred)
         st.markdown("## 📈 Predicted Sales")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.metric("💰 Sales", f"{pred:,.2f}")
+        with col2:
+            st.metric("🏪 Store", store_nbr)
+        # download
+        result_df = pd.DataFrame({
+            "store_nbr": [store_nbr],
+            "family": [family],
+            "prediction": [pred]
+        })
+        st.download_button(
+            "⬇ Download prediction",
+            result_df.to_csv(index=False),
+            "prediction.csv",
+            "text/csv"
+        )
 # ================= MODEL INSIGHTS =================
 with tab2:
     st.subheader("Model Performance")
+    if rmse:
         st.metric("RMSE", f"{rmse:,.2f}")
     else:
         st.info("Upload X_test.npy & y_test.npy to display RMSE.")
+    # ================= FEATURE IMPORTANCE =================
     if hasattr(model, "feature_importances_"):
         st.subheader("Top Feature Importances")
         importance = pd.Series(
             model.feature_importances_,
             index=feature_names
+        )
+        top = importance.sort_values(ascending=False).head(15)
         fig, ax = plt.subplots()
+        top.sort_values().plot(kind="barh", ax=ax)
+        st.pyplot(fig)
+        # grouped importance
+        st.subheader("Grouped Importance")
+        family_imp = importance[importance.index.str.contains("family_")].sum()
+        other_imp = importance[~importance.index.str.contains("family_")]
+        grouped = pd.concat([
+            pd.Series({"family_total": family_imp}),
+            other_imp
+        ]).sort_values(ascending=False).head(10)
+        fig2, ax2 = plt.subplots()
+        grouped.sort_values().plot(kind="barh", ax=ax2)
+        st.pyplot(fig2)
+    # ================= MODEL INFO =================
+    st.subheader("Model Info")
+    st.info(f"""
+Model type: **{type(model).__name__}**
+Features used: **{len(feature_names)}**
+Log target: **{USE_LOG_TARGET}**
+""")