Spaces:

BeyzaTopbas
/

Store-Sales-Forecasting

Sleeping

App Files Files Community

BeyzaTopbas committed on Feb 25

Commit

3192444

verified ·

1 Parent(s): d876b29

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +45 -29

src/streamlit_app.py CHANGED Viewed

@@ -7,25 +7,24 @@ import matplotlib.pyplot as plt
 from sklearn.metrics import mean_squared_error
 # ================= CONFIG =================
-st.set_page_config(page_title="Store Sales Forecasting", layout="centered")
 BASE_DIR = os.path.dirname(__file__)
 model = joblib.load(os.path.join(BASE_DIR, "model.pkl"))
 feature_names = joblib.load(os.path.join(BASE_DIR, "features.pkl"))
-# ================= LOAD TEST DATA (OPTIONAL) =================
 X_test_path = os.path.join(BASE_DIR, "X_test.npy")
 y_test_path = os.path.join(BASE_DIR, "y_test.npy")
 if os.path.exists(X_test_path):
     X_test = np.load(X_test_path)
     y_test = np.load(y_test_path)
-    y_pred = model.predict(X_test)
-    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
 else:
-    X_test, y_test, y_pred, rmse = None, None, None, None
 # ================= TITLE =================
 st.title("🛒 Store Sales Forecasting")
@@ -33,44 +32,61 @@ st.markdown("Predict daily store sales using Machine Learning.")
 tab1, tab2 = st.tabs(["🔮 Prediction", "📊 Model Insights"])
-# ================= TAB 1 – PREDICTION =================
 with tab1:
     st.subheader("Input Features")
-    input_data = {}
-    for feature in feature_names:
-        input_data[feature] = st.number_input(feature, value=0.0)
-    input_df = pd.DataFrame([input_data])
     if st.button("Predict Sales"):
-        prediction = model.predict(input_df)[0]
         st.markdown("## 📈 Predicted Sales")
-        st.success(f"${prediction:,.2f}")
-# ================= TAB 2 – MODEL INSIGHTS =================
 with tab2:
     st.subheader("Model Performance")
-    if X_test is None:
-        st.info("Upload X_test.npy and y_test.npy to see performance.")
     else:
-        st.metric("RMSE", round(rmse, 2))
-        # Actual vs Predicted
-        fig, ax = plt.subplots(figsize=(10, 4))
-        ax.plot(y_test[:200], label="Actual")
-        ax.plot(y_pred[:200], label="Predicted")
-        ax.legend()
-        ax.set_title("Actual vs Predicted Sales")
-        st.pyplot(fig)
-    # ================= FEATURE IMPORTANCE =================
     if hasattr(model, "feature_importances_"):
         st.subheader("Top Feature Importances")
@@ -80,6 +96,6 @@ with tab2:
             index=feature_names
         ).sort_values(ascending=False).head(15)
-        fig2, ax2 = plt.subplots()
-        importance.sort_values().plot(kind="barh", ax=ax2)
-        st.pyplot(fig2)

 from sklearn.metrics import mean_squared_error
 # ================= CONFIG =================
+st.set_page_config(page_title="Store Sales Forecasting", layout="wide")
 BASE_DIR = os.path.dirname(__file__)
 model = joblib.load(os.path.join(BASE_DIR, "model.pkl"))
 feature_names = joblib.load(os.path.join(BASE_DIR, "features.pkl"))
+# test data (optional, for insights)
 X_test_path = os.path.join(BASE_DIR, "X_test.npy")
 y_test_path = os.path.join(BASE_DIR, "y_test.npy")
 if os.path.exists(X_test_path):
     X_test = np.load(X_test_path)
     y_test = np.load(y_test_path)
+    y_pred_test = model.predict(X_test)
+    rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
 else:
+    X_test, y_test, rmse = None, None, None
 # ================= TITLE =================
 st.title("🛒 Store Sales Forecasting")
 tab1, tab2 = st.tabs(["🔮 Prediction", "📊 Model Insights"])
+# ================= PREDICTION TAB =================
 with tab1:
     st.subheader("Input Features")
+    families = [c.replace("family_", "") for c in feature_names if "family_" in c]
+    col1, col2 = st.columns(2)
+    with col1:
+        store_nbr = st.number_input("Store Number", 1)
+        onpromotion = st.number_input("On Promotion", 0)
+        family = st.selectbox("Product Family", families)
+    with col2:
+        date = st.date_input("Date")
+        year = date.year
+        month = date.month
+        day = date.day
+        dayofweek = date.weekday()
+    # -------- One-hot encoding in background --------
+    input_dict = dict.fromkeys(feature_names, 0)
+    input_dict["store_nbr"] = store_nbr
+    input_dict["onpromotion"] = onpromotion
+    input_dict["year"] = year
+    input_dict["month"] = month
+    input_dict["day"] = day
+    input_dict["dayofweek"] = dayofweek
+    input_dict[f"family_{family}"] = 1
+    features = pd.DataFrame([input_dict])
+    # ================= PREDICT =================
     if st.button("Predict Sales"):
+        prediction = model.predict(features)[0]
         st.markdown("## 📈 Predicted Sales")
+        st.success(f"💰 {prediction:,.2f}")
+# ================= MODEL INSIGHTS =================
 with tab2:
     st.subheader("Model Performance")
+    if rmse is not None:
+        st.metric("RMSE", f"{rmse:,.2f}")
     else:
+        st.info("Upload X_test.npy & y_test.npy to display RMSE.")
+    # -------- Feature Importance --------
     if hasattr(model, "feature_importances_"):
         st.subheader("Top Feature Importances")
             index=feature_names
         ).sort_values(ascending=False).head(15)
+        fig, ax = plt.subplots()
+        importance.sort_values().plot(kind="barh", ax=ax)
+        st.pyplot(fig)