Spaces:

cheeka84
/

super-kart-pred

Sleeping

App Files Files Community

cheeka84 committed on Oct 4, 2025

Commit

88ffb06

verified ·

1 Parent(s): 7ac1ab2

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

Dockerfile +15 -12
app.py +104 -0
requirements.txt +7 -3

Dockerfile CHANGED Viewed

@@ -1,20 +1,23 @@
-FROM python:3.13.5-slim
 WORKDIR /app
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-COPY requirements.txt ./
-COPY src/ ./src/
 RUN pip3 install -r requirements.txt
-EXPOSE 8501
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+# Use the official Python 3.9 image (the default, full Debian-based variant, not -slim)
+FROM python:3.9
+# Set the working directory inside the container to /app
 WORKDIR /app
+# Copy only requirements.txt first so the dependency layer below stays cached when app code changes
+COPY requirements.txt .
+# Install Python dependencies listed in requirements.txt
 RUN pip3 install -r requirements.txt
+# Create an unprivileged user (UID 1000) and run everything from here on as that user
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Copy the application files into the user's app directory, owned by that user
+COPY --chown=user . $HOME/app
+# Run the Streamlit app on port 8501, bound to all interfaces; note that XSRF protection is disabled here
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import os
+# --- Give Streamlit a writable home in containers: if HOME is unset, fall back to /tmp and create ~/.streamlit before streamlit is imported (avoids '/.streamlit' PermissionError)
+os.environ.setdefault("HOME", "/tmp")
+os.makedirs(os.path.expanduser("~/.streamlit"), exist_ok=True)
+import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+from datetime import datetime
+from huggingface_hub import hf_hub_download
+st.set_page_config(page_title="SuperKart Sales Prediction", page_icon="🛒")
+st.title("🛒 SuperKart — Predict Product-Store Sales (Regression)")
+st.caption("Enter product & store attributes to predict `Product_Store_Sales_Total`")
+# ----------------------------
+# Model download/load
+# ----------------------------
+# Hugging Face model repo (where train.py uploaded the chosen regressor); override with the MODEL_REPO_ID env var
+MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "cheeka84/super-kart-pred")
+# Which regressor won (XGBoost or RandomForest) is not known here, so both filenames are tried in this order:
+CANDIDATE_FILES = [
+    "superkart_xgboost_regressor.joblib",
+    "superkart_random_forest_regressor.joblib",
+]
+HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")  # None if neither env var is set
+def load_model():
+    last_err = None
+    for fname in CANDIDATE_FILES:
+        try:
+            path = hf_hub_download(
+                repo_id=MODEL_REPO_ID,
+                filename=fname,
+                repo_type="model",
+                token=HF_TOKEN  # omit if repo is public
+            )
+            return joblib.load(path), fname
+        except Exception as e:
+            last_err = e
+    raise RuntimeError(f"Could not download any model from {MODEL_REPO_ID}. "
+                       f"Tried: {CANDIDATE_FILES}. Last error: {last_err}")
+model, model_file = load_model()  # raises RuntimeError if no candidate file could be loaded
+st.success(f"Loaded model: `{model_file}` from {MODEL_REPO_ID}")
+# ----------------------------
+# Input UI (match training features)
+# ----------------------------
+col1, col2 = st.columns(2)  # numeric inputs on the left, free-text categorical inputs on the right
+with col1:
+    product_weight = st.number_input("Product_Weight", min_value=0.0, value=500.0, step=1.0)
+    product_area   = st.number_input("Product_Allocated_Area", min_value=0.0, value=50.0, step=1.0)
+    product_mrp    = st.number_input("Product_MRP", min_value=0.0, value=199.0, step=1.0)
+    est_year       = st.number_input("Store_Establishment_Year", min_value=1950, max_value=datetime.now().year, value=2015, step=1)
+with col2:
+    sugar_content  = st.text_input("Product_Sugar_Content", value="Low", help="e.g., Low/Medium/High/No Sugar")
+    product_type   = st.text_input("Product_Type", value="Beverages")
+    store_id       = st.text_input("Store_Id", value="S1")
+    store_size     = st.text_input("Store_Size", value="Medium")
+    city_type      = st.text_input("Store_Location_City_Type", value="Tier 2")
+    store_type     = st.text_input("Store_Type", value="Grocery")
+# Engineered features, recomputed here from the raw inputs (per the original note, prep.py adds the same ones at training time)
+current_year = datetime.now().year
+store_age     = max(0, min(200, current_year - int(est_year)))  # clip [0,200]
+price_per_area = float(product_mrp) / float(product_area) if product_area not in (0, None) else 0.0  # 0.0 when area is zero, avoiding ZeroDivisionError
+# Build a single-row DataFrame keyed by the training column names (assumes the model tolerates extra columns -- TODO confirm)
+row = {
+    "Product_Weight": product_weight,
+    "Product_Allocated_Area": product_area,
+    "Product_MRP": product_mrp,
+    "Store_Establishment_Year": est_year,
+    "Store_Age": store_age,
+    "Price_per_Area": price_per_area,
+    "Product_Sugar_Content": sugar_content.strip(),
+    "Product_Type": product_type.strip(),
+    "Store_Id": store_id.strip(),
+    "Store_Size": store_size.strip(),
+    "Store_Location_City_Type": city_type.strip(),
+    "Store_Type": store_type.strip(),
+}
+input_df = pd.DataFrame([row])
+st.subheader("Input preview")
+st.dataframe(input_df)
+# ----------------------------
+# Predict
+# ----------------------------
+if st.button("Predict sales"):
+    try:
+        # Assumes `model` is a fitted pipeline that accepts a DataFrame with the training column names -- verify against train.py
+        pred = model.predict(input_df)[0]  # one input row, so take the first prediction
+        st.markdown(f"### 🔮 Predicted `Product_Store_Sales_Total`: **{pred:,.2f}**")
+    except Exception as e:
+        st.error(f"Prediction failed: {e}")
+        st.exception(e)
+st.info("Note: Unknown category values are safely ignored by the one-hot encoder (handled as all-zero columns).")

requirements.txt CHANGED Viewed

@@ -1,3 +1,7 @@
-altair
-pandas
-streamlit

+pandas==2.2.2
+huggingface_hub==0.32.6
+streamlit==1.43.2
+joblib==1.5.1
+scikit-learn==1.6.0
+xgboost==2.1.4
+mlflow==3.0.1