cheeka84 committed on
Commit
88ffb06
·
verified ·
1 Parent(s): 7ac1ab2

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +15 -12
  2. app.py +104 -0
  3. requirements.txt +7 -3
Dockerfile CHANGED
@@ -1,20 +1,23 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
 
14
  RUN pip3 install -r requirements.txt
15
 
16
- EXPOSE 8501
 
 
 
 
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ # Use the official Python 3.9 base image (the full image, not the -slim variant)
2
+ FROM python:3.9
3
 
4
+ # Set the working directory inside the container to /app
5
  WORKDIR /app
6
 
7
+ # Copy all files from the current directory on the host to the container's /app directory
8
+ COPY . .
 
 
 
 
 
 
9
 
10
+ # Install Python dependencies listed in requirements.txt
11
  RUN pip3 install -r requirements.txt
12
 
13
+ RUN useradd -m -u 1000 user
14
+ USER user
15
+ ENV HOME=/home/user \
16
+ PATH=/home/user/.local/bin:$PATH
17
+
18
+ WORKDIR $HOME/app
19
 
20
+ COPY --chown=user . $HOME/app
21
 
22
+ # Define the command to run the Streamlit app on port "8501" and make it accessible externally
23
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ # --- ensure Streamlit has a writable ~/.streamlit directory in containers (avoids '/.streamlit' PermissionError)
3
+ os.environ.setdefault("HOME", "/tmp")
4
+ os.makedirs(os.path.expanduser("~/.streamlit"), exist_ok=True)
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import numpy as np
9
+ import joblib
10
+ from datetime import datetime
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ st.set_page_config(page_title="SuperKart Sales Prediction", page_icon="🛒")
14
+
15
+ st.title("🛒 SuperKart — Predict Product-Store Sales (Regression)")
16
+ st.caption("Enter product & store attributes to predict `Product_Store_Sales_Total`")
17
+
18
+ # ----------------------------
19
+ # Model download/load
20
+ # ----------------------------
21
+ # Set your model repo (where train.py uploaded the chosen regressor)
22
+ MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "cheeka84/super-kart-pred")
23
+ # We don't know which model won (XGBoost or RandomForest), so try both filenames:
24
+ CANDIDATE_FILES = [
25
+ "superkart_xgboost_regressor.joblib",
26
+ "superkart_random_forest_regressor.joblib",
27
+ ]
28
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
29
+
30
+ def load_model():
31
+ last_err = None
32
+ for fname in CANDIDATE_FILES:
33
+ try:
34
+ path = hf_hub_download(
35
+ repo_id=MODEL_REPO_ID,
36
+ filename=fname,
37
+ repo_type="model",
38
+ token=HF_TOKEN # omit if repo is public
39
+ )
40
+ return joblib.load(path), fname
41
+ except Exception as e:
42
+ last_err = e
43
+ raise RuntimeError(f"Could not download any model from {MODEL_REPO_ID}. "
44
+ f"Tried: {CANDIDATE_FILES}. Last error: {last_err}")
45
+
46
+ model, model_file = load_model()
47
+ st.success(f"Loaded model: `{model_file}` from {MODEL_REPO_ID}")
48
+
49
+ # ----------------------------
50
+ # Input UI (match training features)
51
+ # ----------------------------
52
+ col1, col2 = st.columns(2)
53
+ with col1:
54
+ product_weight = st.number_input("Product_Weight", min_value=0.0, value=500.0, step=1.0)
55
+ product_area = st.number_input("Product_Allocated_Area", min_value=0.0, value=50.0, step=1.0)
56
+ product_mrp = st.number_input("Product_MRP", min_value=0.0, value=199.0, step=1.0)
57
+ est_year = st.number_input("Store_Establishment_Year", min_value=1950, max_value=datetime.now().year, value=2015, step=1)
58
+
59
+ with col2:
60
+ sugar_content = st.text_input("Product_Sugar_Content", value="Low", help="e.g., Low/Medium/High/No Sugar")
61
+ product_type = st.text_input("Product_Type", value="Beverages")
62
+ store_id = st.text_input("Store_Id", value="S1")
63
+ store_size = st.text_input("Store_Size", value="Medium")
64
+ city_type = st.text_input("Store_Location_City_Type", value="Tier 2")
65
+ store_type = st.text_input("Store_Type", value="Grocery")
66
+
67
+ # Engineered features (prep.py added these; compute here as well)
68
+ current_year = datetime.now().year
69
+ store_age = max(0, min(200, current_year - int(est_year))) # clip [0,200]
70
+ price_per_area = float(product_mrp) / float(product_area) if product_area not in (0, None) else 0.0
71
+
72
+ # Build single-row DataFrame with ALL expected columns (extras are fine)
73
+ row = {
74
+ "Product_Weight": product_weight,
75
+ "Product_Allocated_Area": product_area,
76
+ "Product_MRP": product_mrp,
77
+ "Store_Establishment_Year": est_year,
78
+ "Store_Age": store_age,
79
+ "Price_per_Area": price_per_area,
80
+ "Product_Sugar_Content": sugar_content.strip(),
81
+ "Product_Type": product_type.strip(),
82
+ "Store_Id": store_id.strip(),
83
+ "Store_Size": store_size.strip(),
84
+ "Store_Location_City_Type": city_type.strip(),
85
+ "Store_Type": store_type.strip(),
86
+ }
87
+ input_df = pd.DataFrame([row])
88
+
89
+ st.subheader("Input preview")
90
+ st.dataframe(input_df)
91
+
92
+ # ----------------------------
93
+ # Predict
94
+ # ----------------------------
95
+ if st.button("Predict sales"):
96
+ try:
97
+ # Pipeline expects DataFrame with training column names; we provided them.
98
+ pred = model.predict(input_df)[0]
99
+ st.markdown(f"### 🔮 Predicted `Product_Store_Sales_Total`: **{pred:,.2f}**")
100
+ except Exception as e:
101
+ st.error(f"Prediction failed: {e}")
102
+ st.exception(e)
103
+
104
+ st.info("Note: Unknown category values are safely ignored by the one-hot encoder (handled as all-zero columns).")
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
1
+ pandas==2.2.2
2
+ huggingface_hub==0.32.6
3
+ streamlit==1.43.2
4
+ joblib==1.5.1
5
+ scikit-learn==1.6.0
6
+ xgboost==2.1.4
7
+ mlflow==3.0.1