jkng77433 committed on
Commit
5cb2287
·
verified ·
1 Parent(s): 8e32bf0

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +25 -0
  2. app.py +213 -0
  3. requirements.txt +8 -0
  4. superkart_rf_best_pipeline.joblib +3 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Compiler toolchain for any Python dependency that has to build from source.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps first (better layer caching)
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Copy app and model
COPY app.py /app/app.py
COPY superkart_rf_best_pipeline.joblib /app/superkart_rf_best_pipeline.joblib

# Hugging Face Spaces default port
ENV PORT=7860
EXPOSE 7860

# Serve the Flask object "app" from app.py with gunicorn (already pinned in
# requirements.txt) instead of Flask's development server.
# Flask is a WSGI app, so gunicorn's regular workers are used; the ASGI
# UvicornWorker does not apply here and uvicorn is not installed.
# A single process with threads keeps only one copy of the model in memory.
# The shell wrapper lets the bind address follow $PORT; "exec" makes gunicorn
# PID 1 so it receives container stop signals directly.
CMD ["sh", "-c", "exec gunicorn --workers 1 --threads 4 --timeout 120 --bind 0.0.0.0:${PORT} app:app"]
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import joblib
4
+ import pandas as pd
5
+ import numpy as np
6
+ from datetime import datetime
7
+ from flask import Flask, request, jsonify
8
+ import shutil # if using ensure_model_present
9
+
10
# Resolve the directory that contains this file. ``__file__`` is not defined
# in notebooks/REPLs, so fall back to the current working directory there.
try:
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    BASE_DIR = os.getcwd()

APP_NAME = "SuperKart_Sales_Forecast_API"

# The serialized pipeline is expected next to this file by default; the
# MODEL_PATH environment variable overrides that location.
DEFAULT_MODEL_PATH = os.path.join(BASE_DIR, "superkart_rf_best_pipeline.joblib")
MODEL_PATH = os.environ.get("MODEL_PATH", DEFAULT_MODEL_PATH)

# Reference year used to derive Store_Age from Store_Establishment_Year.
# NOTE(review): this defaults to the wall-clock year, so derived ages grow
# over time; pin CURRENT_YEAR via the environment if the model was trained
# against a fixed reference year -- TODO confirm against the training code.
CURRENT_YEAR = int(os.environ.get("CURRENT_YEAR", datetime.now().year))
24
+
25
+ # Optional helper
26
def ensure_model_present():
    """Copy the model file into place from a known upload location if needed.

    Does nothing when MODEL_PATH was overridden (differs from
    DEFAULT_MODEL_PATH) or when the file already exists. Otherwise the first
    existing candidate under ``/content`` is copied to MODEL_PATH.

    Raises:
        FileNotFoundError: if no candidate location holds the model file.
    """
    # Guard: custom path configured, or the default file is already there.
    if MODEL_PATH != DEFAULT_MODEL_PATH or os.path.exists(MODEL_PATH):
        return

    model_name = "superkart_rf_best_pipeline.joblib"
    candidates = [
        os.path.join("/content/backend_files", model_name),
        os.path.join("/content", model_name),
    ]

    # First candidate that exists wins; None when none of them do.
    src = next((path for path in candidates if os.path.exists(path)), None)
    if src is None:
        raise FileNotFoundError(
            f"Model file not found. Checked: {candidates}. "
            "Upload the model or set env var MODEL_PATH to the correct file."
        )

    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    shutil.copy(src, MODEL_PATH)
    print(f"[INFO] Copied model from {src} to {MODEL_PATH}")
42
+
43
# Input columns accepted by the API. The single-record endpoint reads exactly
# these keys from the JSON payload; the batch endpoint adds any that are
# missing from the uploaded CSV.
RAW_FIELDS = [
    # Product attributes
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    # Store attributes
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Age",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]
57
+
58
def map_product_category(pid):
    """Map a product id to a coarse category using its two-letter prefix.

    The id is stringified and its first two characters are compared
    case-insensitively: ``FD`` -> "Food", ``NC`` -> "Non-Consumable",
    ``DR`` -> "Drinks". Anything else yields "Other".
    """
    categories_by_prefix = {
        "FD": "Food",
        "NC": "Non-Consumable",
        "DR": "Drinks",
    }
    code = str(pid)[:2].upper()
    return categories_by_prefix.get(code, "Other")
65
+
66
def clean_sugar(x):
    """Normalize a raw sugar-content label to a canonical category.

    Matching is case-insensitive and substring based, checked in this order:
    "low" -> "Low Sugar", "no" -> "No Sugar", "reg" -> "Regular". Any other
    non-empty text is returned title-cased; an empty string is returned as is.

    Missing values (``None`` or a float NaN) are returned unchanged so that
    they stay missing downstream.
    """
    # str(None) is "None", whose lowercase form contains "no"; without this
    # guard a missing value would be labeled "No Sugar" (and NaN -> "Nan").
    if x is None or (isinstance(x, float) and x != x):
        return x

    s = str(x).strip().lower()
    if "low" in s:
        return "Low Sugar"
    if "no" in s:
        return "No Sugar"
    if "reg" in s:  # also matches "regular"
        return "Regular"
    return s.title() if s else s
72
+
73
def bin_allocated_area(x):
    """Bucket an allocated-area value into a size label.

    The value is coerced to a number; anything that cannot be parsed yields
    NaN. Otherwise the label is "Very Small" (< 0.02), "Small" (< 0.05),
    "Medium" (< 0.10) or "Large".
    """
    area = pd.to_numeric(x, errors="coerce")
    if pd.isna(area):
        return np.nan

    # Use the same thresholds you trained with; these are placeholders
    upper_bounds = (
        (0.02, "Very Small"),
        (0.05, "Small"),
        (0.10, "Medium"),
    )
    for bound, label in upper_bounds:
        if area < bound:
            return label
    return "Large"
86
+
87
def bin_mrp(x):
    """Bucket a product MRP into a price-tier label.

    The value is coerced to a number; anything that cannot be parsed yields
    NaN. Otherwise the label is "Low" (< 100), "Medium" (< 150),
    "High" (< 200) or "Premium".
    """
    price = pd.to_numeric(x, errors="coerce")
    if pd.isna(price):
        return np.nan

    tiers = ((100, "Low"), (150, "Medium"), (200, "High"))
    for ceiling, label in tiers:
        if price < ceiling:
            return label
    return "Premium"
94
+
95
def engineer_features(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Add the derived feature columns to a copy of the raw input.

    The input frame is not modified. Columns added or rewritten:

    * ``Product_Category``   - from the ``Product_Id`` prefix.
    * ``Product_Sugar_Content`` - normalized in place when present.
    * ``Store_Age``          - ``CURRENT_YEAR - Store_Establishment_Year``
      (floored at 0) wherever no age was supplied.
    * ``MRP_Bins``           - price tier of ``Product_MRP``.
    * ``Unit_Value``         - ``Product_MRP / Product_Weight``.
    * ``Store_Product_Interaction`` - ``Store_Type__Product_Type``.
    * ``MRPBin_StoreType``   - ``MRP_Bins__Store_Type``.

    Every derived column is always present in the result; it is NaN when the
    source columns it depends on are absent.

    Args:
        df_raw: Raw records, one row per product/store pair.

    Returns:
        A new DataFrame with the raw columns plus the derived ones.
    """
    df = df_raw.copy()

    if "Product_Id" in df.columns:
        df["Product_Category"] = df["Product_Id"].map(map_product_category)
    else:
        df["Product_Category"] = np.nan

    if "Product_Sugar_Content" in df.columns:
        df["Product_Sugar_Content"] = df["Product_Sugar_Content"].apply(clean_sugar)

    # Store_Age: derive from the establishment year where it was not given.
    has_year = "Store_Establishment_Year" in df.columns
    age_missing = df["Store_Age"].isna() if "Store_Age" in df.columns else None
    if has_year:
        est_year = pd.to_numeric(df["Store_Establishment_Year"], errors="coerce")
        derived_age = (CURRENT_YEAR - est_year).clip(lower=0)
    if age_missing is None or age_missing.all():
        df["Store_Age"] = derived_age if has_year else np.nan
    elif has_year and age_missing.any():
        # Column is only partially populated (e.g. a batch CSV with gaps):
        # keep supplied ages and fill just the missing rows.
        df.loc[age_missing, "Store_Age"] = derived_age[age_missing]

    if "Product_MRP" in df.columns:
        df["MRP_Bins"] = df["Product_MRP"].apply(bin_mrp)
    else:
        df["MRP_Bins"] = np.nan

    if "Product_MRP" in df.columns and "Product_Weight" in df.columns:
        mrp = pd.to_numeric(df["Product_MRP"], errors="coerce")
        # A zero weight becomes NaN so the ratio is NaN rather than inf.
        wgt = pd.to_numeric(df["Product_Weight"], errors="coerce").replace(0, np.nan)
        df["Unit_Value"] = mrp / wgt
    else:
        df["Unit_Value"] = np.nan

    if "Store_Type" in df.columns and "Product_Type" in df.columns:
        df["Store_Product_Interaction"] = (
            df["Store_Type"].astype(str) + "__" + df["Product_Type"].astype(str)
        )
    else:
        df["Store_Product_Interaction"] = np.nan

    # MRP_Bins always exists at this point, so only Store_Type is checked.
    if "Store_Type" in df.columns:
        df["MRPBin_StoreType"] = (
            df["MRP_Bins"].astype(str) + "__" + df["Store_Type"].astype(str)
        )
    else:
        # Same fallback as the other derived features, so the output schema
        # does not depend on which raw columns were supplied.
        df["MRPBin_StoreType"] = np.nan

    return df
133
+
134
app = Flask(APP_NAME)

# Best-effort: try to put the model file in place before loading it.
# A missing helper (NameError) is ignored; any other failure is only reported,
# because the load step below decides whether the service has a model.
try:
    ensure_model_present()
except NameError:
    pass  # helper not defined if you removed it
except Exception as e:
    print(f"[WARN] {e}")

# Load the serialized pipeline once at import time. ``model`` stays None on
# failure so the endpoints can answer with an error instead of crashing.
try:
    model = joblib.load(MODEL_PATH)
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model = None
else:
    print(f"[INFO] Loaded model from {MODEL_PATH}")
151
+
152
@app.get("/")
def root():
    """Return a small service descriptor: name, status, usage hint, input fields."""
    info = {
        "service": APP_NAME,
        "status": "ok",
        "message": "POST to /v1/forecast/single (JSON) or /v1/forecast/batch (CSV as 'file')",
        "raw_fields": RAW_FIELDS,
    }
    return jsonify(info)
160
+
161
@app.post("/v1/forecast/single")
def predict_single():
    """Predict sales for one record supplied as a JSON object.

    Keys listed in RAW_FIELDS are read from the body; absent keys are treated
    as missing values. The record goes through ``engineer_features``, the id
    columns are dropped, and the model's prediction is returned together with
    the feature row that was actually scored.

    Returns:
        200 with ``Predicted_Product_Store_Sales_Total`` and ``input_used``;
        400 when the body is not a JSON object or inference fails;
        500 when the model could not be loaded at startup.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    payload = request.get_json(silent=True) or {}
    # A JSON array/string/number has no .get(); reject it explicitly instead
    # of letting the AttributeError surface as an unhandled 500.
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    row = {col: payload.get(col) for col in RAW_FIELDS}
    df_raw = pd.DataFrame([row])

    try:
        df_feat = engineer_features(df_raw).drop(
            columns=["Product_Id", "Store_Id"], errors="ignore"
        )

        yhat = float(model.predict(df_feat)[0])

        # NaN is not valid JSON; report missing values as null instead.
        input_used = {
            key: None if (pd.api.types.is_scalar(val) and pd.isna(val)) else val
            for key, val in df_feat.iloc[0].to_dict().items()
        }
        return jsonify({
            "Predicted_Product_Store_Sales_Total": round(yhat, 2),
            "input_used": input_used,
        })
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400
183
+
184
@app.post("/v1/forecast/batch")
def predict_batch():
    """Predict sales for every row of a CSV uploaded under form field 'file'.

    Columns from RAW_FIELDS that the CSV lacks are added as missing values.
    The rows go through ``engineer_features``, the id columns are dropped,
    and the predictions are appended to the raw rows as
    ``Predicted_Product_Store_Sales_Total``.

    Returns:
        200 with a JSON list of records (raw columns plus the prediction);
        400 when no file was posted or reading/inference fails;
        500 when the model could not be loaded at startup.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    file = request.files.get("file")
    if file is None:
        return jsonify({"error": "Please POST a CSV file under form field 'file'"}), 400

    try:
        df_raw = pd.read_csv(file)
        for col in RAW_FIELDS:
            if col not in df_raw.columns:
                df_raw[col] = None

        df_feat = engineer_features(df_raw).drop(
            columns=["Product_Id", "Store_Id"], errors="ignore"
        )

        preds = model.predict(df_feat)
        out = df_raw.copy()
        out["Predicted_Product_Store_Sales_Total"] = preds

        # NaN is not valid JSON; cast to object so missing cells can be
        # emitted as null.
        out = out.astype(object).where(out.notna(), None)
        return jsonify(out.to_dict(orient="records"))
    except Exception as e:
        return jsonify({"error": f"Inference failed: {e}"}), 400
210
+
211
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from PORT (7860 when unset).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ flask==3.0.3
2
+ pandas==2.2.2
3
+ numpy==2.0.2
4
+ scikit-learn==1.6.1
5
+ joblib==1.4.2
6
+ gunicorn==20.1.0
7
+ requests==2.32.3
8
+ huggingface_hub==0.30.1
superkart_rf_best_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c8e6cdf3574946ec58674dbb3bf7846e563737be6bd548cf26f7221006367e6
3
+ size 240654163