sudhirpgcmma02 committed
Commit 4f28860 · verified · 1 Parent(s): d254e39

Upload folder using huggingface_hub

Files changed (5):
  1. Dockerfile +18 -0
  2. README.md +33 -10
  3. api.py +141 -0
  4. loader.py +34 -0
  5. requirements.txt +12 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
+ FROM python:3.11-slim
+
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+ WORKDIR /app
+
+ # System deps (optional)
+ RUN apt-get update && apt-get install -y --no-install-recommends build-essential && rm -rf /var/lib/apt/lists/*
+
+ COPY requirements.txt /app/requirements.txt
+ RUN pip install --no-cache-dir -r /app/requirements.txt
+
+ # Copy app
+ COPY . /app
+
+ # HF provides $PORT at runtime; shell-form CMD is required so ${PORT} expands (exec-form CMD does no variable substitution)
+ ENV PORT=7860
+ CMD gunicorn -b 0.0.0.0:${PORT} api:app --workers 4 --threads 8 --timeout 180
README.md CHANGED
@@ -1,10 +1,33 @@
- ---
- title: Revenue Prediction
- emoji: 🌍
- colorFrom: yellow
- colorTo: blue
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # SuperKart Backend (Flask API)
+
+ Endpoints:
+ - `GET /health` -> health check
+ - `POST /predict` -> JSON with `store_id` and `features` or `features_list`
+ - `POST /predict_batch` -> multipart CSV with a `store_id` column
+
+ ## Run locally
+
+ ```bash
+ pip install -r requirements.txt
+ export PORT=7860
+ python api.py
+ # or gunicorn
+ gunicorn -b 0.0.0.0:$PORT api:app --workers 2 --threads 8 --timeout 180
+ ```
+
+ ## Deploy to Hugging Face Spaces (Docker)
+
+ 1. Create a new **Space** → **Docker** → name: `superkart-backend`.
+ 2. Upload the files in this folder (including `Dockerfile`).
+ 3. Add your trained `models/` directory:
+ ```
+ models/
+   store_101/
+     RandomForest.joblib
+     metadata.json
+   store_102/
+     XGBoost.joblib
+     metadata.json
+ ```
+ 4. The Space will build and expose the API at:
+    `https://<your-username>-superkart-backend.hf.space`
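
Once the Space (or a local instance) is up, `/predict` can be smoke-tested with the `requests` package already listed in `requirements.txt`. A minimal sketch; the feature names below are hypothetical placeholders, since the real schema depends on the columns each store's model was trained on (the `features_used` field in the response reports the actual list):

```python
import requests

BASE = "http://localhost:7860"  # or https://<your-username>-superkart-backend.hf.space

# Hypothetical feature names -- replace with the columns your store models expect.
payload = {
    "store_id": "101",
    "features": {"Product_Weight": 12.5, "Product_MRP": 249.0},
}

resp = requests.post(f"{BASE}/predict", json=payload, timeout=30)
resp.raise_for_status()
body = resp.json()
print(body["predictions"], body["model"], body["features_used"])
```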
api.py ADDED
@@ -0,0 +1,141 @@
+ import os
+ import io
+ import json
+ from typing import Any, Dict, List
+
+ from flask import Flask, request, jsonify
+ from flask_cors import CORS
+ import pandas as pd
+
+ from loader import load_store_model
+
+ app = Flask(__name__)
+ CORS(app)
+
+ @app.route("/health", methods=["GET"])
+ def health():
+     return jsonify({"status": "ok", "message": "SuperKart backend running"}), 200
+
+ def _predict_single(store_id: Any, features: Dict[str, Any]):
+     model, meta = load_store_model(str(store_id))
+     df = pd.DataFrame([features])
+     yhat = model.predict(df)
+     return float(yhat[0]), meta
+
+ @app.route("/predict", methods=["POST"])
+ def predict():
+     """POST JSON:
+     {
+       "store_id": "101",
+       "features": { ... single row ... }
+     }
+     OR
+     {
+       "store_id": "101",
+       "features_list": [ {...}, {...} ]  # multiple rows for same store
+     }
+     """
+     try:
+         payload = request.get_json(force=True, silent=False)
+     except Exception as e:
+         return jsonify({"error": f"Invalid JSON: {e}"}), 400
+
+     if not payload:
+         return jsonify({"error": "Empty payload"}), 400
+
+     store_id = str(payload.get("store_id", "")).strip()
+     if not store_id:
+         return jsonify({"error": "Missing 'store_id'"}), 400
+
+     try:
+         model, meta = load_store_model(store_id)
+     except FileNotFoundError as e:
+         return jsonify({"error": str(e)}), 404
+
+     if "features" in payload:
+         df = pd.DataFrame([payload["features"]])
+         yhat = model.predict(df)
+         return jsonify({
+             "store_id": store_id,
+             "n_rows": 1,
+             "predictions": [float(yhat[0])],
+             "model": meta.get("model"),
+             "metrics": meta.get("metrics", {}),
+             "features_used": meta.get("features", [])
+         }), 200
+
+     elif "features_list" in payload:
+         rows = payload["features_list"]
+         if not isinstance(rows, list) or len(rows) == 0:
+             return jsonify({"error": "'features_list' must be a non-empty list"}), 400
+         df = pd.DataFrame(rows)
+         yhat = model.predict(df)
+         return jsonify({
+             "store_id": store_id,
+             "n_rows": len(df),
+             "predictions": [float(v) for v in yhat],
+             "model": meta.get("model"),
+             "metrics": meta.get("metrics", {}),
+             "features_used": meta.get("features", [])
+         }), 200
+
+     else:
+         return jsonify({"error": "Provide either 'features' or 'features_list'"}), 400
+
+ @app.route("/predict_batch", methods=["POST"])
+ def predict_batch():
+     """Multipart form with a CSV file:
+     - expects a 'file' field
+     - CSV must include a 'store_id' column and the necessary features.
+     Routes each row to that store's model and returns merged results.
+     """
+     if "file" not in request.files:
+         return jsonify({"error": "No file uploaded with field name 'file'"}), 400
+     f = request.files["file"]
+     try:
+         df = pd.read_csv(f)
+     except Exception as e:
+         return jsonify({"error": f"Failed to read CSV: {e}"}), 400
+
+     if "store_id" not in df.columns:
+         return jsonify({"error": "CSV must include 'store_id' column"}), 400
+
+     preds = []
+     errors = []
+     # Simple cache for models during batch call
+     cache = {}
+
+     for idx, row in df.iterrows():
+         sid = str(row["store_id"])
+         feats = row.drop(labels=["store_id"]).to_dict()
+         try:
+             if sid not in cache:
+                 cache[sid] = load_store_model(sid)
+             model, meta = cache[sid]
+             yhat = model.predict(pd.DataFrame([feats]))[0]
+             preds.append(float(yhat))
+         except FileNotFoundError as e:
+             preds.append(None)
+             errors.append({"row": int(idx), "store_id": sid, "error": str(e)})
+         except Exception as e:
+             preds.append(None)
+             errors.append({"row": int(idx), "store_id": sid, "error": f"{type(e).__name__}: {e}"})
+
+     df_out = df.copy()
+     df_out["predicted_sales"] = preds
+
+     # Return results as JSON, embedding the full output CSV as a string
+     buf = io.StringIO()
+     df_out.to_csv(buf, index=False)
+     buf.seek(0)
+
+     return jsonify({
+         "rows": len(df_out),
+         "errors": errors,
+         "csv": buf.getvalue()
+     }), 200
+
+ if __name__ == "__main__":
+     # Local dev
+     port = int(os.environ.get("PORT", 7860))
+     app.run(host="0.0.0.0", port=port, debug=True)
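
The batch route expects a multipart upload, so a quick way to exercise it is to build a small CSV in memory and post it. A sketch under the same assumptions as above (placeholder feature columns, a hypothetical local instance):

```python
import io
import requests

BASE = "http://localhost:7860"  # hypothetical local instance

# Two rows routed to two different stores; feature columns are placeholders.
csv_bytes = io.BytesIO(
    b"store_id,Product_Weight,Product_MRP\n"
    b"101,12.5,249.0\n"
    b"102,8.2,99.0\n"
)

resp = requests.post(
    f"{BASE}/predict_batch",
    files={"file": ("batch.csv", csv_bytes, "text/csv")},
    timeout=60,
)
resp.raise_for_status()
result = resp.json()
print(result["rows"], result["errors"])
print(result["csv"])  # input CSV with a 'predicted_sales' column appended
```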
loader.py ADDED
@@ -0,0 +1,34 @@
+ import os
+ import json
+ import joblib
+
+ _MODEL_CACHE = {}
+
+ def load_store_model(store_id: str, base_dir: str = "models"):
+     """Load and cache best model + metadata for a given store_id.
+     Looks under {base_dir}/store_{store_id}/
+     Returns (model, metadata) or raises FileNotFoundError.
+     """
+     key = (base_dir, str(store_id))
+     if key in _MODEL_CACHE:
+         return _MODEL_CACHE[key]
+
+     store_dir = os.path.join(base_dir, f"store_{store_id}")
+     meta_path = os.path.join(store_dir, "metadata.json")
+     if not os.path.exists(meta_path):
+         raise FileNotFoundError(f"metadata.json not found for store {store_id} in {store_dir}")
+
+     with open(meta_path, "r") as f:
+         metadata = json.load(f)
+
+     model_name = metadata.get("model")
+     if not model_name:
+         raise FileNotFoundError(f"'model' not defined in metadata.json for store {store_id}")
+
+     model_path = os.path.join(store_dir, f"{model_name}.joblib")
+     if not os.path.exists(model_path):
+         raise FileNotFoundError(f"Model file missing: {model_path}")
+
+     model = joblib.load(model_path)
+     _MODEL_CACHE[key] = (model, metadata)
+     return model, metadata
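
`loader.py` implies a training-side contract: each `models/store_<id>/` folder holds a `<ModelName>.joblib` file plus a `metadata.json` whose `model` key names that file, with optional `features` and `metrics` keys that `/predict` echoes back. A minimal sketch of how such a folder might be produced; `save_store_model`, the `RandomForestRegressor`, and the toy data are illustrative assumptions, not the project's actual training code:

```python
import json
import os

import joblib
from sklearn.ensemble import RandomForestRegressor

def save_store_model(store_id, model, features, metrics, base_dir="models"):
    """Hypothetical helper: writes the layout load_store_model() expects."""
    store_dir = os.path.join(base_dir, f"store_{store_id}")
    os.makedirs(store_dir, exist_ok=True)
    model_name = type(model).__name__.replace("Regressor", "")  # e.g. "RandomForest"
    joblib.dump(model, os.path.join(store_dir, f"{model_name}.joblib"))
    with open(os.path.join(store_dir, "metadata.json"), "w") as f:
        json.dump({"model": model_name, "features": features, "metrics": metrics}, f, indent=2)

# Illustrative only: placeholder features, toy data, made-up metric value.
rf = RandomForestRegressor(n_estimators=10).fit([[12.5, 249.0], [8.2, 99.0]], [3500.0, 1200.0])
save_store_model("101", rf, ["Product_Weight", "Product_MRP"], {"r2": 0.0})
```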
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ pandas==2.2.2
+ numpy==2.0.2
+ scikit-learn==1.6.1
+ xgboost==2.1.4
+ joblib==1.4.2
+ Werkzeug==2.2.2
+ flask==2.2.2
+ flask-cors==4.0.0  # required by api.py's `from flask_cors import CORS`
+ gunicorn==20.1.0
+ requests==2.28.1
+ uvicorn[standard]
+ streamlit==1.43.2