sudhirpgcmma02 committed
Commit 4f28860 · verified · 1 Parent(s): d254e39

Upload folder using huggingface_hub

Files changed (5):
  1. Dockerfile +18 -0
  2. README.md +33 -10
  3. api.py +141 -0
  4. loader.py +34 -0
  5. requirements.txt +12 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
+ FROM python:3.11-slim
+
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+ WORKDIR /app
+
+ # System deps (optional)
+ RUN apt-get update && apt-get install -y --no-install-recommends build-essential && rm -rf /var/lib/apt/lists/*
+
+ COPY requirements.txt /app/requirements.txt
+ RUN pip install --no-cache-dir -r /app/requirements.txt
+
+ # Copy app
+ COPY . /app
+
+ # HF provides $PORT at runtime; shell-form CMD is required so ${PORT} expands (exec-form CMD does no variable substitution)
+ ENV PORT=7860
+ CMD gunicorn -b 0.0.0.0:${PORT} api:app --workers 4 --threads 8 --timeout 180
README.md CHANGED
@@ -1,10 +1,33 @@
- ---
- title: Revenue Prediction
- emoji: 🌍
- colorFrom: yellow
- colorTo: blue
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # SuperKart Backend (Flask API)
+
+ Endpoints:
+ - `GET /health` -> health check
+ - `POST /predict` -> JSON with `store_id` and `features` or `features_list`
+ - `POST /predict_batch` -> multipart CSV with a `store_id` column
+
+ ## Run locally
+
+ ```bash
+ pip install -r requirements.txt
+ export PORT=7860
+ python api.py
+ # or gunicorn
+ gunicorn -b 0.0.0.0:$PORT api:app --workers 2 --threads 8 --timeout 180
+ ```
+
+ ## Deploy to Hugging Face Spaces (Docker)
+
+ 1. Create a new **Space** → **Docker** → name: `superkart-backend`.
+ 2. Upload the files in this folder (including `Dockerfile`).
+ 3. Add your trained `models/` directory:
+ ```
+ models/
+   store_101/
+     RandomForest.joblib
+     metadata.json
+   store_102/
+     XGBoost.joblib
+     metadata.json
+ ```
+ 4. The Space will build and expose the API at:
+    `https://<your-username>-superkart-backend.hf.space`
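
Once the Space (or a local instance) is up, `/predict` can be smoke-tested with the `requests` package already listed in `requirements.txt`. A minimal sketch; the feature names below are hypothetical placeholders, since the real schema depends on the columns each store's model was trained on (the `features_used` field in the response reports the actual list):

```python
import requests

BASE = "http://localhost:7860"  # or https://<your-username>-superkart-backend.hf.space

# Hypothetical feature names -- replace with the columns your store models expect.
payload = {
    "store_id": "101",
    "features": {"Product_Weight": 12.5, "Product_MRP": 249.0},
}

resp = requests.post(f"{BASE}/predict", json=payload, timeout=30)
resp.raise_for_status()
body = resp.json()
print(body["predictions"], body["model"], body["features_used"])
```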
api.py ADDED
@@ -0,0 +1,141 @@
+ import os
+ import io
+ import json
+ from typing import Any, Dict, List
+
+ from flask import Flask, request, jsonify
+ from flask_cors import CORS
+ import pandas as pd
+
+ from loader import load_store_model
+
+ app = Flask(__name__)
+ CORS(app)
+
+ @app.route("/health", methods=["GET"])
+ def health():
+     return jsonify({"status": "ok", "message": "SuperKart backend running"}), 200
+
+ def _predict_single(store_id: Any, features: Dict[str, Any]):
+     model, meta = load_store_model(str(store_id))
+     df = pd.DataFrame([features])
+     yhat = model.predict(df)
+     return float(yhat[0]), meta
+
+ @app.route("/predict", methods=["POST"])
+ def predict():
+     """POST JSON:
+     {
+       "store_id": "101",
+       "features": { ... single row ... }
+     }
+     OR
+     {
+       "store_id": "101",
+       "features_list": [ {...}, {...} ]  # multiple rows for same store
+     }
+     """
+     try:
+         payload = request.get_json(force=True, silent=False)
+     except Exception as e:
+         return jsonify({"error": f"Invalid JSON: {e}"}), 400
+
+     if not payload:
+         return jsonify({"error": "Empty payload"}), 400
+
+     store_id = str(payload.get("store_id", "")).strip()
+     if not store_id:
+         return jsonify({"error": "Missing 'store_id'"}), 400
+
+     try:
+         model, meta = load_store_model(store_id)
+     except FileNotFoundError as e:
+         return jsonify({"error": str(e)}), 404
+
+     if "features" in payload:
+         df = pd.DataFrame([payload["features"]])
+         yhat = model.predict(df)
+         return jsonify({
+             "store_id": store_id,
+             "n_rows": 1,
+             "predictions": [float(yhat[0])],
+             "model": meta.get("model"),
+             "metrics": meta.get("metrics", {}),
+             "features_used": meta.get("features", [])
+         }), 200
+
+     elif "features_list" in payload:
+         rows = payload["features_list"]
+         if not isinstance(rows, list) or len(rows) == 0:
+             return jsonify({"error": "'features_list' must be a non-empty list"}), 400
+         df = pd.DataFrame(rows)
+         yhat = model.predict(df)
+         return jsonify({
+             "store_id": store_id,
+             "n_rows": len(df),
+             "predictions": [float(v) for v in yhat],
+             "model": meta.get("model"),
+             "metrics": meta.get("metrics", {}),
+             "features_used": meta.get("features", [])
+         }), 200
+
+     else:
+         return jsonify({"error": "Provide either 'features' or 'features_list'"}), 400
+
+ @app.route("/predict_batch", methods=["POST"])
+ def predict_batch():
+     """Multipart form with a CSV file:
+     - expects a 'file' field
+     - CSV must include a 'store_id' column and the necessary features.
+     Routes each row to that store's model and returns merged results.
+     """
+     if "file" not in request.files:
+         return jsonify({"error": "No file uploaded with field name 'file'"}), 400
+     f = request.files["file"]
+     try:
+         df = pd.read_csv(f)
+     except Exception as e:
+         return jsonify({"error": f"Failed to read CSV: {e}"}), 400
+
+     if "store_id" not in df.columns:
+         return jsonify({"error": "CSV must include 'store_id' column"}), 400
+
+     preds = []
+     errors = []
+     # Simple cache for models during batch call
+     cache = {}
+
+     for idx, row in df.iterrows():
+         sid = str(row["store_id"])
+         feats = row.drop(labels=["store_id"]).to_dict()
+         try:
+             if sid not in cache:
+                 cache[sid] = load_store_model(sid)
+             model, meta = cache[sid]
+             yhat = model.predict(pd.DataFrame([feats]))[0]
+             preds.append(float(yhat))
+         except FileNotFoundError as e:
+             preds.append(None)
+             errors.append({"row": int(idx), "store_id": sid, "error": str(e)})
+         except Exception as e:
+             preds.append(None)
+             errors.append({"row": int(idx), "store_id": sid, "error": f"{type(e).__name__}: {e}"})
+
+     df_out = df.copy()
+     df_out["predicted_sales"] = preds
+
+     # Return results as JSON, embedding the full output CSV as a string
+     buf = io.StringIO()
+     df_out.to_csv(buf, index=False)
+     buf.seek(0)
+
+     return jsonify({
+         "rows": len(df_out),
+         "errors": errors,
+         "csv": buf.getvalue()
+     }), 200
+
+ if __name__ == "__main__":
+     # Local dev
+     port = int(os.environ.get("PORT", 7860))
+     app.run(host="0.0.0.0", port=port, debug=True)
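
The batch route expects a multipart upload, so a quick way to exercise it is to build a small CSV in memory and post it. A sketch under the same assumptions as above (placeholder feature columns, a hypothetical local instance):

```python
import io
import requests

BASE = "http://localhost:7860"  # hypothetical local instance

# Two rows routed to two different stores; feature columns are placeholders.
csv_bytes = io.BytesIO(
    b"store_id,Product_Weight,Product_MRP\n"
    b"101,12.5,249.0\n"
    b"102,8.2,99.0\n"
)

resp = requests.post(
    f"{BASE}/predict_batch",
    files={"file": ("batch.csv", csv_bytes, "text/csv")},
    timeout=60,
)
resp.raise_for_status()
result = resp.json()
print(result["rows"], result["errors"])
print(result["csv"])  # input CSV with a 'predicted_sales' column appended
```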
loader.py ADDED
@@ -0,0 +1,34 @@
+ import os
+ import json
+ import joblib
+
+ _MODEL_CACHE = {}
+
+ def load_store_model(store_id: str, base_dir: str = "models"):
+     """Load and cache best model + metadata for a given store_id.
+     Looks under {base_dir}/store_{store_id}/
+     Returns (model, metadata) or raises FileNotFoundError.
+     """
+     key = (base_dir, str(store_id))
+     if key in _MODEL_CACHE:
+         return _MODEL_CACHE[key]
+
+     store_dir = os.path.join(base_dir, f"store_{store_id}")
+     meta_path = os.path.join(store_dir, "metadata.json")
+     if not os.path.exists(meta_path):
+         raise FileNotFoundError(f"metadata.json not found for store {store_id} in {store_dir}")
+
+     with open(meta_path, "r") as f:
+         metadata = json.load(f)
+
+     model_name = metadata.get("model")
+     if not model_name:
+         raise FileNotFoundError(f"'model' not defined in metadata.json for store {store_id}")
+
+     model_path = os.path.join(store_dir, f"{model_name}.joblib")
+     if not os.path.exists(model_path):
+         raise FileNotFoundError(f"Model file missing: {model_path}")
+
+     model = joblib.load(model_path)
+     _MODEL_CACHE[key] = (model, metadata)
+     return model, metadata
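
`loader.py` implies a training-side contract: each `models/store_<id>/` folder holds a `<ModelName>.joblib` file plus a `metadata.json` whose `model` key names that file, with optional `features` and `metrics` keys that `/predict` echoes back. A minimal sketch of how such a folder might be produced; `save_store_model`, the `RandomForestRegressor`, and the toy data are illustrative assumptions, not the project's actual training code:

```python
import json
import os

import joblib
from sklearn.ensemble import RandomForestRegressor

def save_store_model(store_id, model, features, metrics, base_dir="models"):
    """Hypothetical helper: writes the layout load_store_model() expects."""
    store_dir = os.path.join(base_dir, f"store_{store_id}")
    os.makedirs(store_dir, exist_ok=True)
    model_name = type(model).__name__.replace("Regressor", "")  # e.g. "RandomForest"
    joblib.dump(model, os.path.join(store_dir, f"{model_name}.joblib"))
    with open(os.path.join(store_dir, "metadata.json"), "w") as f:
        json.dump({"model": model_name, "features": features, "metrics": metrics}, f, indent=2)

# Illustrative only: placeholder features, toy data, made-up metric value.
rf = RandomForestRegressor(n_estimators=10).fit([[12.5, 249.0], [8.2, 99.0]], [3500.0, 1200.0])
save_store_model("101", rf, ["Product_Weight", "Product_MRP"], {"r2": 0.0})
```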
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ pandas==2.2.2
+ numpy==2.0.2
+ scikit-learn==1.6.1
+ xgboost==2.1.4
+ joblib==1.4.2
+ Werkzeug==2.2.2
+ flask==2.2.2
+ flask-cors==4.0.0  # required by api.py's `from flask_cors import CORS`
+ gunicorn==20.1.0
+ requests==2.28.1
+ uvicorn[standard]
+ streamlit==1.43.2