Spaces:

jkng77433
/

Backend

Sleeping

App Files Files Community

jkng77433 committed on Nov 4, 2025

Commit

5cb2287

verified ·

1 Parent(s): 8e32bf0

Upload 4 files

Browse files

Files changed (4) hide show

Dockerfile +25 -0
app.py +213 -0
requirements.txt +8 -0
superkart_rf_best_pipeline.joblib +3 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.10-slim
+# Compiler toolchain; only needed when a dependency ships no prebuilt wheel
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+ && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install Python deps first (better layer caching)
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+# Copy app and model
+COPY app.py /app/app.py
+COPY superkart_rf_best_pipeline.joblib /app/superkart_rf_best_pipeline.joblib
+# Hugging Face Spaces default port
+ENV PORT=7860
+EXPOSE 7860
+# Serve the Flask (WSGI) object "app" from app.py with gunicorn, which is already
+# pinned in requirements.txt, instead of Flask's development server.
+# One worker keeps a single copy of the ~240 MB model in memory; threads provide
+# request concurrency. "sh -c" is used so the PORT variable is still honoured.
+CMD ["sh", "-c", "exec gunicorn --workers 1 --threads 4 --timeout 120 --bind 0.0.0.0:${PORT:-7860} app:app"]

app.py ADDED Viewed

	@@ -0,0 +1,213 @@

+import os
+import joblib
+import pandas as pd
+import numpy as np
+from datetime import datetime
+from flask import Flask, request, jsonify
+import shutil  # if using ensure_model_present
+# Resolve base directory robustly (works in Colab/Notebook and scripts)
+try:
+    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+except NameError:
+    # __file__ is not defined in notebooks; fall back to CWD
+    BASE_DIR = os.getcwd()
+DEFAULT_MODEL_PATH = os.path.join(BASE_DIR, "superkart_rf_best_pipeline.joblib")
+MODEL_PATH = os.environ.get("MODEL_PATH", DEFAULT_MODEL_PATH)
+APP_NAME = "SuperKart_Sales_Forecast_API"
+# DEFAULT_MODEL_PATH = os.path.join(os.path.dirname(__file__), "superkart_rf_best_pipeline.joblib")
+MODEL_PATH = os.environ.get("MODEL_PATH", DEFAULT_MODEL_PATH)
+CURRENT_YEAR = int(os.environ.get("CURRENT_YEAR", datetime.now().year))
+# Optional helper
+def ensure_model_present():
+    if MODEL_PATH == DEFAULT_MODEL_PATH and not os.path.exists(MODEL_PATH):
+        candidates = [
+            os.path.join("/content/backend_files", "superkart_rf_best_pipeline.joblib"),
+            os.path.join("/content", "superkart_rf_best_pipeline.joblib"),
+        ]
+        for src in candidates:
+            if os.path.exists(src):
+                os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
+                shutil.copy(src, MODEL_PATH)
+                print(f"[INFO] Copied model from {src} to {MODEL_PATH}")
+                return
+        raise FileNotFoundError(
+            f"Model file not found. Checked: {candidates}. "
+            "Upload the model or set env var MODEL_PATH to the correct file."
+        )
+# Raw input columns accepted by the API. The root endpoint echoes this list,
+# /v1/forecast/single builds its one-row frame from these keys (absent keys
+# become None), and /v1/forecast/batch adds any of them missing from the CSV.
+# Product_Id and Store_Id are dropped again before the model is called.
+RAW_FIELDS = [
+    "Product_Id",
+    "Product_Weight",
+    "Product_Sugar_Content",
+    "Product_Allocated_Area",
+    "Product_Type",
+    "Product_MRP",
+    "Store_Id",
+    "Store_Establishment_Year",
+    "Store_Age",
+    "Store_Size",
+    "Store_Location_City_Type",
+    "Store_Type",
+]
+def map_product_category(pid):
+    pid = str(pid)
+    prefix = pid[:2].upper()
+    if prefix == "FD": return "Food"
+    if prefix == "NC": return "Non-Consumable"
+    if prefix == "DR": return "Drinks"
+    return "Other"
+def clean_sugar(x):
+    s = str(x).strip().lower()
+    if "low" in s: return "Low Sugar"
+    if "no" in s:  return "No Sugar"
+    if "reg" in s or "regular" in s: return "Regular"
+    return s.title() if s else s
+def bin_allocated_area(x):
+    v = pd.to_numeric(x, errors="coerce")
+    if pd.isna(v):
+        return np.nan
+    # Use the same thresholds you trained with; these are placeholders
+    if v < 0.02:
+        return "Very Small"
+    elif v < 0.05:
+        return "Small"
+    elif v < 0.10:
+        return "Medium"
+    else:
+        return "Large"
+def bin_mrp(x):
+    v = pd.to_numeric(x, errors="coerce")
+    if pd.isna(v): return np.nan
+    if v < 100: return "Low"
+    elif v < 150: return "Medium"
+    elif v < 200: return "High"
+    else: return "Premium"
+def engineer_features(df_raw: pd.DataFrame) -> pd.DataFrame:
+    df = df_raw.copy()
+    if "Product_Id" in df.columns:
+        df["Product_Category"] = df["Product_Id"].map(map_product_category)
+    else:
+        df["Product_Category"] = np.nan
+    if "Product_Sugar_Content" in df.columns:
+        df["Product_Sugar_Content"] = df["Product_Sugar_Content"].apply(clean_sugar)
+    if "Store_Age" not in df.columns or df["Store_Age"].isna().all():
+        if "Store_Establishment_Year" in df.columns:
+            df["Store_Age"] = (CURRENT_YEAR - pd.to_numeric(df["Store_Establishment_Year"], errors="coerce")).clip(lower=0)
+        else:
+            df["Store_Age"] = np.nan
+    if "Product_MRP" in df.columns:
+        df["MRP_Bins"] = df["Product_MRP"].apply(bin_mrp)
+    else:
+        df["MRP_Bins"] = np.nan
+    if "Product_MRP" in df.columns and "Product_Weight" in df.columns:
+        mrp = pd.to_numeric(df["Product_MRP"], errors="coerce")
+        wgt = pd.to_numeric(df["Product_Weight"], errors="coerce").replace(0, np.nan)
+        df["Unit_Value"] = mrp / wgt
+    else:
+        df["Unit_Value"] = np.nan
+    if "Store_Type" in df.columns and "Product_Type" in df.columns:
+        df["Store_Product_Interaction"] = df["Store_Type"].astype(str) + "__" + df["Product_Type"].astype(str)
+    else:
+        df["Store_Product_Interaction"] = np.nan
+    if "MRP_Bins" in df.columns and "Store_Type" in df.columns:
+        df["MRPBin_StoreType"] = df["MRP_Bins"].astype(str) + "__" + df["Store_Type"].astype(str)
+    return df
+# Flask application object; the route decorators below register on it.
+app = Flask(APP_NAME)
+# Ensure model present (optional). Best effort: a failure is only logged here,
+# and the load step below reports whether the model is actually usable.
+try:
+    ensure_model_present()
+except NameError:
+    pass  # helper not defined if you removed it
+except Exception as e:
+    print(f"[WARN] {e}")
+# Load model once at import time. On failure `model` stays None and both
+# forecast endpoints answer HTTP 500 ("Model not loaded").
+# NOTE(review): joblib.load unpickles the file, so MODEL_PATH must point to a
+# trusted artifact.
+try:
+    model = joblib.load(MODEL_PATH)
+    print(f"[INFO] Loaded model from {MODEL_PATH}")
+except Exception as e:
+    print(f"[ERROR] Failed to load model: {e}")
+    model = None
+@app.get("/")
+def root():
+    return jsonify({
+        "service": APP_NAME,
+        "status": "ok",
+        "message": "POST to /v1/forecast/single (JSON) or /v1/forecast/batch (CSV as 'file')",
+        "raw_fields": RAW_FIELDS
+    })
+@app.post("/v1/forecast/single")
+def predict_single():
+    if model is None:
+        return jsonify({"error": "Model not loaded"}), 500
+    payload = request.get_json(silent=True) or {}
+    row = {col: payload.get(col, None) for col in RAW_FIELDS}
+    df_raw = pd.DataFrame([row])
+    try:
+        df_feat = engineer_features(df_raw)
+        for c in ["Product_Id", "Store_Id"]:
+            if c in df_feat.columns:
+                df_feat = df_feat.drop(columns=[c])
+        yhat = float(model.predict(df_feat)[0])
+        return jsonify({
+            "Predicted_Product_Store_Sales_Total": round(yhat, 2),
+            "input_used": df_feat.iloc[0].to_dict()
+        })
+    except Exception as e:
+        return jsonify({"error": f"Inference failed: {e}"}), 400
+@app.post("/v1/forecast/batch")
+def predict_batch():
+    if model is None:
+        return jsonify({"error": "Model not loaded"}), 500
+    file = request.files.get("file")
+    if file is None:
+        return jsonify({"error": "Please POST a CSV file under form field 'file'"}), 400
+    try:
+        df_raw = pd.read_csv(file)
+        for col in RAW_FIELDS:
+            if col not in df_raw.columns:
+                df_raw[col] = None
+        df_feat = engineer_features(df_raw)
+        for c in ["Product_Id", "Store_Id"]:
+            if c in df_feat.columns:
+                df_feat = df_feat.drop(columns=[c])
+        preds = model.predict(df_feat)
+        out = df_raw.copy()
+        out["Predicted_Product_Store_Sales_Total"] = preds
+        return jsonify(out.to_dict(orient="records"))
+    except Exception as e:
+        return jsonify({"error": f"Inference failed: {e}"}), 400
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))
+    app.run(host="0.0.0.0", port=port)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+flask==3.0.3
+pandas==2.2.2
+numpy==2.0.2
+scikit-learn==1.6.1
+joblib==1.4.2
+gunicorn==20.1.0
+requests==2.32.3
+huggingface_hub==0.30.1

superkart_rf_best_pipeline.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c8e6cdf3574946ec58674dbb3bf7846e563737be6bd548cf26f7221006367e6
+size 240654163