Spaces:

AngadSi
/

superkart-sales-forecast-api

Sleeping

App Files Files Community

3v324v23 committed on Oct 5, 2025

Commit

8e6e78d

1 Parent(s): 9446900

Harden predict: align columns, coerce dtypes, better errors

Browse files

Files changed (1) hide show

app.py +18 -48

app.py CHANGED Viewed

@@ -1,87 +1,57 @@
-import os, json, joblib, pandas as pd, numpy as np
 from datetime import datetime
 from flask import Flask, request, jsonify
 app = Flask(__name__)
 MODEL_PATH = os.getenv("MODEL_PATH", "model_pipeline.joblib")
 model = joblib.load(MODEL_PATH)
-# must match training features
 EXPECTED_COLS = [
-    "Product_Id",
-    "Product_Weight",
-    "Product_Sugar_Content",
-    "Product_Allocated_Area",
-    "Product_Type",
-    "Product_MRP",
-    "Store_Id",
-    "Store_Establishment_Year",
-    "Store_Age",  # <-- engineered feature expected by the model
-    "Store_Size",
-    "Store_Location_City_Type",
-    "Store_Type",
 ]
 NUMERIC_COLS = {
-    "Product_Weight",
-    "Product_Allocated_Area",
-    "Product_MRP",
-    "Store_Establishment_Year",
-    "Store_Age",
 }
 CURRENT_YEAR = int(os.getenv("CURRENT_YEAR", datetime.now().year))
 @app.get("/health")
 def health():
-    return {"status": "ok", "expected_features": EXPECTED_COLS, "current_year": CURRENT_YEAR}, 200
 def _compute_store_age(df: pd.DataFrame) -> pd.Series:
-    # coerce to numeric first; invalid -> NaN
     years = pd.to_numeric(df.get("Store_Establishment_Year"), errors="coerce")
-    age = CURRENT_YEAR - years
-    # clip to [0, 200] to avoid negatives
-    age = age.clip(lower=0, upper=200)
     return age
 def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
-    # add missing columns with NaN
     for c in EXPECTED_COLS:
         if c not in df.columns:
             df[c] = np.nan
-    # compute Store_Age if missing or nulls
-    if df["Store_Age"].isna().any() or "Store_Age" not in df.columns:
         df["Store_Age"] = _compute_store_age(df)
-    # coerce numeric cols
     for c in NUMERIC_COLS:
         df[c] = pd.to_numeric(df[c], errors="coerce")
-    # ensure categorical/object for the rest
     for c in set(EXPECTED_COLS) - NUMERIC_COLS:
         df[c] = df[c].astype("string")
-    # reorder exactly as training
-    df = df[EXPECTED_COLS]
-    return df
 @app.post("/predict")
 def predict():
     try:
         payload = request.get_json(force=True)
-        if isinstance(payload, dict):
-            df = pd.DataFrame([payload])
-        elif isinstance(payload, list):
-            df = pd.DataFrame(payload)
-        else:
-            return jsonify({"error": "payload must be a dict or list of dicts"}), 400
         df = _coerce_and_align(df)
         preds = model.predict(df)
-        return jsonify({"predictions": [float(x) for x in preds]}), 200
     except Exception as e:
         return jsonify({"error": str(e)}), 500

+import os, joblib, pandas as pd, numpy as np
 from datetime import datetime
 from flask import Flask, request, jsonify
 # Flask application object; the route decorators below register handlers on it.
 app = Flask(__name__)
 # Location of the serialized model. The MODEL_PATH environment variable
 # overrides the default file name.
 MODEL_PATH = os.getenv("MODEL_PATH", "model_pipeline.joblib")
 # Loaded once, at import time, and reused by every request.
 # NOTE(review): joblib.load unpickles the file, so MODEL_PATH should only ever
 # point at a trusted artifact -- confirm how the file is provisioned.
 model = joblib.load(MODEL_PATH)
+# Must match training features (include Store_Age)
 # _coerce_and_align() adds any of these columns that a request omits and
 # returns the frame restricted to exactly these columns, in this order.
 EXPECTED_COLS = [
+    "Product_Id","Product_Weight","Product_Sugar_Content","Product_Allocated_Area",
+    "Product_Type","Product_MRP","Store_Id","Store_Establishment_Year",
+    "Store_Age","Store_Size","Store_Location_City_Type","Store_Type"
 ]
 # Subset of EXPECTED_COLS that is coerced with pd.to_numeric(errors="coerce");
 # every other expected column is cast to the pandas "string" dtype.
 NUMERIC_COLS = {
+    "Product_Weight","Product_Allocated_Area","Product_MRP",
+    "Store_Establishment_Year","Store_Age"
 }
 # Reference year used to derive Store_Age. Evaluated once at import: the
 # CURRENT_YEAR environment variable if set, otherwise the clock's year at
 # startup, so a long-running process keeps the year it started in.
 CURRENT_YEAR = int(os.getenv("CURRENT_YEAR", datetime.now().year))
 @app.get("/health")
 def health():
     """Report service status, the expected feature names and the reference year.

     Returns:
         A ``(body, status)`` tuple: a dict with the keys ``status``,
         ``expected_features`` and ``current_year``, and HTTP status 200.
     """
     body = {
         "status": "ok",
         "expected_features": EXPECTED_COLS,
         "current_year": CURRENT_YEAR,
     }
     return body, 200
 def _compute_store_age(df: pd.DataFrame) -> pd.Series:
     years = pd.to_numeric(df.get("Store_Establishment_Year"), errors="coerce")
+    age = (CURRENT_YEAR - years).clip(lower=0, upper=200)
     return age
 def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
     """Return a copy of ``df`` shaped like the frame the model expects.

     The result has exactly the columns in ``EXPECTED_COLS``, in that order:
     missing columns are added as NaN and unexpected columns are dropped.
     Columns in ``NUMERIC_COLS`` are coerced with ``pd.to_numeric`` (values
     that cannot be parsed become NaN); all other columns are cast to the
     pandas ``"string"`` dtype. ``Store_Age`` is derived from
     ``Store_Establishment_Year`` only for rows where it is missing.

     Args:
         df: Raw request rows, one row per record. Not modified.

     Returns:
         A new DataFrame ready to pass to ``model.predict``.
     """
     # reindex builds a new frame, so the caller's DataFrame is left untouched,
     # and it handles add-missing / drop-extra / reorder in one step.
     aligned = df.reindex(columns=EXPECTED_COLS)

     # Coerce numerics *before* looking for gaps in Store_Age, so an unparsable
     # Store_Age (e.g. "n/a") counts as missing and gets backfilled below.
     for col in NUMERIC_COLS:
         aligned[col] = pd.to_numeric(aligned[col], errors="coerce")

     # Fill only the missing ages. Recomputing the whole column whenever a
     # single row is missing would discard values other rows supplied.
     aligned["Store_Age"] = aligned["Store_Age"].fillna(_compute_store_age(aligned))

     # Iterate the list (not a set difference) so column processing order is
     # deterministic.
     # NOTE(review): the "string" dtype represents missing values as pd.NA;
     # confirm the loaded pipeline's encoders/imputers accept pd.NA.
     for col in EXPECTED_COLS:
         if col not in NUMERIC_COLS:
             aligned[col] = aligned[col].astype("string")

     return aligned
 @app.post("/predict")
 def predict():
     """Predict for one record (JSON object) or a batch (JSON list of objects).

     Returns:
         ``({"predictions": [...]}, 200)`` on success, with one float per
         input record in request order.
         ``({"error": ...}, 400)`` when the body is not valid JSON, or is not
         a dict or a non-empty list of dicts.
         ``({"error": ...}, 500)`` when alignment or the model itself fails.
     """
     # silent=True makes an unparsable body come back as None instead of
     # raising, so malformed JSON is reported as a client error (400) rather
     # than being swallowed by the 500 handler below.
     payload = request.get_json(force=True, silent=True)

     if isinstance(payload, dict):
         records = [payload]
     elif (
         isinstance(payload, list)
         and payload
         and all(isinstance(item, dict) for item in payload)
     ):
         records = payload
     else:
         # Covers malformed JSON, JSON null, scalars, empty lists and lists
         # containing anything other than objects.
         return jsonify({"error": "payload must be a dict or a non-empty list of dicts"}), 400

     try:
         df = _coerce_and_align(pd.DataFrame(records))
         preds = model.predict(df)
         return jsonify({"predictions": [float(x) for x in preds]}), 200
     except Exception as e:
         # Top-level boundary: log the traceback server-side so failures are
         # diagnosable, then report the error to the client.
         # NOTE(review): str(e) may expose internal details to callers.
         app.logger.exception("prediction failed")
         return jsonify({"error": str(e)}), 500