Harden predict: align columns, coerce dtypes, better errors
Browse files
app.py
CHANGED
|
@@ -1,87 +1,57 @@
|
|
| 1 |
-
import os,
|
| 2 |
from datetime import datetime
|
| 3 |
from flask import Flask, request, jsonify
|
| 4 |
|
| 5 |
app = Flask(__name__)
|
| 6 |
-
|
| 7 |
MODEL_PATH = os.getenv("MODEL_PATH", "model_pipeline.joblib")
|
| 8 |
model = joblib.load(MODEL_PATH)
|
| 9 |
|
| 10 |
-
#
|
| 11 |
EXPECTED_COLS = [
|
| 12 |
-
"Product_Id",
|
| 13 |
-
"
|
| 14 |
-
"
|
| 15 |
-
"Product_Allocated_Area",
|
| 16 |
-
"Product_Type",
|
| 17 |
-
"Product_MRP",
|
| 18 |
-
"Store_Id",
|
| 19 |
-
"Store_Establishment_Year",
|
| 20 |
-
"Store_Age", # <-- engineered feature expected by the model
|
| 21 |
-
"Store_Size",
|
| 22 |
-
"Store_Location_City_Type",
|
| 23 |
-
"Store_Type",
|
| 24 |
]
|
| 25 |
-
|
| 26 |
NUMERIC_COLS = {
|
| 27 |
-
"Product_Weight",
|
| 28 |
-
"
|
| 29 |
-
"Product_MRP",
|
| 30 |
-
"Store_Establishment_Year",
|
| 31 |
-
"Store_Age",
|
| 32 |
}
|
| 33 |
-
|
| 34 |
CURRENT_YEAR = int(os.getenv("CURRENT_YEAR", datetime.now().year))
|
| 35 |
|
| 36 |
@app.get("/health")
|
| 37 |
def health():
|
| 38 |
-
return {"status":
|
| 39 |
|
| 40 |
def _compute_store_age(df: pd.DataFrame) -> pd.Series:
|
| 41 |
-
# coerce to numeric first; invalid -> NaN
|
| 42 |
years = pd.to_numeric(df.get("Store_Establishment_Year"), errors="coerce")
|
| 43 |
-
age = CURRENT_YEAR - years
|
| 44 |
-
# clip to [0, 200] to avoid negatives
|
| 45 |
-
age = age.clip(lower=0, upper=200)
|
| 46 |
return age
|
| 47 |
|
| 48 |
def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
|
| 49 |
-
# add missing
|
| 50 |
for c in EXPECTED_COLS:
|
| 51 |
if c not in df.columns:
|
| 52 |
df[c] = np.nan
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
if df["Store_Age"].isna().any() or "Store_Age" not in df.columns:
|
| 56 |
df["Store_Age"] = _compute_store_age(df)
|
| 57 |
-
|
| 58 |
-
# coerce numeric cols
|
| 59 |
for c in NUMERIC_COLS:
|
| 60 |
df[c] = pd.to_numeric(df[c], errors="coerce")
|
| 61 |
-
|
| 62 |
-
# ensure categorical/object for the rest
|
| 63 |
for c in set(EXPECTED_COLS) - NUMERIC_COLS:
|
| 64 |
df[c] = df[c].astype("string")
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
df = df[EXPECTED_COLS]
|
| 68 |
-
return df
|
| 69 |
|
| 70 |
@app.post("/predict")
|
| 71 |
def predict():
|
| 72 |
try:
|
| 73 |
payload = request.get_json(force=True)
|
| 74 |
-
if isinstance(payload,
|
| 75 |
-
df = pd.DataFrame([payload])
|
| 76 |
-
elif isinstance(payload, list):
|
| 77 |
-
df = pd.DataFrame(payload)
|
| 78 |
-
else:
|
| 79 |
-
return jsonify({"error": "payload must be a dict or list of dicts"}), 400
|
| 80 |
-
|
| 81 |
df = _coerce_and_align(df)
|
| 82 |
preds = model.predict(df)
|
| 83 |
-
return jsonify({"predictions":
|
| 84 |
-
|
| 85 |
except Exception as e:
|
| 86 |
return jsonify({"error": str(e)}), 500
|
| 87 |
|
|
|
|
| 1 |
+
import os, joblib, pandas as pd, numpy as np
|
| 2 |
from datetime import datetime
|
| 3 |
from flask import Flask, request, jsonify
|
| 4 |
|
| 5 |
app = Flask(__name__)
|
|
|
|
| 6 |
# Path of the serialized model pipeline; the MODEL_PATH environment variable
# overrides the default file name.
MODEL_PATH = os.environ.get("MODEL_PATH", "model_pipeline.joblib")

# Deserialized once at import time and reused by every request handler.
# NOTE(review): joblib artifacts are pickle-based -- MODEL_PATH should only ever
# point at a trusted file.
model = joblib.load(MODEL_PATH)
|
| 8 |
|
| 9 |
+
# Feature columns handed to the model, in this exact order. Must match the
# training features (including the engineered Store_Age column).
EXPECTED_COLS = [
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Age",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]
|
|
|
|
| 15 |
# Columns coerced to numeric dtype before prediction; every other expected
# column is cast to string.
NUMERIC_COLS = {
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Age",
}
|
|
|
|
| 19 |
# Reference year used to derive Store_Age. The CURRENT_YEAR environment
# variable takes precedence over the system clock; evaluated once at import.
CURRENT_YEAR = int(
    os.environ.get("CURRENT_YEAR", datetime.now().year)
)
|
| 20 |
|
| 21 |
@app.get("/health")
def health():
    """Liveness probe.

    Returns a JSON-serializable dict with a fixed "ok" status, the ordered
    feature list the model is fed, and the reference year used for Store_Age,
    together with HTTP status 200.
    """
    body = {
        "status": "ok",
        "expected_features": EXPECTED_COLS,
        "current_year": CURRENT_YEAR,
    }
    return body, 200
|
| 24 |
|
| 25 |
def _compute_store_age(df: pd.DataFrame) -> pd.Series:
|
|
|
|
| 26 |
years = pd.to_numeric(df.get("Store_Establishment_Year"), errors="coerce")
|
| 27 |
+
age = (CURRENT_YEAR - years).clip(lower=0, upper=200)
|
|
|
|
|
|
|
| 28 |
return age
|
| 29 |
|
| 30 |
def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
    """Shape an incoming frame into the column layout the model is fed.

    Steps, in order:
      1. Add any column from ``EXPECTED_COLS`` that is absent, filled with NaN.
      2. Coerce every column in ``NUMERIC_COLS`` to numeric (invalid -> NaN).
      3. Fill ``Store_Age`` from ``_compute_store_age`` only for rows where it
         is missing or was not a valid number; supplied values are kept.
      4. Cast the remaining expected columns to pandas ``string`` dtype.
      5. Drop unexpected columns and reorder to ``EXPECTED_COLS``.

    Args:
        df: Raw request data, one row per record. It is not modified.

    Returns:
        A new DataFrame containing exactly ``EXPECTED_COLS`` in that order.
    """
    # Work on a copy so the caller's frame is never mutated.
    out = df.copy()

    for col in EXPECTED_COLS:
        if col not in out.columns:
            out[col] = np.nan

    # Coerce before inspecting Store_Age so a non-numeric age counts as missing.
    for col in NUMERIC_COLS:
        out[col] = pd.to_numeric(out[col], errors="coerce")

    if out["Store_Age"].isna().any():
        # Fill row by row (index-aligned); do not overwrite ages that the
        # client provided for other rows of the same batch.
        out["Store_Age"] = out["Store_Age"].fillna(_compute_store_age(out))

    for col in EXPECTED_COLS:
        if col not in NUMERIC_COLS:
            out[col] = out[col].astype("string")

    return out[EXPECTED_COLS]
|
|
|
|
|
|
|
| 46 |
|
| 47 |
@app.post("/predict")
def predict():
    """Run the model on one JSON record or a list of JSON records.

    Request body: a JSON object (single record) or a non-empty JSON array of
    objects (batch). The Content-Type header is not required (``force=True``).

    Responses:
        200: ``{"predictions": [float, ...]}``, one value per input record.
        400: ``{"error": ...}`` when the body is not valid JSON, or is not a
             dict / non-empty list of dicts.
        500: ``{"error": ...}`` when preprocessing or the model itself fails.
    """
    # silent=True yields None for an unparseable body, so malformed JSON is
    # reported as a client error below rather than surfacing as a 500.
    payload = request.get_json(force=True, silent=True)

    if isinstance(payload, dict):
        records = [payload]
    elif (
        isinstance(payload, list)
        and payload
        and all(isinstance(item, dict) for item in payload)
    ):
        records = payload
    else:
        return jsonify({"error": "payload must be a dict or list of dicts"}), 400

    try:
        df = _coerce_and_align(pd.DataFrame(records))
        preds = model.predict(df)
        # Cast to built-in floats so the values are JSON serializable.
        return jsonify({"predictions": [float(x) for x in preds]}), 200
    except Exception as e:
        # Top-level boundary: keep the JSON error contract, but record the
        # traceback server-side so failures are not silently swallowed.
        app.logger.exception("prediction failed")
        return jsonify({"error": str(e)}), 500
|
| 57 |
|