3v324v23's picture
Harden predict: align columns, coerce dtypes, better errors
8e6e78d
import os, joblib, pandas as pd, numpy as np
from datetime import datetime
from flask import Flask, request, jsonify
# WSGI application object; the route decorators further down register on it.
app = Flask(__name__)

# Path of the serialized model pipeline. The MODEL_PATH environment variable
# overrides the default file name.
MODEL_PATH = os.environ.get("MODEL_PATH", "model_pipeline.joblib")

# Loaded once at import time and reused by every request.
# NOTE(review): joblib.load deserializes via pickle, so MODEL_PATH should only
# ever point at a trusted artifact.
model = joblib.load(MODEL_PATH)
# Feature columns handed to the model, in order. These must match the features
# used at training time (Store_Age included).
EXPECTED_COLS = [
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Age",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]

# Subset of EXPECTED_COLS that is coerced to numbers; every other expected
# column is treated as categorical text.
NUMERIC_COLS = {
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Age",
}

# Reference year for deriving Store_Age. Taken from the CURRENT_YEAR
# environment variable when set, otherwise from the clock at import time.
CURRENT_YEAR = int(os.environ.get("CURRENT_YEAR", datetime.now().year))
@app.get("/health")
def health():
    """Report service status along with the feature schema in use.

    Returns:
        A ``(body, 200)`` tuple whose body carries a fixed ``"ok"`` status,
        the expected feature columns and the reference year used for
        store-age derivation.
    """
    body = {
        "status": "ok",
        "expected_features": EXPECTED_COLS,
        "current_year": CURRENT_YEAR,
    }
    return body, 200
def _compute_store_age(df: pd.DataFrame) -> pd.Series:
    """Derive store age in years from ``Store_Establishment_Year``.

    Args:
        df: Frame that may contain a ``Store_Establishment_Year`` column.

    Returns:
        A Series aligned to ``df.index`` holding ``CURRENT_YEAR - year``
        clamped to the range [0, 200]. Rows whose year cannot be parsed as a
        number are NaN. If the column is absent, every row is NaN.
    """
    if "Store_Establishment_Year" not in df.columns:
        # Without this guard the lookup yields None and the arithmetic/clip
        # below fails instead of producing "age unknown" for each row.
        return pd.Series(np.nan, index=df.index, dtype="float64")

    years = pd.to_numeric(df["Store_Establishment_Year"], errors="coerce")
    # Clamp so future-dated or implausibly old years cannot produce negative
    # or absurd ages.
    return (CURRENT_YEAR - years).clip(lower=0, upper=200)
def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
    """Shape a request frame into the column layout the model expects.

    Steps, in order:
      1. add any column from ``EXPECTED_COLS`` that the request omitted (NaN);
      2. coerce ``NUMERIC_COLS`` to numbers, unparseable values becoming NaN;
      3. fill ``Store_Age`` from the establishment year, but only for rows
         where it is missing or was not numeric;
      4. cast the remaining expected columns to pandas ``string`` dtype;
      5. drop unexpected columns and reorder to ``EXPECTED_COLS``.

    Args:
        df: Raw records, one row per prediction request. Not modified.

    Returns:
        A new frame containing exactly ``EXPECTED_COLS``, in that order.
    """
    # Work on a copy so the caller's frame does not gain columns or change
    # dtypes as a side effect.
    df = df.copy()

    # Add missing training columns.
    for col in EXPECTED_COLS:
        if col not in df.columns:
            df[col] = np.nan

    # Numeric coercion runs before the Store_Age back-fill so that a
    # non-numeric age (coerced to NaN here) is also derived from the year.
    for col in NUMERIC_COLS:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Fill only the rows that lack an age; ages supplied by the client for
    # other rows in the same batch are kept as-is.
    if df["Store_Age"].isna().any():
        df["Store_Age"] = df["Store_Age"].fillna(_compute_store_age(df))

    # Categorical columns as strings (iterating the list keeps a stable order).
    for col in EXPECTED_COLS:
        if col not in NUMERIC_COLS:
            df[col] = df[col].astype("string")

    # Reorder to training order, dropping anything the model does not know.
    return df[EXPECTED_COLS]
@app.post("/predict")
def predict():
    """Score one record or a batch of records posted as JSON.

    The request body must be a JSON object (a single record) or a non-empty
    JSON array of objects (a batch). Records are passed through
    ``_coerce_and_align`` and then to ``model.predict``.

    Returns:
        ``({"predictions": [...]}, 200)`` on success, one float per record;
        ``({"error": ...}, 400)`` when the body is not valid JSON or is not
        made of JSON objects;
        ``({"error": ...}, 500)`` when alignment or the model raises.
    """
    # silent=True yields None for a malformed body instead of raising, so a
    # parse failure is reported as a client error rather than a 500.
    payload = request.get_json(force=True, silent=True)
    records = payload if isinstance(payload, list) else [payload]

    # Reject anything that is not a record: scalars, null and nested lists
    # would otherwise become an all-NaN row and be scored silently.
    if not records or not all(isinstance(rec, dict) for rec in records):
        return jsonify({
            "error": "Request body must be a JSON object or a non-empty "
                     "JSON array of objects"
        }), 400

    try:
        df = _coerce_and_align(pd.DataFrame(records))
        preds = model.predict(df)
        predictions = [float(x) for x in preds]
    except Exception as e:
        # Top-level boundary: keep the JSON error contract for clients, but
        # record the traceback so the failure can be diagnosed.
        app.logger.exception("Prediction failed")
        return jsonify({"error": str(e)}), 500

    return jsonify({"predictions": predictions}), 200
if __name__ == "__main__":
    # Script entry point: listen on every interface, on the port given by the
    # PORT environment variable (7860 when unset).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))