3v324v23 committed on
Commit
8e6e78d
·
1 Parent(s): 9446900

Harden predict: align columns, coerce dtypes, better errors

Browse files
Files changed (1) hide show
  1. app.py +18 -48
app.py CHANGED
@@ -1,87 +1,57 @@
1
- import os, json, joblib, pandas as pd, numpy as np
2
  from datetime import datetime
3
  from flask import Flask, request, jsonify
4
 
5
  app = Flask(__name__)
6
-
7
  MODEL_PATH = os.getenv("MODEL_PATH", "model_pipeline.joblib")
8
  model = joblib.load(MODEL_PATH)
9
 
10
- # must match training features
11
  EXPECTED_COLS = [
12
- "Product_Id",
13
- "Product_Weight",
14
- "Product_Sugar_Content",
15
- "Product_Allocated_Area",
16
- "Product_Type",
17
- "Product_MRP",
18
- "Store_Id",
19
- "Store_Establishment_Year",
20
- "Store_Age", # <-- engineered feature expected by the model
21
- "Store_Size",
22
- "Store_Location_City_Type",
23
- "Store_Type",
24
  ]
25
-
26
  NUMERIC_COLS = {
27
- "Product_Weight",
28
- "Product_Allocated_Area",
29
- "Product_MRP",
30
- "Store_Establishment_Year",
31
- "Store_Age",
32
  }
33
-
34
  CURRENT_YEAR = int(os.getenv("CURRENT_YEAR", datetime.now().year))
35
 
36
  @app.get("/health")
37
  def health():
38
- return {"status": "ok", "expected_features": EXPECTED_COLS, "current_year": CURRENT_YEAR}, 200
39
 
40
  def _compute_store_age(df: pd.DataFrame) -> pd.Series:
41
- # coerce to numeric first; invalid -> NaN
42
  years = pd.to_numeric(df.get("Store_Establishment_Year"), errors="coerce")
43
- age = CURRENT_YEAR - years
44
- # clip to [0, 200] to avoid negatives
45
- age = age.clip(lower=0, upper=200)
46
  return age
47
 
48
  def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
49
- # add missing columns with NaN
50
  for c in EXPECTED_COLS:
51
  if c not in df.columns:
52
  df[c] = np.nan
53
-
54
- # compute Store_Age if missing or nulls
55
- if df["Store_Age"].isna().any() or "Store_Age" not in df.columns:
56
  df["Store_Age"] = _compute_store_age(df)
57
-
58
- # coerce numeric cols
59
  for c in NUMERIC_COLS:
60
  df[c] = pd.to_numeric(df[c], errors="coerce")
61
-
62
- # ensure categorical/object for the rest
63
  for c in set(EXPECTED_COLS) - NUMERIC_COLS:
64
  df[c] = df[c].astype("string")
65
-
66
- # reorder exactly as training
67
- df = df[EXPECTED_COLS]
68
- return df
69
 
70
  @app.post("/predict")
71
  def predict():
72
  try:
73
  payload = request.get_json(force=True)
74
- if isinstance(payload, dict):
75
- df = pd.DataFrame([payload])
76
- elif isinstance(payload, list):
77
- df = pd.DataFrame(payload)
78
- else:
79
- return jsonify({"error": "payload must be a dict or list of dicts"}), 400
80
-
81
  df = _coerce_and_align(df)
82
  preds = model.predict(df)
83
- return jsonify({"predictions": [float(x) for x in preds]}), 200
84
-
85
  except Exception as e:
86
  return jsonify({"error": str(e)}), 500
87
 
 
1
+ import os, joblib, pandas as pd, numpy as np
2
  from datetime import datetime
3
  from flask import Flask, request, jsonify
4
 
5
  app = Flask(__name__)
 
6
  MODEL_PATH = os.getenv("MODEL_PATH", "model_pipeline.joblib")
7
  model = joblib.load(MODEL_PATH)
8
 
9
+ # Must match training features (include Store_Age)
10
  EXPECTED_COLS = [
11
+ "Product_Id","Product_Weight","Product_Sugar_Content","Product_Allocated_Area",
12
+ "Product_Type","Product_MRP","Store_Id","Store_Establishment_Year",
13
+ "Store_Age","Store_Size","Store_Location_City_Type","Store_Type"
 
 
 
 
 
 
 
 
 
14
  ]
 
15
  NUMERIC_COLS = {
16
+ "Product_Weight","Product_Allocated_Area","Product_MRP",
17
+ "Store_Establishment_Year","Store_Age"
 
 
 
18
  }
 
19
  CURRENT_YEAR = int(os.getenv("CURRENT_YEAR", datetime.now().year))
20
 
21
  @app.get("/health")
22
  def health():
23
+ return {"status":"ok","expected_features":EXPECTED_COLS,"current_year":CURRENT_YEAR}, 200
24
 
25
  def _compute_store_age(df: pd.DataFrame) -> pd.Series:
 
26
  years = pd.to_numeric(df.get("Store_Establishment_Year"), errors="coerce")
27
+ age = (CURRENT_YEAR - years).clip(lower=0, upper=200)
 
 
28
  return age
29
 
30
  def _coerce_and_align(df: pd.DataFrame) -> pd.DataFrame:
31
+ # add missing training columns
32
  for c in EXPECTED_COLS:
33
  if c not in df.columns:
34
  df[c] = np.nan
35
+ # compute Store_Age if missing/NaN
36
+ if df["Store_Age"].isna().any():
 
37
  df["Store_Age"] = _compute_store_age(df)
38
+ # numeric coercion
 
39
  for c in NUMERIC_COLS:
40
  df[c] = pd.to_numeric(df[c], errors="coerce")
41
+ # categorical as string
 
42
  for c in set(EXPECTED_COLS) - NUMERIC_COLS:
43
  df[c] = df[c].astype("string")
44
+ # reorder to training order
45
+ return df[EXPECTED_COLS]
 
 
46
 
47
  @app.post("/predict")
48
  def predict():
49
  try:
50
  payload = request.get_json(force=True)
51
+ df = pd.DataFrame(payload if isinstance(payload, list) else [payload])
 
 
 
 
 
 
52
  df = _coerce_and_align(df)
53
  preds = model.predict(df)
54
+ return jsonify({"predictions":[float(x) for x in preds]}), 200
 
55
  except Exception as e:
56
  return jsonify({"error": str(e)}), 500
57