COCODEDE04 committed on
Commit
5f4bae5
·
verified ·
1 Parent(s): 3f46446

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +275 -147
app.py CHANGED
@@ -1,118 +1,162 @@
1
- import os
2
- import json
3
- import traceback
4
- from typing import Any, Dict
5
 
6
  import numpy as np
7
  import tensorflow as tf
8
- import joblib
9
  from fastapi import FastAPI, Request
10
- from fastapi.responses import JSONResponse
11
  from fastapi.middleware.cors import CORSMiddleware
12
 
13
- # -------------------- CONFIG --------------------
14
- MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5")
15
- STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
16
- IMPUTER_PATH = os.getenv("IMPUTER_PATH", "imputer.joblib")
17
- SCALER_PATH = os.getenv("SCALER_PATH", "scaler.joblib")
18
-
19
- CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
20
- # ------------------------------------------------
21
-
22
- print("Loading model / imputer / scaler...")
23
-
24
- # ---- Model ----
25
- model = tf.keras.models.load_model(MODEL_PATH, compile=False)
26
-
27
- # ---- Stats ----
28
- with open(STATS_PATH, "r") as f:
29
- stats: Dict[str, Dict[str, float]] = json.load(f)
30
-
31
- FEATURES = list(stats.keys())
32
-
33
- # ---- Optional artifacts ----
34
- try:
35
- imputer = joblib.load(IMPUTER_PATH)
36
- print("Imputer loaded.")
37
- except Exception:
38
- imputer = None
39
- print("⚠️ No imputer found — skipping median imputation.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- try:
42
- scaler = joblib.load(SCALER_PATH)
43
- print("Scaler loaded.")
44
- except Exception:
45
- scaler = None
46
- print("⚠️ No scaler found — using manual z-scoring.")
47
 
48
- # -------------------- HELPERS --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def coerce_float(val: Any) -> float:
50
- """Convert strings like '49.709,14' or '0,005' to float."""
51
  if isinstance(val, (int, float)):
52
  return float(val)
53
- s = str(val).strip().replace(" ", "")
54
  if s == "":
55
- return 0.0
56
- if "," in s and "." in s:
57
- if s.rfind(",") > s.rfind("."):
58
- s = s.replace(".", "").replace(",", ".")
 
 
 
 
 
 
59
  else:
60
  s = s.replace(",", "")
61
- elif "," in s:
62
  s = s.replace(",", ".")
63
- try:
64
- return float(s)
65
- except Exception:
66
- return 0.0
67
 
68
 
69
- def _z(val: Any, mean: float, sd: float) -> float:
70
  try:
71
  v = coerce_float(val)
72
  except Exception:
73
  return 0.0
74
- if not sd or sd == 0:
75
  return 0.0
76
  return (v - mean) / sd
77
 
78
 
79
- def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
80
- """Convert (N, K−1) logits to (N, K) probabilities for CORAL ordinal output."""
81
- logits = tf.convert_to_tensor(logits_np, dtype=tf.float32)
82
- sig = tf.math.sigmoid(logits)
83
- left = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
84
- right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
85
- probs = tf.clip_by_value(left - right, 1e-12, 1.0)
86
- return probs.numpy()
87
-
88
-
89
- def build_matrix_from_payload(payload: Dict[str, Any]):
90
- """Builds a z-scored matrix and returns (X, z_detail, missing)."""
91
- z = []
92
- z_detail = {}
93
- missing = []
94
- for f in FEATURES:
95
- mean = stats[f]["mean"]
96
- sd = stats[f]["std"]
97
- if f in payload:
98
- zf = _z(payload[f], mean, sd)
99
- else:
100
- zf = _z(0.0, mean, sd)
101
- missing.append(f)
102
- z.append(zf)
103
- z_detail[f] = zf
104
-
105
- X = np.array([z], dtype=np.float32)
106
-
107
- if imputer is not None:
108
- X = imputer.transform(X)
109
- if scaler is not None:
110
- X = scaler.transform(X)
111
-
112
- return X, z_detail, missing
113
-
114
-
115
- # -------------------- APP INIT --------------------
116
  app = FastAPI(title="Static Fingerprint API", version="1.1.0")
117
 
118
  app.add_middleware(
@@ -123,12 +167,39 @@ app.add_middleware(
123
  allow_headers=["*"],
124
  )
125
 
126
- # -------------------- ROUTES --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  @app.get("/")
128
  def root():
129
  return {
130
  "message": "Static Fingerprint API is running.",
131
- "try": ["GET /health", "POST /predict"],
132
  }
133
 
134
 
@@ -136,73 +207,130 @@ def root():
136
  def health():
137
  return {
138
  "status": "ok",
139
- "model_file": MODEL_PATH,
140
- "stats_file": STATS_PATH,
141
  "features": FEATURES,
142
  "classes": CLASSES,
143
- "imputer_loaded": imputer is not None,
144
- "scaler_loaded": scaler is not None,
 
 
 
145
  }
146
 
147
 
148
  @app.post("/echo")
149
  async def echo(req: Request):
150
- """Echoes back any JSON payload (debug)."""
151
  payload = await req.json()
152
  return {"received": payload}
153
 
154
 
155
- @app.post("/predict")
156
- async def predict(req: Request):
157
  """
158
- POST JSON mapping each feature to a numeric value.
159
- Example:
160
- {
161
- "autosuf_oper": 1.0,
162
- "cov_improductiva": 0.9,
163
- ...
164
- }
165
  """
166
- try:
167
- payload = await req.json()
168
- if not isinstance(payload, dict):
169
- return JSONResponse(
170
- status_code=400,
171
- content={"error": "Expected a JSON object mapping feature -> value."},
172
- )
173
-
174
- X, z_detail, missing = build_matrix_from_payload(payload)
175
- raw = model.predict(X, verbose=0)
176
-
177
- # Detect output type (CORAL or softmax)
178
- if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
179
- probs = coral_probs_from_logits(raw)[0]
180
- decode_mode = "auto_coral_monotone"
181
  else:
182
- logits = tf.convert_to_tensor(raw, dtype=tf.float32)
183
- probs = tf.nn.softmax(logits, axis=1).numpy()[0]
184
- decode_mode = "softmax"
185
-
186
- probs = probs / np.sum(probs)
187
- pred_idx = int(np.argmax(probs))
188
-
189
- return {
190
- "input_ok": (len(missing) == 0),
191
- "missing": missing,
192
- "z_scores": z_detail,
193
- "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
194
- "predicted_state": CLASSES[pred_idx],
195
- "debug": {
196
- "raw_shape": list(raw.shape),
197
- "decode_mode": decode_mode,
198
- "raw_first_row": [float(x) for x in raw[0].tolist()],
199
- },
200
- }
201
 
202
- except Exception as e:
203
- tb = traceback.format_exc()
204
- print("🔥 ERROR in /predict:", tb)
205
- return JSONResponse(
206
- status_code=500,
207
- content={"error": str(e), "traceback": tb},
208
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os, json, glob
3
+ from typing import Any, Dict, List, Optional
 
4
 
5
  import numpy as np
6
  import tensorflow as tf
 
7
  from fastapi import FastAPI, Request
 
8
  from fastapi.middleware.cors import CORSMiddleware
9
 
10
# ----------------- CONFIG -----------------
# Candidate artifact filenames probed in order by resolve_first(); the model
# path can additionally be overridden via the MODEL_PATH env var.
DEFAULT_MODEL_CANDIDATES = ["best_model.h5", "best_model.keras"]
DEFAULT_IMPUTER_CANDIDATES = ["imputer.joblib", "imputer.pkl", "imputer.sav"]
DEFAULT_SCALER_CANDIDATES = ["scaler.joblib", "scaler.pkl", "scaler.sav"]
DEFAULT_STATS_PATH = "means_std.json"

CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]  # index 0=Top ... 4=Low
APPLY_CORAL_MONOTONE = True  # nudge thresholds to be non-increasing before decode
# ------------------------------------------

# Directory containing this file; used as the primary search root for artifacts.
HERE = os.path.dirname(os.path.abspath(__file__))
21
+
22
+
23
# ---------- utilities: robust file resolving & logging ----------
def resolve_first(*names: str) -> Optional[str]:
    """Locate the first existing file among *names*.

    Checks the repo directory (HERE) and the current working directory for a
    direct match, then falls back to a recursive glob under both roots so
    artifacts tucked into subfolders are still found. Returns an absolute
    path, or None when nothing matches.
    """
    search_roots = (HERE, os.getcwd())

    # Direct hits first: cheapest check, covers the common flat layout.
    for root in search_roots:
        for name in names:
            candidate = os.path.join(root, name)
            if os.path.isfile(candidate):
                return candidate

    # Recursive fallback (handles subfolders).
    for name in names:
        for root in search_roots:
            pattern = os.path.join(root, "**", name)
            for match in glob.glob(pattern, recursive=True):
                if os.path.isfile(match):
                    return match

    return None
42
+
43
+
44
def describe_dir():
    """Print diagnostic info about where the app is running (best effort).

    Any failure (e.g. permissions) is swallowed and reported as a single
    log line so artifact loading is never interrupted.
    """
    try:
        print("CWD:", os.getcwd())
        print("Repo dir (HERE):", HERE)
        print("Repo listing:", os.listdir(HERE))
    except Exception as err:
        print("listdir error:", err)
51
 
 
 
 
 
 
 
52
 
53
def load_joblib(label: str, candidates: List[str]):
    """Best-effort loader for an optional sklearn artifact (imputer/scaler).

    Searches for the first existing candidate file and unpickles it with
    joblib. Returns None — never raises — when the file is absent or cannot
    be deserialized, so the app can run with degraded preprocessing.
    """
    import joblib

    print(f"Looking for {label} among: {candidates}")
    describe_dir()
    path = resolve_first(*candidates)
    if not path:
        print(f"⚠️ {label} not found.")
        return None
    # Size lookup is cosmetic; fall back to a shorter log line if it fails.
    try:
        size = os.path.getsize(path)
    except Exception:
        print(f"Loading {label} from {path}")
    else:
        print(f"Loading {label} from {path} ({size} bytes)")
    try:
        return joblib.load(path)
    except Exception as e:
        print(f"⚠️ Failed to load {label}: {repr(e)}")
        return None
70
+
71
+
72
def load_model_robust() -> tf.keras.Model:
    """Resolve and load the Keras model, honoring the MODEL_PATH env override.

    Raises FileNotFoundError when no candidate file exists — the API cannot
    serve predictions without a model, so this fails loudly at startup.
    """
    print("Resolving model...")
    env_model = os.getenv("MODEL_PATH")
    candidates = [env_model] if env_model else DEFAULT_MODEL_CANDIDATES
    path = resolve_first(*candidates)
    if not path:
        raise FileNotFoundError(f"Model file not found. Tried: {candidates}")
    print(f"Loading model from {path} ({os.path.getsize(path)} bytes)")
    # compile=False: inference only — avoids needing custom losses/metrics.
    return tf.keras.models.load_model(path, compile=False)
86
+
87
+
88
def load_means_std(stats_path: Optional[str]) -> Optional[Dict[str, Dict[str, float]]]:
    """Load the per-feature mean/std JSON used for manual z-scoring.

    Resolution order for the filename: explicit argument, then the
    STATS_PATH env var, then the default. Returns None when the file
    cannot be found (manual standardization then becomes unavailable).
    """
    name = stats_path or os.getenv("STATS_PATH") or DEFAULT_STATS_PATH
    path = resolve_first(name) if name else None
    if not path:
        print("⚠️ means_std.json not found.")
        return None
    print(f"Loading means/std from {path} ({os.path.getsize(path)} bytes)")
    with open(path, "r") as f:
        return json.load(f)
97
+
98
+
99
# ---------- numeric coercion ----------
def coerce_float(val: Any) -> float:
    """Coerce *val* to float, accepting European-style locale strings.

    '49.709,14' -> 49709.14 (dots group thousands, comma is the decimal),
    '1,234.56'  -> 1234.56  (commas group thousands),
    '0,005'     -> 0.005    (lone comma treated as decimal separator).

    Raises ValueError for empty or unparseable input.
    """
    if isinstance(val, (int, float)):
        return float(val)
    text = str(val).strip()
    if not text:
        raise ValueError("empty")
    text = text.replace(" ", "")
    dot_pos = text.rfind(".")
    comma_pos = text.rfind(",")
    if dot_pos >= 0 and comma_pos >= 0:
        if comma_pos > dot_pos:
            # European style: strip grouping dots, comma becomes the decimal.
            text = text.replace(".", "").replace(",", ".")
        else:
            # US style: strip grouping commas, keep the dot.
            text = text.replace(",", "")
    elif comma_pos >= 0:
        # Comma only: treat as decimal separator.
        text = text.replace(",", ".")
    return float(text)
 
 
 
121
 
122
 
123
def z_manual(val: Any, mean: float, sd: float) -> float:
    """Standardize *val* as (val - mean) / sd.

    Returns 0.0 when the value cannot be coerced to a number or when the
    standard deviation is zero/falsy (avoids a ZeroDivisionError).
    """
    try:
        numeric = coerce_float(val)
    except Exception:
        return 0.0
    return (numeric - mean) / sd if sd else 0.0
131
 
132
 
133
# ---------- CORAL decoding ----------
def coral_probs_from_logits(logits_np: np.ndarray, monotone: bool = False) -> np.ndarray:
    """Convert CORAL cumulative logits to per-class probabilities.

    Args:
        logits_np: (N, K-1) cumulative boundary logits (Top=0 best ... Low worst).
        monotone: when True, enforce non-increasing thresholds per row
            (t1 >= t2 >= ...) before decoding, so adjacent-class probability
            differences stay non-negative.

    Returns:
        (N, K) probability matrix; each entry clipped to [1e-12, 1.0].

    Note: unlike the previous implementation, the input array is never
    mutated — np.asarray can alias a float32 input, and the old in-place
    clamp loop would silently rewrite the caller's logits.
    """
    logits = np.asarray(logits_np, dtype=np.float32)

    if monotone:
        # Running minimum left->right == clamping row[j] = min(row[j], row[j-1]);
        # vectorized replacement for the former per-element Python loop.
        # Returns a new array, leaving the caller's input untouched.
        logits = np.minimum.accumulate(logits, axis=1)

    sig = 1.0 / (1.0 + np.exp(-logits))  # sigmoid
    # P(class k) = sigma(t_{k-1}) - sigma(t_k) with sigma(t_0)=1, sigma(t_K)=0.
    left = np.concatenate([np.ones((sig.shape[0], 1), dtype=np.float32), sig], axis=1)
    right = np.concatenate([sig, np.zeros((sig.shape[0], 1), dtype=np.float32)], axis=1)
    probs = np.clip(left - right, 1e-12, 1.0)
    return probs
157
+
158
+
159
+ # ---------- FastAPI app ----------
 
 
 
 
 
 
 
 
 
 
160
  app = FastAPI(title="Static Fingerprint API", version="1.1.0")
161
 
162
  app.add_middleware(
 
167
  allow_headers=["*"],
168
  )
169
 
170
# ---- Artifact loading at import time (fails fast if the model is absent) ----
print("Loading model / imputer / scaler...")
model = load_model_robust()
imputer = load_joblib("imputer", DEFAULT_IMPUTER_CANDIDATES)
scaler = load_joblib("scaler", DEFAULT_SCALER_CANDIDATES)
stats = load_means_std(os.getenv("STATS_PATH"))

# Feature order:
# Prefer scaler.feature_names_in_ if present (sklearn >=1.0),
# else imputer.feature_names_in_,
# else the order in means_std.json,
# else fail loudly.
# NOTE: hasattr(None, ...) is False, so missing artifacts fall through safely.
if hasattr(scaler, "feature_names_in_"):
    FEATURES: List[str] = list(scaler.feature_names_in_)
    print("FEATURES from scaler.feature_names_in_")
elif hasattr(imputer, "feature_names_in_"):
    FEATURES = list(imputer.feature_names_in_)
    print("FEATURES from imputer.feature_names_in_")
elif isinstance(stats, dict):
    FEATURES = list(stats.keys())
    print("FEATURES from means_std.json order")
else:
    raise RuntimeError("Cannot determine feature order. Provide scaler/imputer with feature_names_in_ or a means_std.json.")

print("Feature order:", FEATURES)
print("Artifacts present:",
      {"imputer": imputer is not None, "scaler": scaler is not None, "stats": stats is not None})
196
+
197
+
198
@app.get("/")
def root():
    """Landing endpoint: confirms the service is alive and lists routes to try."""
    endpoints = ["GET /health", "POST /predict", "POST /echo"]
    return {"message": "Static Fingerprint API is running.", "try": endpoints}
204
 
205
 
 
207
def health():
    """Health/readiness probe.

    Reports the resolved feature order, the class labels, and which optional
    preprocessing artifacts were successfully loaded at startup.
    """
    return {
        "status": "ok",
        "features": FEATURES,
        "classes": CLASSES,
        # Plain booleans rather than the artifact objects: keeps the payload JSON-safe.
        "artifacts": {
            "imputer": bool(imputer is not None),
            "scaler": bool(scaler is not None),
            "means_std": bool(stats is not None),
        },
    }
218
 
219
 
220
@app.post("/echo")
async def echo(req: Request):
    """Debug endpoint: returns whatever JSON body was posted, unchanged."""
    body = await req.json()
    return {"received": body}
224
 
225
 
226
def preprocess_payload_to_X(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the model input matrix from a feature -> value JSON payload.

    Returns dict with:
      - X: np.ndarray shape (1, n_features) ready for model
      - z_scores: dict feature -> z value (if available)
      - missing: list of features not provided (or not parseable as numbers)
      - used: dict feature -> raw value used (after imputation)

    Raises RuntimeError when neither a scaler nor means_std stats are
    available, since standardization is then impossible.
    """
    missing: List[str] = []
    z_scores: Dict[str, float] = {}
    used_raw: Dict[str, float] = {}

    # Build the raw feature vector in the training feature order.
    raw_vec: List[float] = []
    for f in FEATURES:
        try:
            v = coerce_float(payload[f])
        except (KeyError, ValueError, TypeError):
            # Absent OR unparseable -> NaN and flagged missing; previously an
            # unparseable string raised straight through and 500'd the request.
            missing.append(f)
            v = np.nan
        raw_vec.append(v)

    raw = np.array([raw_vec], dtype=np.float32)

    # Impute if available
    if imputer is not None:
        raw_imp = imputer.transform(raw)
    else:
        # No imputer: fall back to the training mean from means_std, else zero.
        raw_imp = raw.copy()
        for j, f in enumerate(FEATURES):
            if np.isnan(raw_imp[0, j]):
                if stats and f in stats:
                    raw_imp[0, j] = stats[f].get("mean", 0.0)
                else:
                    raw_imp[0, j] = 0.0

    # Scale if available
    if scaler is not None:
        X = scaler.transform(raw_imp).astype(np.float32)
        # z-scores are recoverable when the scaler exposes mean_/scale_
        # (e.g. a fitted StandardScaler).
        if hasattr(scaler, "mean_") and hasattr(scaler, "scale_"):
            for j, f in enumerate(FEATURES):
                mu = float(scaler.mean_[j])
                sd = float(scaler.scale_[j])
                z_scores[f] = 0.0 if sd == 0 else (float(raw_imp[0, j]) - mu) / sd
    else:
        # Manual z-scoring from means_std.json.
        if not stats:
            raise RuntimeError("No scaler and no means_std.json — cannot standardize.")
        z_list: List[float] = []
        for j, f in enumerate(FEATURES):
            z = z_manual(raw_imp[0, j], float(stats[f]["mean"]), float(stats[f]["std"]))
            z_list.append(z)
            z_scores[f] = float(z)
        X = np.array([z_list], dtype=np.float32)

    # Record the post-imputation raw values actually fed to the model.
    # (The former parallel `used_vals` list was dead code and has been removed.)
    for j, f in enumerate(FEATURES):
        used_raw[f] = float(raw_imp[0, j])

    return {
        "X": X,
        "z_scores": z_scores,
        "missing": missing,
        "used": used_raw,
    }
299
+
300
+
301
@app.post("/predict")
async def predict(req: Request):
    """
    Predict the class for a JSON object mapping feature name -> value.

    Responds with per-class probabilities, the argmax class, the values used
    after imputation, z-scores, and decode diagnostics. A non-object body
    yields HTTP 400.
    """
    # Local import: only the error path needs it; fastapi is already a dependency.
    from fastapi.responses import JSONResponse

    payload = await req.json()
    if not isinstance(payload, dict):
        # Client error -> proper 400 status (was a 200 with an error body).
        return JSONResponse(
            status_code=400,
            content={"error": "Expected a JSON object mapping feature -> value."},
        )

    prep = preprocess_payload_to_X(payload)
    X: np.ndarray = prep["X"]

    raw = model.predict(X, verbose=0)

    # CORAL output has K-1 cumulative logits; anything else is treated as K-class.
    debug: Dict[str, Any] = {"raw_shape": list(raw.shape)}
    if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
        decode_mode = "auto_coral_monotone" if APPLY_CORAL_MONOTONE else "auto_coral"
        probs = coral_probs_from_logits(raw, monotone=APPLY_CORAL_MONOTONE)[0]
    else:
        # NOTE(review): assumes the model's final layer already emits
        # probabilities (softmax baked into the graph). If it emits raw
        # logits, a softmax would be needed here — confirm against training.
        decode_mode = "auto_softmax"
        probs = raw[0]
    total = float(np.sum(probs))
    if total > 0:
        probs = probs / total
    debug["decode_mode"] = decode_mode
    debug["raw_first_row"] = [float(x) for x in np.array(raw[0]).ravel().tolist()]

    pred_idx = int(np.argmax(probs))

    return {
        "input_ok": (len(prep["missing"]) == 0),
        "missing": prep["missing"],
        "used_raw": prep["used"],  # values after imputation
        "z_scores": prep["z_scores"],  # standardized (from scaler or stats)
        "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
        "predicted_state": CLASSES[pred_idx],
        "debug": debug,
    }