COCODEDE04 committed on
Commit
a46e832
·
verified ·
1 Parent(s): c761c99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -278
app.py CHANGED
@@ -1,336 +1,339 @@
1
- # app.py
2
- import os, json, glob
3
  from typing import Any, Dict, List, Optional
4
 
5
  import numpy as np
6
  import tensorflow as tf
7
  from fastapi import FastAPI, Request
8
  from fastapi.middleware.cors import CORSMiddleware
 
9
 
10
# ----------------- CONFIG -----------------
# Candidate filenames for each artifact; the first one found wins.
DEFAULT_MODEL_CANDIDATES = ["best_model.h5", "best_model.keras"]
DEFAULT_IMPUTER_CANDIDATES = ["imputer.joblib", "imputer.pkl", "imputer.sav"]
DEFAULT_SCALER_CANDIDATES = ["scaler.joblib", "scaler.pkl", "scaler.sav"]
DEFAULT_STATS_PATH = "means_std.json"

# Ordinal classes: index 0 = Top (best) ... index 4 = Low (worst).
CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
# Nudge CORAL thresholds to be non-increasing before decoding.
APPLY_CORAL_MONOTONE = True
# ------------------------------------------

# Directory containing this file; used as the primary artifact search root.
HERE = os.path.dirname(os.path.abspath(__file__))
21
-
22
-
23
# ---------- utilities: robust file resolving & logging ----------
def resolve_first(*names: str) -> Optional[str]:
    """Return the absolute path of the first existing file among *names*.

    Searches HERE and the current working directory directly first, then
    falls back to a recursive glob (handles artifacts tucked into subfolders).
    Returns None when nothing matches.
    """
    # Direct hits in the two obvious roots.
    for root in (HERE, os.getcwd()):
        for name in names:
            candidate = os.path.join(root, name)
            if os.path.isfile(candidate):
                return candidate

    # Recursive fallback, preserving the name-major search order.
    patterns: List[str] = []
    for name in names:
        patterns.extend([os.path.join(HERE, "**", name),
                         os.path.join(os.getcwd(), "**", name)])
    for pattern in patterns:
        for match in glob.glob(pattern, recursive=True):
            if os.path.isfile(match):
                return match

    return None
42
-
43
-
44
def describe_dir():
    """Print CWD, the repo dir and its listing (debug aid for missing files)."""
    try:
        print("CWD:", os.getcwd())
        print("Repo dir (HERE):", HERE)
        print("Repo listing:", os.listdir(HERE))
    except Exception as exc:
        # Best-effort diagnostics only; never let logging break startup.
        print("listdir error:", exc)
51
-
52
-
53
def load_joblib(label: str, candidates: List[str]):
    """Locate and unpickle a joblib artifact; return None on any failure.

    Logs the search, the resolved path (with size when obtainable) and any
    load error instead of raising, so startup can continue without optional
    artifacts.
    """
    import joblib

    print(f"Looking for {label} among: {candidates}")
    describe_dir()

    path = resolve_first(*candidates)
    if not path:
        print(f"⚠️ {label} not found.")
        return None

    try:
        print(f"Loading {label} from {path} ({os.path.getsize(path)} bytes)")
    except Exception:
        # getsize can fail on exotic filesystems; the path alone still helps.
        print(f"Loading {label} from {path}")

    try:
        return joblib.load(path)
    except Exception as exc:
        print(f"⚠️ Failed to load {label}: {repr(exc)}")
        return None
70
-
71
 
72
def load_model_robust() -> tf.keras.Model:
    """Resolve and load the Keras model, honoring the MODEL_PATH env override.

    Raises FileNotFoundError when no candidate file exists.
    """
    print("Resolving model...")
    env_model = os.getenv("MODEL_PATH")
    candidates = [env_model] if env_model else DEFAULT_MODEL_CANDIDATES

    path = resolve_first(*candidates)
    if not path:
        raise FileNotFoundError(f"Model file not found. Tried: {candidates}")

    print(f"Loading model from {path} ({os.path.getsize(path)} bytes)")
    # compile=False: inference only — avoids needing custom training objects.
    return tf.keras.models.load_model(path, compile=False)
86
-
87
-
88
def load_means_std(stats_path: Optional[str]) -> Optional[Dict[str, Dict[str, float]]]:
    """Load the per-feature mean/std JSON; return None when it cannot be found.

    Resolution order: explicit argument, STATS_PATH env var, DEFAULT_STATS_PATH.
    """
    requested = stats_path or os.getenv("STATS_PATH") or DEFAULT_STATS_PATH
    path = resolve_first(requested) if requested else None
    if not path:
        print("⚠️ means_std.json not found.")
        return None
    print(f"Loading means/std from {path} ({os.path.getsize(path)} bytes)")
    with open(path, "r") as f:
        return json.load(f)
97
-
98
-
99
# ---------- numeric coercion ----------
def coerce_float(val: Any) -> float:
    """Parse a number that may use European or US separators.

    '49.709,14' -> 49709.14, '49,709.14' -> 49709.14, '0,005' -> 0.005.
    Raises ValueError for empty strings; numeric inputs pass straight through.
    """
    if isinstance(val, (int, float)):
        return float(val)

    text = str(val).strip()
    if text == "":
        raise ValueError("empty")
    text = text.replace(" ", "")

    dot, comma = "." in text, "," in text
    if dot and comma:
        # Whichever separator appears last is the decimal mark.
        if text.rfind(",") > text.rfind("."):
            text = text.replace(".", "").replace(",", ".")
        else:
            text = text.replace(",", "")
    elif comma:
        # Comma only: treat it as the decimal mark.
        text = text.replace(",", ".")
    return float(text)
121
 
122
 
123
def z_manual(val: Any, mean: float, sd: float) -> float:
    """Standardize *val* as (val - mean) / sd.

    Returns 0.0 when the value cannot be parsed or when sd is zero/falsy,
    so one bad feature never aborts the whole request.
    """
    try:
        parsed = coerce_float(val)
    except Exception:
        return 0.0
    return (parsed - mean) / sd if sd else 0.0
131
 
132
 
133
# ---------- CORAL decoding ----------
def coral_probs_from_logits(logits_np: np.ndarray, monotone: bool = False) -> np.ndarray:
    """Convert CORAL cumulative logits to class probabilities.

    Args:
        logits_np: (N, K-1) cumulative boundary logits
            (ordinal classes, index 0 best ... K-1 worst).
        monotone: if True, enforce non-increasing thresholds per sample
            (t1 >= t2 >= ...) before decoding.

    Returns:
        (N, K) probabilities, clipped to [1e-12, 1].

    Fixes over the previous version:
      - the per-row Python double loop is replaced by a vectorized
        ``np.minimum.accumulate`` along axis 1 (same clamping semantics);
      - the caller's array is no longer mutated in place when
        ``monotone=True`` (``np.asarray`` can alias an already-float32 input).
    """
    logits = np.asarray(logits_np, dtype=np.float32)

    if monotone:
        # Running minimum left->right == clamp rows to non-increasing.
        # accumulate() returns a fresh array, so the input stays untouched.
        logits = np.minimum.accumulate(logits, axis=1)

    sig = 1.0 / (1.0 + np.exp(-logits))  # sigmoid of each boundary logit
    n = sig.shape[0]
    left = np.concatenate([np.ones((n, 1), dtype=np.float32), sig], axis=1)
    right = np.concatenate([sig, np.zeros((n, 1), dtype=np.float32)], axis=1)
    # P(class k) = sigma(t_k) - sigma(t_{k+1}), with sigma(t_0)=1, sigma(t_K)=0.
    return np.clip(left - right, 1e-12, 1.0)
157
-
158
-
159
# ---------- FastAPI app ----------
app = FastAPI(title="Static Fingerprint API", version="1.1.0")

# Wide-open CORS (no credentials) so browser frontends can call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

print("Loading model / imputer / scaler...")
model = load_model_robust()
imputer = load_joblib("imputer", DEFAULT_IMPUTER_CANDIDATES)
scaler = load_joblib("scaler", DEFAULT_SCALER_CANDIDATES)
stats = load_means_std(os.getenv("STATS_PATH"))

# Feature order preference: scaler.feature_names_in_ (sklearn >= 1.0),
# then imputer.feature_names_in_, then the key order of means_std.json;
# otherwise fail loudly at startup.
if hasattr(scaler, "feature_names_in_"):
    FEATURES: List[str] = list(scaler.feature_names_in_)
    print("FEATURES from scaler.feature_names_in_")
elif hasattr(imputer, "feature_names_in_"):
    FEATURES = list(imputer.feature_names_in_)
    print("FEATURES from imputer.feature_names_in_")
elif isinstance(stats, dict):
    FEATURES = list(stats.keys())
    print("FEATURES from means_std.json order")
else:
    raise RuntimeError("Cannot determine feature order. Provide scaler/imputer with feature_names_in_ or a means_std.json.")

print("Feature order:", FEATURES)
print("Artifacts present:",
      {"imputer": imputer is not None, "scaler": scaler is not None, "stats": stats is not None})
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
@app.get("/")
def root():
    """Landing endpoint: advertise the useful routes."""
    return {
        "message": "Static Fingerprint API is running.",
        "try": ["GET /health", "POST /predict", "POST /echo"],
    }


@app.get("/health")
def health():
    """Report service status plus which artifacts were loaded."""
    return {
        "status": "ok",
        "features": FEATURES,
        "classes": CLASSES,
        "artifacts": {
            "imputer": imputer is not None,
            "scaler": scaler is not None,
            "means_std": stats is not None,
        },
    }


@app.post("/echo")
async def echo(req: Request):
    """Echo the JSON payload back (connectivity debugging)."""
    payload = await req.json()
    return {"received": payload}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
 
226
def preprocess_payload_to_X(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a feature->value payload into standardized model input.

    Returns a dict with:
      - "X": np.ndarray of shape (1, n_features) ready for the model
      - "z_scores": per-feature z value (when derivable)
      - "missing": features absent from the payload
      - "used": per-feature raw value actually used (post-imputation)

    Raises RuntimeError when neither a scaler nor means_std.json is available.
    """
    missing: List[str] = []
    z_scores: Dict[str, float] = {}

    # Raw vector in training feature order; NaN marks absent values so the
    # imputer (or the fallback below) can fill them.
    raw_values: List[float] = []
    for feat in FEATURES:
        if feat in payload:
            raw_values.append(coerce_float(payload[feat]))
        else:
            missing.append(feat)
            raw_values.append(np.nan)
    raw = np.array([raw_values], dtype=np.float32)

    # Imputation: fitted imputer if present, else stats means, else 0.0.
    if imputer is not None:
        raw_imp = imputer.transform(raw)
    else:
        raw_imp = raw.copy()
        for j, feat in enumerate(FEATURES):
            if np.isnan(raw_imp[0, j]):
                if stats and feat in stats:
                    raw_imp[0, j] = stats[feat].get("mean", 0.0)
                else:
                    raw_imp[0, j] = 0.0

    # Standardization: fitted scaler if present, else manual z via stats.
    if scaler is not None:
        X = scaler.transform(raw_imp).astype(np.float32)
        # z-scores are still derivable when the scaler exposes its fit params.
        if hasattr(scaler, "mean_") and hasattr(scaler, "scale_"):
            for j, feat in enumerate(FEATURES):
                mu = float(scaler.mean_[j])
                sd = float(scaler.scale_[j])
                z_scores[feat] = 0.0 if sd == 0 else (float(raw_imp[0, j]) - mu) / sd
    else:
        if not stats:
            raise RuntimeError("No scaler and no means_std.json — cannot standardize.")
        z_row: List[float] = []
        for j, feat in enumerate(FEATURES):
            z = z_manual(raw_imp[0, j], float(stats[feat]["mean"]), float(stats[feat]["std"]))
            z_row.append(z)
            z_scores[feat] = float(z)
        X = np.array([z_row], dtype=np.float32)

    # Raw values that actually fed the model (after imputation).
    used_raw = {feat: float(raw_imp[0, j]) for j, feat in enumerate(FEATURES)}

    return {
        "X": X,
        "z_scores": z_scores,
        "missing": missing,
        "used": used_raw,
    }
299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
@app.post("/predict")
async def predict(req: Request):
    """Run inference on a feature->value JSON object and return probabilities."""
    payload = await req.json()
    if not isinstance(payload, dict):
        return {"error": "Expected a JSON object mapping feature -> value."}

    prep = preprocess_payload_to_X(payload)
    X: np.ndarray = prep["X"]

    raw = model.predict(X, verbose=0)

    # Head detection by width: K-1 columns -> CORAL logits, otherwise softmax.
    debug: Dict[str, Any] = {"raw_shape": list(raw.shape)}
    if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
        decode_mode = "auto_coral_monotone" if APPLY_CORAL_MONOTONE else "auto_coral"
        probs = coral_probs_from_logits(raw, monotone=APPLY_CORAL_MONOTONE)[0]
    else:
        decode_mode = "auto_softmax"
        probs = raw[0]
        total = float(np.sum(probs))
        if total > 0:
            probs = probs / total
    debug["decode_mode"] = decode_mode
    debug["raw_first_row"] = [float(v) for v in np.array(raw[0]).ravel().tolist()]

    best = int(np.argmax(probs))

    return {
        "input_ok": not prep["missing"],
        "missing": prep["missing"],
        "used_raw": prep["used"],        # values after imputation
        "z_scores": prep["z_scores"],    # standardized (from scaler or stats)
        "probabilities": {cls: float(probs[i]) for i, cls in enumerate(CLASSES)},
        "predicted_state": CLASSES[best],
        "debug": debug,
    }
 
 
 
 
 
 
 
 
 
 
import os, json, io, traceback
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import tensorflow as tf
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
 
10
# ----------------- CONFIG -----------------
# Artifact locations; model and stats paths are overridable via env vars.
MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5")
STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
IMPUTER_CANDIDATES = ["imputer.joblib", "imputer.pkl", "imputer.sav"]
SCALER_CANDIDATES = ["scaler.joblib", "scaler.pkl", "scaler.sav"]

# Ordinal classes, best (index 0) to worst (index 4).
CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]

# ⛔ DO NOT CHANGE: exact feature order used in training.
FEATURES: List[str] = [
    "autosuf_oper",
    "improductiva",
    "gastos_fin_over_avg_cart",
    "_equity",
    "grado_absorcion",
    "_cartera_bruta",
    "gastos_oper_over_ing_oper",
    "cartera_vencida_ratio",
    "roe_pre_tax",
    "_assets",
    "_liab",
    "equity_over_assets",
    "_margen_bruto",
    "prov_over_cartera",
    "gastos_oper_over_cart",
    "ing_cartera_over_ing_total",
    "debt_to_equity",
    "prov_gasto_over_cart",
    "cov_improductiva",
    "rend_cart_over_avg_cart",
    "roa_pre_tax",
]
# ------------------------------------------
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ # --------- helpers: I/O + numeric coercion ---------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
def coerce_float(val: Any) -> float:
    """Convert *val* to float, tolerating thousand/decimal separators.

    "49.709,14" -> 49709.14
    "49,709.14" -> 49709.14
    "0,005"     -> 0.005

    Numeric inputs (including numpy scalars) pass straight through.
    Raises ValueError for empty strings.
    """
    if isinstance(val, (int, float, np.number)):
        return float(val)

    s = str(val).strip()
    if s == "":
        raise ValueError("empty")
    s = s.replace(" ", "")

    if "." in s and "," in s:
        # The separator appearing last is the decimal mark.
        if s.rfind(",") > s.rfind("."):
            s = s.replace(".", "").replace(",", ".")
        else:
            s = s.replace(",", "")
    elif "," in s:
        # Comma only: treat it as the decimal mark.
        s = s.replace(",", ".")
    # Dot only (or neither): already parseable.
    return float(s)
74
 
75
 
76
def load_json(path: str) -> dict:
    """Read *path* and return its parsed JSON content."""
    with open(path, "r") as fh:
        return json.load(fh)
 
 
 
 
 
79
 
80
 
81
def load_joblib_if_exists(candidates: List[str]):
    """Try loading a joblib/pickle artifact (imputer/scaler).

    Checks each candidate filename in the current working directory and
    returns (obj, path_str or None, error_str or None):
      - (obj, path, None)  on success
      - (None, path, err)  when the file exists but fails to load
      - (None, None, None) when no candidate file exists
    """
    cwd = os.getcwd()
    for name in candidates:
        path = os.path.join(cwd, name)
        if not os.path.isfile(path):
            continue
        try:
            # Imported lazily so the dependency is only needed when used.
            import joblib  # type: ignore
            with open(path, "rb") as fh:
                return joblib.load(fh), path, None
        except Exception as exc:
            return None, path, f"{type(exc).__name__}({exc})"
    return None, None, None
98
+
99
+
100
# --------- model / artifacts load ---------
print("Loading model / imputer / scaler...")

# Model: load at startup so a missing/corrupt file fails fast.
model = tf.keras.models.load_model(MODEL_PATH, compile=False)

# Imputer (optional artifact)
imputer, imputer_path, imputer_err = load_joblib_if_exists(IMPUTER_CANDIDATES)
if imputer_path and imputer_err:
    print(f"⚠️ Failed to load imputer from {imputer_path}: {imputer_err}")
elif imputer:
    print(f"Loaded imputer from {imputer_path}")
else:
    print("⚠️ No imputer found — skipping median imputation.")

# Scaler (optional artifact)
scaler, scaler_path, scaler_err = load_joblib_if_exists(SCALER_CANDIDATES)
if scaler_path and scaler_err:
    print(f"⚠️ Failed to load scaler from {scaler_path}: {scaler_err}")
elif scaler:
    print(f"Loaded scaler from {scaler_path}")
else:
    print("⚠️ No scaler found — using manual z-scoring if stats are available.")

# Means/std table: fallback source for manual z-scoring when no scaler.
stats = {}
if os.path.isfile(STATS_PATH):
    stats = load_json(STATS_PATH)
    print(f"Loaded means/std from {STATS_PATH}")
else:
    print("⚠️ No means_std.json found — manual z-scoring will be unavailable if scaler missing.")
131
 
132
 
133
# --------- decoding for CORAL vs softmax ---------
def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
    """Convert (N, K-1) CORAL cumulative logits to (N, K) probabilities.

    P(class k) = sigma(t_k) - sigma(t_{k+1}) with sigma(t_0)=1, sigma(t_K)=0;
    the result is clipped to [1e-12, 1] to keep downstream math safe.

    Fix: the previous version round-tripped through TensorFlow tensors
    (``tf.convert_to_tensor`` / ``tf.math.sigmoid`` / ``.numpy()``) for what
    is trivial element-wise math; pure NumPy computes the same values without
    the tensor conversion overhead.
    """
    logits = np.asarray(logits_np, dtype=np.float32)
    sig = 1.0 / (1.0 + np.exp(-logits))  # sigmoid, shape (N, K-1)
    n = sig.shape[0]
    left = np.concatenate([np.ones((n, 1), dtype=np.float32), sig], axis=1)
    right = np.concatenate([sig, np.zeros((n, 1), dtype=np.float32)], axis=1)
    return np.clip(left - right, 1e-12, 1.0)
 
144
 
145
 
146
def decode_logits(raw: np.ndarray) -> Tuple[np.ndarray, str]:
    """Decode a (1, M) model output into class probabilities.

    Detects the head type from the output width M:
      - M == K-1: CORAL ordinal logits -> coral_probs_from_logits
      - M == K:   scores -> numerically-stable softmax
      - else:     L1-normalized fallback (uniform when everything is 0)

    Returns (probs of shape (K,), mode string).
    Raises ValueError when *raw* is not 2-D.

    Fix: the return annotation was the tuple literal ``(np.ndarray, str)``,
    which is not a valid type annotation; replaced with ``Tuple[...]``.
    """
    if raw.ndim != 2:
        raise ValueError(f"Unexpected raw shape {raw.shape}")
    M = raw.shape[1]
    K = len(CLASSES)

    if M == K - 1:
        # CORAL logits
        return coral_probs_from_logits(raw)[0], "auto_coral"
    if M == K:
        # Softmax or unnormalized scores; subtract max for stability.
        row = raw[0]
        exps = np.exp(row - np.max(row))
        return exps / np.sum(exps), "auto_softmax"

    # Fallback: normalize across whatever came out of the model.
    row = raw[0]
    total = float(np.sum(np.abs(row)))
    probs = (row / total) if total > 0 else np.ones_like(row) / len(row)
    return probs, f"fallback_M{M}_K{K}"
173
 
174
 
175
# --------- preprocessing pipeline ---------
def build_raw_vector(payload: Dict[str, Any]) -> np.ndarray:
    """Assemble the raw feature vector in exact training order.

    Values are coerced to float robustly; features that are absent from the
    payload or unparseable become NaN for the imputer (or fallback) to fill.
    """
    def value_for(feature: str) -> float:
        if feature not in payload:
            return np.nan
        try:
            return coerce_float(payload[feature])
        except Exception:
            # Unparseable input is treated the same as missing.
            return np.nan

    return np.array([value_for(f) for f in FEATURES], dtype=np.float32)
 
192
 
 
193
 
194
def apply_imputer_if_any(x: np.ndarray) -> np.ndarray:
    """Fill NaNs in the 1-D raw vector.

    Uses the fitted sklearn imputer when available; otherwise replaces each
    NaN with the feature's mean from means_std.json, or 0.0 as a last resort.
    """
    if imputer is not None:
        # sklearn transformers expect a 2-D batch.
        return imputer.transform(x.reshape(1, -1)).astype(np.float32)[0]

    filled = x.copy()
    for idx, feat in enumerate(FEATURES):
        if not np.isnan(filled[idx]):
            continue
        entry = stats.get(feat, {}) if stats else {}
        filled[idx] = float(entry["mean"]) if "mean" in entry else 0.0
    return filled
207
+
208
+
209
def apply_scaling_or_stats(raw_vec: np.ndarray) -> Tuple[np.ndarray, Dict[str, float], str]:
    """Standardize the imputed feature vector.

    Returns (z_vec, per-feature z dict, mode):
      - "sklearn_scaler" when the fitted scaler is available
        (``scaler.transform`` on the 1-row batch);
      - "manual_stats" otherwise, computing (x - mean) / std from
        means_std.json; missing entries default to mean=0.0 / std=1.0, and a
        std of 0 is treated as 1.0 to avoid division by zero.

    Fix: the return annotation was the tuple literal
    ``(np.ndarray, Dict[str, float], str)``, which is not a valid type
    annotation; replaced with ``Tuple[...]``.
    """
    if scaler is not None:
        z = scaler.transform(raw_vec.reshape(1, -1)).astype(np.float32)[0]
        z_detail = {f: float(z[i]) for i, f in enumerate(FEATURES)}
        return z, z_detail, "sklearn_scaler"

    z = np.zeros_like(raw_vec, dtype=np.float32)
    z_detail: Dict[str, float] = {}
    for i, f in enumerate(FEATURES):
        entry = stats.get(f, {})
        mean = entry.get("mean", 0.0)
        sd = entry.get("std", 1.0) or 1.0  # falsy std (0/None) -> 1.0
        z[i] = (raw_vec[i] - mean) / sd
        z_detail[f] = float(z[i])
    return z, z_detail, "manual_stats"
230
+
231
+
232
# ----------------- FastAPI -----------------
app = FastAPI(title="Static Fingerprint API", version="1.1.0")
# Wide-open CORS (no credentials) so browser frontends can call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
def root():
    """Landing endpoint: advertise the useful routes."""
    return {
        "message": "Static Fingerprint API is running.",
        "try": ["GET /health", "POST /predict", "POST /debug/z"],
    }

@app.get("/health")
def health():
    """Report configuration, artifact availability and feature orders."""
    stats_keys = []
    try:
        if os.path.isfile(STATS_PATH):
            stats_keys = list(load_json(STATS_PATH).keys())
    except Exception:
        pass  # health must never fail because of a bad stats file

    return {
        "status": "ok",
        "classes": CLASSES,
        "features_training_order": FEATURES,
        "features_in_means_std": stats_keys,
        "model_file": MODEL_PATH,
        "imputer": bool(imputer),
        "scaler": bool(scaler),
        "stats_available": bool(stats),
    }
268
 
269
@app.post("/debug/z")
async def debug_z(req: Request):
    """Show, per feature, the raw input, imputed value and z-score.

    Mirrors the /predict preprocessing exactly so standardization issues can
    be inspected without running the model.
    """
    try:
        payload = await req.json()
        if not isinstance(payload, dict):
            return JSONResponse(status_code=400, content={"error": "Expected JSON object"})

        raw = build_raw_vector(payload)
        raw_imp = apply_imputer_if_any(raw)
        z, z_detail, mode = apply_scaling_or_stats(raw_imp)

        rows = [
            {
                "feature": feat,
                "input_value": None if np.isnan(raw[i]) else float(raw[i]),
                "imputed_value": float(raw_imp[i]),
                "z": float(z[i]),
                "mean": stats.get(feat, {}).get("mean", None),
                "std": stats.get(feat, {}).get("std", None),
            }
            for i, feat in enumerate(FEATURES)
        ]

        return {"preprocess_mode": mode, "rows": rows}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e), "trace": traceback.format_exc()})
294
 
295
@app.post("/predict")
async def predict(req: Request):
    """
    Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
    Missing features are imputed if imputer present; else filled with means (if stats) or 0.
    """
    try:
        payload = await req.json()
        if not isinstance(payload, dict):
            return JSONResponse(status_code=400, content={"error": "Expected JSON object"})

        # Preprocess in EXACT training order: build -> impute -> standardize.
        raw = build_raw_vector(payload)                     # may contain NaNs
        raw_imp = apply_imputer_if_any(raw)
        z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)

        # Inference
        X = z_vec.reshape(1, -1).astype(np.float32)
        raw_logits = model.predict(X, verbose=0)
        probs, mode = decode_logits(raw_logits)

        # Response packaging
        best = int(np.argmax(probs))
        missing = [feat for i, feat in enumerate(FEATURES) if np.isnan(raw[i])]

        return {
            "input_ok": not missing,
            "missing": missing,
            "preprocess": {
                "imputer": bool(imputer),
                "scaler": bool(scaler),
                "z_mode": z_mode,
            },
            "z_scores": z_detail,  # per feature
            "probabilities": {cls: float(probs[i]) for i, cls in enumerate(CLASSES)},
            "predicted_state": CLASSES[best],
            "debug": {
                "raw_shape": list(raw_logits.shape),
                "decode_mode": mode,
                "raw_first_row": [float(v) for v in raw_logits[0]],
            },
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e), "trace": traceback.format_exc()})