COCODEDE04 committed on
Commit
a850728
·
verified ·
1 Parent(s): 8109a99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -108
app.py CHANGED
@@ -1,5 +1,5 @@
1
- import os, json, traceback
2
- from typing import Any, Dict, List
3
 
4
  import numpy as np
5
  import tensorflow as tf
@@ -7,7 +7,7 @@ from fastapi import FastAPI, Request
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import JSONResponse
9
 
10
- # Try SHAP
11
  try:
12
  import shap
13
  SHAP_AVAILABLE = True
@@ -94,7 +94,8 @@ def load_joblib_if_exists(candidates: List[str]):
94
  p = os.path.join(os.getcwd(), name)
95
  if os.path.isfile(p):
96
  try:
97
- import joblib # lazy import
 
98
  with open(p, "rb") as fh:
99
  obj = joblib.load(fh)
100
  return obj, p, None
@@ -146,6 +147,8 @@ def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
146
  left = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
147
  right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
148
  probs = tf.clip_by_value(left - right, 1e-12, 1.0)
 
 
149
  return probs.numpy()
150
 
151
 
@@ -161,14 +164,17 @@ def decode_logits(raw: np.ndarray) -> (np.ndarray, str):
161
  K = len(CLASSES)
162
 
163
  if M == K - 1:
 
164
  probs = coral_probs_from_logits(raw)[0]
165
  return probs, "auto_coral"
166
  elif M == K:
 
167
  row = raw[0]
168
  exps = np.exp(row - np.max(row))
169
  probs = exps / np.sum(exps)
170
  return probs, "auto_softmax"
171
  else:
 
172
  row = raw[0]
173
  s = float(np.sum(np.abs(row)))
174
  probs = (row / s) if s > 0 else np.ones_like(row) / len(row)
@@ -196,6 +202,7 @@ def build_raw_vector(payload: Dict[str, Any]) -> np.ndarray:
196
 
197
  def apply_imputer_if_any(x: np.ndarray) -> np.ndarray:
198
  if imputer is not None:
 
199
  return imputer.transform(x.reshape(1, -1)).astype(np.float32)[0]
200
  # fallback: replace NaNs with feature means from stats if available, else 0
201
  out = x.copy()
@@ -231,53 +238,35 @@ def apply_scaling_or_stats(raw_vec: np.ndarray) -> (np.ndarray, Dict[str, float]
231
  return z, z_detail, "manual_stats"
232
 
233
 
234
- # --------- SHAP: model wrapper & explainer ---------
235
  def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
236
  """
237
- Batch-safe wrapper for SHAP and other callers.
238
-
239
- Input:
240
- z_batch_np: (N, n_features) or (n_features,) in z-space
241
-
242
- Output:
243
- probs: (N, K) matrix of class probabilities
244
  """
245
- z = np.array(z_batch_np, dtype=np.float32)
246
-
247
- # Ensure 2D: (N, D)
248
- if z.ndim == 1:
249
- z = z.reshape(1, -1)
250
-
251
- raw = model.predict(z, verbose=0) # shape: (N, M)
252
  if raw.ndim != 2:
253
  raise ValueError(f"Unexpected raw shape from model: {raw.shape}")
254
-
255
  N, M = raw.shape
256
  K = len(CLASSES)
257
 
258
  if M == K - 1:
259
- # CORAL: logits for K-1 thresholds → K probabilities
260
  probs = coral_probs_from_logits(raw) # (N, K)
261
  elif M == K:
262
- # Softmax or unnormalized scores, per row
263
  exps = np.exp(raw - np.max(raw, axis=1, keepdims=True))
264
- probs = exps / np.sum(exps, axis=1, keepdims=True) # (N, K)
265
  else:
266
- # Fallback: row-wise normalization
267
- s = np.sum(np.abs(raw), axis=1, keepdims=True) # (N, 1)
268
- probs = np.divide(
269
- raw,
270
- s,
271
- out=np.ones_like(raw) / max(M, 1),
272
- where=(s > 0),
273
- ) # (N, M)
274
-
275
  return probs
276
 
277
 
278
  EXPLAINER = None
279
  if SHAP_AVAILABLE:
280
  try:
 
281
  BACKGROUND_Z = np.zeros((50, len(FEATURES)), dtype=np.float32)
282
  EXPLAINER = shap.KernelExplainer(model_proba_from_z, BACKGROUND_Z)
283
  print("SHAP KernelExplainer initialized.")
@@ -325,7 +314,7 @@ def health():
325
  "imputer": bool(imputer),
326
  "scaler": bool(scaler),
327
  "stats_available": bool(stats),
328
- "shap_available": bool(EXPLAINER),
329
  }
330
 
331
 
@@ -361,100 +350,107 @@ async def predict(req: Request):
361
  """
362
  Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
363
  Missing features are imputed if imputer present; else filled with means (if stats) or 0.
364
-
365
- Returns:
366
- - probabilities over classes
367
- - z-scores per indicator
368
- - SHAP contributions for *all* classes (if SHAP is available), in z-space.
369
  """
370
  try:
371
  payload = await req.json()
372
  if not isinstance(payload, dict):
373
- return JSONResponse(
374
- status_code=400,
375
- content={"error": "Expected JSON object"},
376
- )
377
-
378
- # 1) Build raw feature vector in training order
379
- raw = build_raw_vector(payload) # may contain NaNs
380
- raw_imp = apply_imputer_if_any(raw) # impute
381
  z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp) # scale / z-score
382
 
383
- # 2) Predict
384
- X_z = z_vec.reshape(1, -1).astype(np.float32) # (1, D) in z-space
385
- raw_logits = model.predict(X_z, verbose=0) # (1, M)
386
- probs, decode_mode = decode_logits(raw_logits) # (K,)
387
 
388
  pred_idx = int(np.argmax(probs))
389
  probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
390
  missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
391
 
392
- # 3) SHAP for ALL classes (if explainer is available)
393
- shap_block: Dict[str, Any] = {"available": False}
394
- if EXPLAINER is not None and SHAP_AVAILABLE:
395
  try:
396
- # KernelExplainer built with model_proba_from_z, so we pass z-space
397
- shap_vals = EXPLAINER.shap_values(X_z, nsamples=50)
398
- K = len(CLASSES)
399
- D = len(FEATURES)
400
 
401
- all_classes: Dict[str, Dict[str, float]] = {}
402
-
403
- # Case 1: vector-output model → list of length K
404
  if isinstance(shap_vals, list):
405
- if len(shap_vals) != K:
406
- raise ValueError(
407
- f"Expected {K} SHAP arrays (one per class), got {len(shap_vals)}"
408
- )
409
-
410
- for c_idx, cname in enumerate(CLASSES):
411
- arr = np.asarray(shap_vals[c_idx])
412
- if arr.ndim != 2 or arr.shape[0] < 1 or arr.shape[1] != D:
413
- raise ValueError(
414
- f"Unexpected SHAP shape for class {cname}: {arr.shape}, expected (1,{D})"
415
- )
416
- vec = arr[0] # (D,)
417
- all_classes[cname] = {
418
- FEATURES[i]: float(vec[i]) for i in range(D)
419
- }
420
-
421
- # Case 2: some SHAP versions return a single (K,D) array
422
- elif isinstance(shap_vals, np.ndarray):
423
- arr = np.asarray(shap_vals)
424
- if arr.ndim == 3 and arr.shape[0] == 1 and arr.shape[2] == D:
425
- # shape (1, K, D) take [0]
426
- arr = arr[0]
427
- if arr.ndim != 2 or arr.shape[0] != K or arr.shape[1] != D:
 
 
 
 
 
 
428
  raise ValueError(
429
- f"Unexpected SHAP ndarray shape {arr.shape}; "
430
- f"expected (K,{D}) or (1,K,{D})"
431
  )
432
-
433
- for c_idx, cname in enumerate(CLASSES):
434
- vec = arr[c_idx] # (D,)
435
- all_classes[cname] = {
436
- FEATURES[i]: float(vec[i]) for i in range(D)
437
- }
438
 
439
  else:
440
- raise TypeError(
441
- f"Unsupported SHAP output type: {type(shap_vals).__name__}"
 
 
 
 
 
 
 
442
  )
443
 
444
- shap_block = {
445
- "available": True,
446
- "predicted_class": CLASSES[pred_idx],
447
- "all_classes": all_classes,
 
 
 
 
 
 
 
448
  }
449
 
450
- except Exception as e:
451
- shap_block = {
452
- "available": False,
453
- "error": str(e),
454
- "trace": traceback.format_exc(),
455
  }
456
 
457
- # 4) Final response
 
 
 
 
 
458
  return {
459
  "input_ok": (len(missing) == 0),
460
  "missing": missing,
@@ -463,13 +459,13 @@ async def predict(req: Request):
463
  "scaler": bool(scaler),
464
  "z_mode": z_mode,
465
  },
466
- "z_scores": z_detail, # per indicator, in z-space
467
- "probabilities": probs_dict,
468
  "predicted_state": CLASSES[pred_idx],
469
- "shap": shap_block,
470
  "debug": {
471
  "raw_shape": list(raw_logits.shape),
472
- "decode_mode": decode_mode,
473
  "raw_first_row": [float(v) for v in raw_logits[0]],
474
  },
475
  }
@@ -477,5 +473,5 @@ async def predict(req: Request):
477
  except Exception as e:
478
  return JSONResponse(
479
  status_code=500,
480
- content={"error": str(e), "trace": traceback.format_exc()},
481
  )
 
1
+ import os, json, io, traceback
2
+ from typing import Any, Dict, List, Optional
3
 
4
  import numpy as np
5
  import tensorflow as tf
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import JSONResponse
9
 
10
+ # ---------- SHAP optional import ----------
11
  try:
12
  import shap
13
  SHAP_AVAILABLE = True
 
94
  p = os.path.join(os.getcwd(), name)
95
  if os.path.isfile(p):
96
  try:
97
+ # Import inside to avoid hard dependency if not used
98
+ import joblib # type: ignore
99
  with open(p, "rb") as fh:
100
  obj = joblib.load(fh)
101
  return obj, p, None
 
147
  left = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
148
  right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
149
  probs = tf.clip_by_value(left - right, 1e-12, 1.0)
150
+ # normalize row-wise just in case
151
+ probs = probs / tf.reduce_sum(probs, axis=1, keepdims=True)
152
  return probs.numpy()
153
 
154
 
 
164
  K = len(CLASSES)
165
 
166
  if M == K - 1:
167
+ # CORAL logits
168
  probs = coral_probs_from_logits(raw)[0]
169
  return probs, "auto_coral"
170
  elif M == K:
171
+ # Softmax or unnormalized scores
172
  row = raw[0]
173
  exps = np.exp(row - np.max(row))
174
  probs = exps / np.sum(exps)
175
  return probs, "auto_softmax"
176
  else:
177
+ # Fallback: normalize across whatever is there
178
  row = raw[0]
179
  s = float(np.sum(np.abs(row)))
180
  probs = (row / s) if s > 0 else np.ones_like(row) / len(row)
 
202
 
203
  def apply_imputer_if_any(x: np.ndarray) -> np.ndarray:
204
  if imputer is not None:
205
+ # imputer expects 2D
206
  return imputer.transform(x.reshape(1, -1)).astype(np.float32)[0]
207
  # fallback: replace NaNs with feature means from stats if available, else 0
208
  out = x.copy()
 
238
  return z, z_detail, "manual_stats"
239
 
240
 
241
+ # --------- SHAP model wrapper & explainer ---------
242
  def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
243
  """
244
+ Wrapper for SHAP: takes (N, n_features) in z-space and returns (N, K) probabilities.
 
 
 
 
 
 
245
  """
246
+ raw = model.predict(z_batch_np, verbose=0)
 
 
 
 
 
 
247
  if raw.ndim != 2:
248
  raise ValueError(f"Unexpected raw shape from model: {raw.shape}")
 
249
  N, M = raw.shape
250
  K = len(CLASSES)
251
 
252
  if M == K - 1:
253
+ # CORAL
254
  probs = coral_probs_from_logits(raw) # (N, K)
255
  elif M == K:
256
+ # Softmax or scores
257
  exps = np.exp(raw - np.max(raw, axis=1, keepdims=True))
258
+ probs = exps / np.sum(exps, axis=1, keepdims=True)
259
  else:
260
+ # Fallback normalize
261
+ s = np.sum(np.abs(raw), axis=1, keepdims=True)
262
+ probs = np.divide(raw, s, out=np.ones_like(raw) / max(M, 1), where=(s > 0))
 
 
 
 
 
 
263
  return probs
264
 
265
 
266
  EXPLAINER = None
267
  if SHAP_AVAILABLE:
268
  try:
269
+ # Background: 50 "average" institutions at z=0
270
  BACKGROUND_Z = np.zeros((50, len(FEATURES)), dtype=np.float32)
271
  EXPLAINER = shap.KernelExplainer(model_proba_from_z, BACKGROUND_Z)
272
  print("SHAP KernelExplainer initialized.")
 
314
  "imputer": bool(imputer),
315
  "scaler": bool(scaler),
316
  "stats_available": bool(stats),
317
+ "shap_available": bool(EXPLAINER is not None),
318
  }
319
 
320
 
 
350
  """
351
  Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
352
  Missing features are imputed if imputer present; else filled with means (if stats) or 0.
 
 
 
 
 
353
  """
354
  try:
355
  payload = await req.json()
356
  if not isinstance(payload, dict):
357
+ return JSONResponse(status_code=400, content={"error": "Expected JSON object"})
358
+
359
+ # ---------- PREPROCESSING ----------
360
+ raw = build_raw_vector(payload) # may contain NaNs
361
+ raw_imp = apply_imputer_if_any(raw) # impute
 
 
 
362
  z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp) # scale / z-score
363
 
364
+ # ---------- PREDICTION ----------
365
+ X = z_vec.reshape(1, -1).astype(np.float32)
366
+ raw_logits = model.predict(X, verbose=0)
367
+ probs, mode = decode_logits(raw_logits)
368
 
369
  pred_idx = int(np.argmax(probs))
370
  probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
371
  missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
372
 
373
+ # ---------- SHAP EXPLANATION (predicted class only) ----------
374
+ shap_out = {"error": "SHAP not computed"}
375
+ if EXPLAINER is not None:
376
  try:
377
+ shap_vals = EXPLAINER.shap_values(X, nsamples=100)
 
 
 
378
 
379
+ # 1) Pull raw SHAP tensor
 
 
380
  if isinstance(shap_vals, list):
381
+ # Classic multi-output: list[len = n_classes], each (n_samples, n_features)
382
+ raw_sv = np.array(shap_vals[pred_idx])
383
+ else:
384
+ # Single array, possibly (n_samples, n_features) or (n_samples, n_features, n_outputs)
385
+ raw_sv = np.array(shap_vals)
386
+
387
+ # 2) Normalize shapes to a 1D vector (n_features,) for the predicted class
388
+ if raw_sv.ndim == 1:
389
+ # Already (n_features,)
390
+ shap_vec = raw_sv.astype(float)
391
+
392
+ elif raw_sv.ndim == 2:
393
+ # (n_samples, n_features) or (n_features, 1)
394
+ if raw_sv.shape[0] == 1:
395
+ # (1, n_features)
396
+ shap_vec = raw_sv[0].astype(float)
397
+ elif raw_sv.shape[1] == 1:
398
+ # (n_features, 1)
399
+ shap_vec = raw_sv[:, 0].astype(float)
400
+ else:
401
+ # assume (n_samples, n_features), take first sample
402
+ shap_vec = raw_sv[0].astype(float)
403
+
404
+ elif raw_sv.ndim == 3:
405
+ # Most likely (n_samples, n_features, n_outputs)
406
+ n_samples, n_features, n_outputs = raw_sv.shape
407
+ if n_samples < 1:
408
+ raise ValueError(f"SHAP 3D output has zero samples: {raw_sv.shape}")
409
+ if pred_idx >= n_outputs:
410
  raise ValueError(
411
+ f"SHAP 3D output has only {n_outputs} outputs, "
412
+ f"cannot index class {pred_idx}"
413
  )
414
+ # take first sample, all features, predicted class
415
+ shap_vec = raw_sv[0, :, pred_idx].astype(float)
 
 
 
 
416
 
417
  else:
418
+ # Fallback: flatten every axis after the sample axis, keep the first sample's row
419
+ flat = raw_sv.reshape(raw_sv.shape[0], -1)
420
+ shap_vec = flat[0].astype(float)
421
+
422
+ # 3) Sanity check length
423
+ if shap_vec.shape[0] != len(FEATURES):
424
+ raise ValueError(
425
+ f"Unexpected SHAP vector length {shap_vec.shape[0]} "
426
+ f"(expected {len(FEATURES)})"
427
  )
428
 
429
+ # 4) Expected value (baseline) for the predicted class
430
+ exp_raw = EXPLAINER.expected_value
431
+ if isinstance(exp_raw, (list, np.ndarray)):
432
+ exp_val = float(np.array(exp_raw)[pred_idx])
433
+ else:
434
+ exp_val = float(exp_raw)
435
+
436
+ # 5) Map feature -> contribution
437
+ shap_feature_contribs = {
438
+ FEATURES[i]: float(shap_vec[i])
439
+ for i in range(len(FEATURES))
440
  }
441
 
442
+ shap_out = {
443
+ "explained_class": CLASSES[pred_idx],
444
+ "expected_value": exp_val,
445
+ "shap_values": shap_feature_contribs,
 
446
  }
447
 
448
+ except Exception as e:
449
+ shap_out = {"error": str(e), "trace": traceback.format_exc()}
450
+ else:
451
+ shap_out = {"error": "SHAP not available on server"}
452
+
453
+ # ---------- RESPONSE ----------
454
  return {
455
  "input_ok": (len(missing) == 0),
456
  "missing": missing,
 
459
  "scaler": bool(scaler),
460
  "z_mode": z_mode,
461
  },
462
+ "z_scores": z_detail, # per feature (z-space)
463
+ "probabilities": probs_dict, # per class
464
  "predicted_state": CLASSES[pred_idx],
465
+ "shap": shap_out, # SHAP for predicted state only
466
  "debug": {
467
  "raw_shape": list(raw_logits.shape),
468
+ "decode_mode": mode,
469
  "raw_first_row": [float(v) for v in raw_logits[0]],
470
  },
471
  }
 
473
  except Exception as e:
474
  return JSONResponse(
475
  status_code=500,
476
+ content={"error": str(e), "trace": traceback.format_exc()}
477
  )