Spaces:

COCODEDE04
/

SF_FastAPI

Sleeping

App Files Community

COCODEDE04 committed on Nov 17, 2025

Commit

a6c0646

verified ·

1 Parent(s): 45857b7

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -94

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
-import os, json, io, traceback
-from typing import Any, Dict, List, Optional
 import numpy as np
 import tensorflow as tf
@@ -7,7 +7,7 @@ from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
-# ---------- SHAP optional import ----------
 try:
     import shap
     SHAP_AVAILABLE = True
@@ -94,8 +94,7 @@ def load_joblib_if_exists(candidates: List[str]):
         p = os.path.join(os.getcwd(), name)
         if os.path.isfile(p):
             try:
-                # Import inside to avoid hard dependency if not used
-                import joblib  # type: ignore
                 with open(p, "rb") as fh:
                     obj = joblib.load(fh)
                 return obj, p, None
@@ -147,8 +146,6 @@ def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
     left  = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
     right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
     probs = tf.clip_by_value(left - right, 1e-12, 1.0)
-    # normalize row-wise just in case
-    probs = probs / tf.reduce_sum(probs, axis=1, keepdims=True)
     return probs.numpy()
@@ -164,17 +161,14 @@ def decode_logits(raw: np.ndarray) -> (np.ndarray, str):
     K = len(CLASSES)
     if M == K - 1:
-        # CORAL logits
         probs = coral_probs_from_logits(raw)[0]
         return probs, "auto_coral"
     elif M == K:
-        # Softmax or unnormalized scores
         row = raw[0]
         exps = np.exp(row - np.max(row))
         probs = exps / np.sum(exps)
         return probs, "auto_softmax"
     else:
-        # Fallback: normalize across whatever is there
         row = raw[0]
         s = float(np.sum(np.abs(row)))
         probs = (row / s) if s > 0 else np.ones_like(row) / len(row)
@@ -202,7 +196,6 @@ def build_raw_vector(payload: Dict[str, Any]) -> np.ndarray:
 def apply_imputer_if_any(x: np.ndarray) -> np.ndarray:
     if imputer is not None:
-        # imputer expects 2D
         return imputer.transform(x.reshape(1, -1)).astype(np.float32)[0]
     # fallback: replace NaNs with feature means from stats if available, else 0
     out = x.copy()
@@ -238,7 +231,7 @@ def apply_scaling_or_stats(raw_vec: np.ndarray) -> (np.ndarray, Dict[str, float]
         return z, z_detail, "manual_stats"
-# --------- SHAP model wrapper & explainer ---------
 def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
     """
     Wrapper for SHAP: takes (N, n_features) in z-space and returns (N, K) probabilities.
@@ -250,14 +243,11 @@ def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
     K = len(CLASSES)
     if M == K - 1:
-        # CORAL
         probs = coral_probs_from_logits(raw)  # (N, K)
     elif M == K:
-        # Softmax or scores
         exps = np.exp(raw - np.max(raw, axis=1, keepdims=True))
         probs = exps / np.sum(exps, axis=1, keepdims=True)
     else:
-        # Fallback normalize
         s = np.sum(np.abs(raw), axis=1, keepdims=True)
         probs = np.divide(raw, s, out=np.ones_like(raw) / max(M, 1), where=(s > 0))
     return probs
@@ -266,7 +256,6 @@ def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
 EXPLAINER = None
 if SHAP_AVAILABLE:
     try:
-        # Background: 50 "average" institutions at z=0
         BACKGROUND_Z = np.zeros((50, len(FEATURES)), dtype=np.float32)
         EXPLAINER = shap.KernelExplainer(model_proba_from_z, BACKGROUND_Z)
         print("SHAP KernelExplainer initialized.")
@@ -314,7 +303,7 @@ def health():
         "imputer": bool(imputer),
         "scaler": bool(scaler),
         "stats_available": bool(stats),
-        "shap_available": bool(EXPLAINER is not None),
     }
@@ -356,12 +345,12 @@ async def predict(req: Request):
         if not isinstance(payload, dict):
             return JSONResponse(status_code=400, content={"error": "Expected JSON object"})
-        # ---------- PREPROCESSING ----------
         raw = build_raw_vector(payload)            # may contain NaNs
         raw_imp = apply_imputer_if_any(raw)        # impute
         z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scale / z-score
-        # ---------- PREDICTION ----------
         X = z_vec.reshape(1, -1).astype(np.float32)
         raw_logits = model.predict(X, verbose=0)
         probs, mode = decode_logits(raw_logits)
@@ -370,87 +359,62 @@ async def predict(req: Request):
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
-                # ---------- SHAP EXPLANATION (predicted class only) ----------
-        shap_out = {"error": "SHAP not computed"}
         if EXPLAINER is not None:
             try:
-                shap_vals = EXPLAINER.shap_values(X, nsamples=100)
-                # 1) Pull raw SHAP tensor
-                if isinstance(shap_vals, list):
-                    # Classic multi-output: list[len = n_classes], each (n_samples, n_features)
-                    raw_sv = np.array(shap_vals[pred_idx])
                 else:
-                    # Single array, possibly (n_samples, n_features) or (n_samples, n_features, n_outputs)
-                    raw_sv = np.array(shap_vals)
-                # 2) Normalize shapes to a 1D vector (n_features,) for the predicted class
-                if raw_sv.ndim == 1:
-                    # Already (n_features,)
-                    shap_vec = raw_sv.astype(float)
-                elif raw_sv.ndim == 2:
-                    # (n_samples, n_features) or (n_features, 1)
-                    if raw_sv.shape[0] == 1:
-                        # (1, n_features)
-                        shap_vec = raw_sv[0].astype(float)
-                    elif raw_sv.shape[1] == 1:
-                        # (n_features, 1)
-                        shap_vec = raw_sv[:, 0].astype(float)
                     else:
-                        # assume (n_samples, n_features), take first sample
-                        shap_vec = raw_sv[0].astype(float)
-                elif raw_sv.ndim == 3:
-                    # Most likely (n_samples, n_features, n_outputs)
-                    n_samples, n_features, n_outputs = raw_sv.shape
-                    if n_samples < 1:
-                        raise ValueError(f"SHAP 3D output has zero samples: {raw_sv.shape}")
-                    if pred_idx >= n_outputs:
-                        raise ValueError(
-                            f"SHAP 3D output has only {n_outputs} outputs, "
-                            f"cannot index class {pred_idx}"
-                        )
-                    # take first sample, all features, predicted class
-                    shap_vec = raw_sv[0, :, pred_idx].astype(float)
-                else:
-                    # Fallback: flatten all sample dims, keep first feature-block
-                    flat = raw_sv.reshape(raw_sv.shape[0], -1)
-                    shap_vec = flat[0].astype(float)
-                # 3) Sanity check length
-                if shap_vec.shape[0] != len(FEATURES):
-                    raise ValueError(
-                        f"Unexpected SHAP vector length {shap_vec.shape[0]} "
-                        f"(expected {len(FEATURES)})"
-                    )
-                # 4) Expected value (baseline) for the predicted class
-                exp_raw = EXPLAINER.expected_value
-                if isinstance(exp_raw, (list, np.ndarray)):
-                    exp_val = float(np.array(exp_raw)[pred_idx])
-                else:
-                    exp_val = float(exp_raw)
-                # 5) Map feature -> contribution
-                shap_feature_contribs = {
-                    FEATURES[i]: float(shap_vec[i])
-                    for i in range(len(FEATURES))
-                }
-                shap_out = {
-                    "explained_class": CLASSES[pred_idx],
-                    "expected_value": exp_val,
-                    "shap_values": shap_feature_contribs,
-                }
             except Exception as e:
-                shap_out = {"error": str(e), "trace": traceback.format_exc()}
-        else:
-            shap_out = {"error": "SHAP not available on server"}
-        # ---------- RESPONSE ----------
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
@@ -459,10 +423,10 @@ async def predict(req: Request):
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
-            "z_scores": z_detail,          # per feature (z-space)
-            "probabilities": probs_dict,   # per class
             "predicted_state": CLASSES[pred_idx],
-            "shap": shap_out,              # SHAP for predicted state only
             "debug": {
                 "raw_shape": list(raw_logits.shape),
                 "decode_mode": mode,

+import os, json, traceback
+from typing import Any, Dict, List
 import numpy as np
 import tensorflow as tf
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
+# Try SHAP
 try:
     import shap
     SHAP_AVAILABLE = True
         p = os.path.join(os.getcwd(), name)
         if os.path.isfile(p):
             try:
+                import joblib  # lazy import
                 with open(p, "rb") as fh:
                     obj = joblib.load(fh)
                 return obj, p, None
     left  = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
     right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
     probs = tf.clip_by_value(left - right, 1e-12, 1.0)
     return probs.numpy()
     K = len(CLASSES)
     if M == K - 1:
         probs = coral_probs_from_logits(raw)[0]
         return probs, "auto_coral"
     elif M == K:
         row = raw[0]
         exps = np.exp(row - np.max(row))
         probs = exps / np.sum(exps)
         return probs, "auto_softmax"
     else:
         row = raw[0]
         s = float(np.sum(np.abs(row)))
         probs = (row / s) if s > 0 else np.ones_like(row) / len(row)
 def apply_imputer_if_any(x: np.ndarray) -> np.ndarray:
     if imputer is not None:
         return imputer.transform(x.reshape(1, -1)).astype(np.float32)[0]
     # fallback: replace NaNs with feature means from stats if available, else 0
     out = x.copy()
         return z, z_detail, "manual_stats"
+# --------- SHAP: model wrapper & explainer ---------
 def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
     """
     Wrapper for SHAP: takes (N, n_features) in z-space and returns (N, K) probabilities.
     K = len(CLASSES)
     if M == K - 1:
         probs = coral_probs_from_logits(raw)  # (N, K)
     elif M == K:
         exps = np.exp(raw - np.max(raw, axis=1, keepdims=True))
         probs = exps / np.sum(exps, axis=1, keepdims=True)
     else:
         s = np.sum(np.abs(raw), axis=1, keepdims=True)
         probs = np.divide(raw, s, out=np.ones_like(raw) / max(M, 1), where=(s > 0))
     return probs
 EXPLAINER = None
 if SHAP_AVAILABLE:
     try:
         BACKGROUND_Z = np.zeros((50, len(FEATURES)), dtype=np.float32)
         EXPLAINER = shap.KernelExplainer(model_proba_from_z, BACKGROUND_Z)
         print("SHAP KernelExplainer initialized.")
         "imputer": bool(imputer),
         "scaler": bool(scaler),
         "stats_available": bool(stats),
+        "shap_available": bool(EXPLAINER),
     }
         if not isinstance(payload, dict):
             return JSONResponse(status_code=400, content={"error": "Expected JSON object"})
+        # Build in EXACT training order
         raw = build_raw_vector(payload)            # may contain NaNs
         raw_imp = apply_imputer_if_any(raw)        # impute
         z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scale / z-score
+        # Predict
         X = z_vec.reshape(1, -1).astype(np.float32)
         raw_logits = model.predict(X, verbose=0)
         probs, mode = decode_logits(raw_logits)
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
+        # ---------- SHAP for ALL classes ----------
+        shap_payload: Dict[str, Any] = {"available": bool(EXPLAINER)}
         if EXPLAINER is not None:
             try:
+                shap_raw = EXPLAINER.shap_values(X, nsamples=100)
+                shap_all_classes: Dict[str, Dict[str, float]] = {}
+                if isinstance(shap_raw, list):
+                    # standard KernelExplainer multi-output: list of length K, each (1, n_features)
+                    for c_idx, cls_name in enumerate(CLASSES):
+                        if c_idx >= len(shap_raw):
+                            break
+                        arr = np.array(shap_raw[c_idx])
+                        if arr.ndim == 2:
+                            vec = arr[0]
+                        else:
+                            vec = arr.reshape(-1)
+                        m = min(len(FEATURES), len(vec))
+                        shap_all_classes[cls_name] = {
+                            FEATURES[i]: float(vec[i]) for i in range(m)
+                        }
                 else:
+                    # Fallback: single ndarray, try to interpret first dim as classes
+                    arr = np.array(shap_raw)
+                    if arr.ndim == 3:
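+                        # assumes (K, 1, n_features) or (1, K, n_features); NOTE(review): the previous revision treated 3-D output as (n_samples, n_features, n_outputs) — confirm axis order, otherwise features and classes are swapped here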
+                        if arr.shape[1] == 1:
+                            arr2 = arr[:, 0, :]
+                        elif arr.shape[0] == 1:
+                            arr2 = arr[0, :, :]
+                        else:
+                            arr2 = arr.reshape(arr.shape[0], -1)
+                    elif arr.ndim == 2:
+                        # (K, n_features)
+                        arr2 = arr
                     else:
+                        raise ValueError(f"Unsupported SHAP array shape: {arr.shape}")
+                    K_eff = min(arr2.shape[0], len(CLASSES))
+                    for c_idx in range(K_eff):
+                        vec = arr2[c_idx]
+                        m = min(len(FEATURES), len(vec))
+                        shap_all_classes[CLASSES[c_idx]] = {
+                            FEATURES[i]: float(vec[i]) for i in range(m)
+                        }
+                shap_payload["all_classes"] = shap_all_classes
             except Exception as e:
+                shap_payload = {
+                    "available": False,
+                    "error": str(e),
+                    "trace": traceback.format_exc(),
+                }
+        # ---------- final response ----------
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
+            "z_scores": z_detail,      # per feature
+            "probabilities": probs_dict,
             "predicted_state": CLASSES[pred_idx],
+            "shap": shap_payload,      # FULL per-class SHAP matrix
             "debug": {
                 "raw_shape": list(raw_logits.shape),
                 "decode_mode": mode,