Spaces:

COCODEDE04
/

SF_FastAPI

Sleeping

App Files Community

COCODEDE04 committed on Nov 17, 2025

Commit

a850728

verified ·

1 Parent(s): 8109a99

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -108

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
-import os, json, traceback
-from typing import Any, Dict, List
 import numpy as np
 import tensorflow as tf
@@ -7,7 +7,7 @@ from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
-# Try SHAP
 try:
     import shap
     SHAP_AVAILABLE = True
@@ -94,7 +94,8 @@ def load_joblib_if_exists(candidates: List[str]):
         p = os.path.join(os.getcwd(), name)
         if os.path.isfile(p):
             try:
-                import joblib  # lazy import
                 with open(p, "rb") as fh:
                     obj = joblib.load(fh)
                 return obj, p, None
@@ -146,6 +147,8 @@ def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
     left  = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
     right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
     probs = tf.clip_by_value(left - right, 1e-12, 1.0)
     return probs.numpy()
@@ -161,14 +164,17 @@ def decode_logits(raw: np.ndarray) -> (np.ndarray, str):
     K = len(CLASSES)
     if M == K - 1:
         probs = coral_probs_from_logits(raw)[0]
         return probs, "auto_coral"
     elif M == K:
         row = raw[0]
         exps = np.exp(row - np.max(row))
         probs = exps / np.sum(exps)
         return probs, "auto_softmax"
     else:
         row = raw[0]
         s = float(np.sum(np.abs(row)))
         probs = (row / s) if s > 0 else np.ones_like(row) / len(row)
@@ -196,6 +202,7 @@ def build_raw_vector(payload: Dict[str, Any]) -> np.ndarray:
 def apply_imputer_if_any(x: np.ndarray) -> np.ndarray:
     if imputer is not None:
         return imputer.transform(x.reshape(1, -1)).astype(np.float32)[0]
     # fallback: replace NaNs with feature means from stats if available, else 0
     out = x.copy()
@@ -231,53 +238,35 @@ def apply_scaling_or_stats(raw_vec: np.ndarray) -> (np.ndarray, Dict[str, float]
         return z, z_detail, "manual_stats"
-# --------- SHAP: model wrapper & explainer ---------
 def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
     """
-    Batch-safe wrapper for SHAP and other callers.
-    Input:
-        z_batch_np: (N, n_features) or (n_features,) in z-space
-    Output:
-        probs: (N, K) matrix of class probabilities
     """
-    z = np.array(z_batch_np, dtype=np.float32)
-    # Ensure 2D: (N, D)
-    if z.ndim == 1:
-        z = z.reshape(1, -1)
-    raw = model.predict(z, verbose=0)  # shape: (N, M)
     if raw.ndim != 2:
         raise ValueError(f"Unexpected raw shape from model: {raw.shape}")
     N, M = raw.shape
     K = len(CLASSES)
     if M == K - 1:
-        # CORAL: logits for K-1 thresholds → K probabilities
         probs = coral_probs_from_logits(raw)  # (N, K)
     elif M == K:
-        # Softmax or unnormalized scores, per row
         exps = np.exp(raw - np.max(raw, axis=1, keepdims=True))
-        probs = exps / np.sum(exps, axis=1, keepdims=True)  # (N, K)
     else:
-        # Fallback: row-wise normalization
-        s = np.sum(np.abs(raw), axis=1, keepdims=True)      # (N, 1)
-        probs = np.divide(
-            raw,
-            s,
-            out=np.ones_like(raw) / max(M, 1),
-            where=(s > 0),
-        )  # (N, M)
     return probs
 EXPLAINER = None
 if SHAP_AVAILABLE:
     try:
         BACKGROUND_Z = np.zeros((50, len(FEATURES)), dtype=np.float32)
         EXPLAINER = shap.KernelExplainer(model_proba_from_z, BACKGROUND_Z)
         print("SHAP KernelExplainer initialized.")
@@ -325,7 +314,7 @@ def health():
         "imputer": bool(imputer),
         "scaler": bool(scaler),
         "stats_available": bool(stats),
-        "shap_available": bool(EXPLAINER),
     }
@@ -361,100 +350,107 @@ async def predict(req: Request):
     """
     Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
     Missing features are imputed if imputer present; else filled with means (if stats) or 0.
-    Returns:
-      - probabilities over classes
-      - z-scores per indicator
-      - SHAP contributions for *all* classes (if SHAP is available), in z-space.
     """
     try:
         payload = await req.json()
         if not isinstance(payload, dict):
-            return JSONResponse(
-                status_code=400,
-                content={"error": "Expected JSON object"},
-            )
-        # 1) Build raw feature vector in training order
-        raw = build_raw_vector(payload)                 # may contain NaNs
-        raw_imp = apply_imputer_if_any(raw)             # impute
         z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scale / z-score
-        # 2) Predict
-        X_z = z_vec.reshape(1, -1).astype(np.float32)   # (1, D) in z-space
-        raw_logits = model.predict(X_z, verbose=0)      # (1, M)
-        probs, decode_mode = decode_logits(raw_logits)  # (K,)
         pred_idx = int(np.argmax(probs))
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
-        # 3) SHAP for ALL classes (if explainer is available)
-        shap_block: Dict[str, Any] = {"available": False}
-        if EXPLAINER is not None and SHAP_AVAILABLE:
             try:
-                # KernelExplainer built with model_proba_from_z, so we pass z-space
-                shap_vals = EXPLAINER.shap_values(X_z, nsamples=50)
-                K = len(CLASSES)
-                D = len(FEATURES)
-                all_classes: Dict[str, Dict[str, float]] = {}
-                # Case 1: vector-output model → list of length K
                 if isinstance(shap_vals, list):
-                    if len(shap_vals) != K:
-                        raise ValueError(
-                            f"Expected {K} SHAP arrays (one per class), got {len(shap_vals)}"
-                        )
-                    for c_idx, cname in enumerate(CLASSES):
-                        arr = np.asarray(shap_vals[c_idx])
-                        if arr.ndim != 2 or arr.shape[0] < 1 or arr.shape[1] != D:
-                            raise ValueError(
-                                f"Unexpected SHAP shape for class {cname}: {arr.shape}, expected (1,{D})"
-                            )
-                        vec = arr[0]  # (D,)
-                        all_classes[cname] = {
-                            FEATURES[i]: float(vec[i]) for i in range(D)
-                        }
-                # Case 2: some SHAP versions return a single (K,D) array
-                elif isinstance(shap_vals, np.ndarray):
-                    arr = np.asarray(shap_vals)
-                    if arr.ndim == 3 and arr.shape[0] == 1 and arr.shape[2] == D:
-                        # shape (1, K, D) → take [0]
-                        arr = arr[0]
-                    if arr.ndim != 2 or arr.shape[0] != K or arr.shape[1] != D:
                         raise ValueError(
-                            f"Unexpected SHAP ndarray shape {arr.shape}; "
-                            f"expected (K,{D}) or (1,K,{D})"
                         )
-                    for c_idx, cname in enumerate(CLASSES):
-                        vec = arr[c_idx]  # (D,)
-                        all_classes[cname] = {
-                            FEATURES[i]: float(vec[i]) for i in range(D)
-                        }
                 else:
-                    raise TypeError(
-                        f"Unsupported SHAP output type: {type(shap_vals).__name__}"
                     )
-                shap_block = {
-                    "available": True,
-                    "predicted_class": CLASSES[pred_idx],
-                    "all_classes": all_classes,
                 }
-            except Exception as e:
-                shap_block = {
-                    "available": False,
-                    "error": str(e),
-                    "trace": traceback.format_exc(),
                 }
-        # 4) Final response
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
@@ -463,13 +459,13 @@ async def predict(req: Request):
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
-            "z_scores": z_detail,           # per indicator, in z-space
-            "probabilities": probs_dict,
             "predicted_state": CLASSES[pred_idx],
-            "shap": shap_block,
             "debug": {
                 "raw_shape": list(raw_logits.shape),
-                "decode_mode": decode_mode,
                 "raw_first_row": [float(v) for v in raw_logits[0]],
             },
         }
@@ -477,5 +473,5 @@ async def predict(req: Request):
     except Exception as e:
         return JSONResponse(
             status_code=500,
-            content={"error": str(e), "trace": traceback.format_exc()},
         )

+import os, json, io, traceback
+from typing import Any, Dict, List, Optional
 import numpy as np
 import tensorflow as tf
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
+# ---------- SHAP optional import ----------
 try:
     import shap
     SHAP_AVAILABLE = True
         p = os.path.join(os.getcwd(), name)
         if os.path.isfile(p):
             try:
+                # Import inside to avoid hard dependency if not used
+                import joblib  # type: ignore
                 with open(p, "rb") as fh:
                     obj = joblib.load(fh)
                 return obj, p, None
     left  = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
     right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
     probs = tf.clip_by_value(left - right, 1e-12, 1.0)
+    # normalize row-wise just in case
+    probs = probs / tf.reduce_sum(probs, axis=1, keepdims=True)
     return probs.numpy()
     K = len(CLASSES)
     if M == K - 1:
+        # CORAL logits
         probs = coral_probs_from_logits(raw)[0]
         return probs, "auto_coral"
     elif M == K:
+        # Softmax or unnormalized scores
         row = raw[0]
         exps = np.exp(row - np.max(row))
         probs = exps / np.sum(exps)
         return probs, "auto_softmax"
     else:
+        # Fallback: normalize across whatever is there
         row = raw[0]
         s = float(np.sum(np.abs(row)))
         probs = (row / s) if s > 0 else np.ones_like(row) / len(row)
 def apply_imputer_if_any(x: np.ndarray) -> np.ndarray:
     if imputer is not None:
+        # imputer expects 2D
         return imputer.transform(x.reshape(1, -1)).astype(np.float32)[0]
     # fallback: replace NaNs with feature means from stats if available, else 0
     out = x.copy()
         return z, z_detail, "manual_stats"
+# --------- SHAP model wrapper & explainer ---------
 def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
     """
+    Wrapper for SHAP: takes (N, n_features) in z-space and returns (N, K) probabilities.
     """
+    raw = model.predict(z_batch_np, verbose=0)
     if raw.ndim != 2:
         raise ValueError(f"Unexpected raw shape from model: {raw.shape}")
     N, M = raw.shape
     K = len(CLASSES)
     if M == K - 1:
+        # CORAL
         probs = coral_probs_from_logits(raw)  # (N, K)
     elif M == K:
+        # Softmax or scores
         exps = np.exp(raw - np.max(raw, axis=1, keepdims=True))
+        probs = exps / np.sum(exps, axis=1, keepdims=True)
     else:
+        # Fallback normalize
+        s = np.sum(np.abs(raw), axis=1, keepdims=True)
+        probs = np.divide(raw, s, out=np.ones_like(raw) / max(M, 1), where=(s > 0))
     return probs
 EXPLAINER = None
 if SHAP_AVAILABLE:
     try:
+        # Background: 50 "average" institutions at z=0
         BACKGROUND_Z = np.zeros((50, len(FEATURES)), dtype=np.float32)
         EXPLAINER = shap.KernelExplainer(model_proba_from_z, BACKGROUND_Z)
         print("SHAP KernelExplainer initialized.")
         "imputer": bool(imputer),
         "scaler": bool(scaler),
         "stats_available": bool(stats),
+        "shap_available": bool(EXPLAINER is not None),
     }
     """
     Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
     Missing features are imputed if imputer present; else filled with means (if stats) or 0.
     """
     try:
         payload = await req.json()
         if not isinstance(payload, dict):
+            return JSONResponse(status_code=400, content={"error": "Expected JSON object"})
+        # ---------- PREPROCESSING ----------
+        raw = build_raw_vector(payload)            # may contain NaNs
+        raw_imp = apply_imputer_if_any(raw)        # impute
         z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scale / z-score
+        # ---------- PREDICTION ----------
+        X = z_vec.reshape(1, -1).astype(np.float32)
+        raw_logits = model.predict(X, verbose=0)
+        probs, mode = decode_logits(raw_logits)
         pred_idx = int(np.argmax(probs))
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
+                # ---------- SHAP EXPLANATION (predicted class only) ----------
+        shap_out = {"error": "SHAP not computed"}
+        if EXPLAINER is not None:
             try:
+                shap_vals = EXPLAINER.shap_values(X, nsamples=100)
+                # 1) Pull raw SHAP tensor
                 if isinstance(shap_vals, list):
+                    # Classic multi-output: list[len = n_classes], each (n_samples, n_features)
+                    raw_sv = np.array(shap_vals[pred_idx])
+                else:
+                    # Single array, possibly (n_samples, n_features) or (n_samples, n_features, n_outputs)
+                    raw_sv = np.array(shap_vals)
+                # 2) Normalize shapes to a 1D vector (n_features,) for the predicted class
+                if raw_sv.ndim == 1:
+                    # Already (n_features,)
+                    shap_vec = raw_sv.astype(float)
+                elif raw_sv.ndim == 2:
+                    # (n_samples, n_features) or (n_features, 1)
+                    if raw_sv.shape[0] == 1:
+                        # (1, n_features)
+                        shap_vec = raw_sv[0].astype(float)
+                    elif raw_sv.shape[1] == 1:
+                        # (n_features, 1)
+                        shap_vec = raw_sv[:, 0].astype(float)
+                    else:
+                        # assume (n_samples, n_features), take first sample
+                        shap_vec = raw_sv[0].astype(float)
+                elif raw_sv.ndim == 3:
+                    # Most likely (n_samples, n_features, n_outputs)
+                    n_samples, n_features, n_outputs = raw_sv.shape
+                    if n_samples < 1:
+                        raise ValueError(f"SHAP 3D output has zero samples: {raw_sv.shape}")
+                    if pred_idx >= n_outputs:
                         raise ValueError(
+                            f"SHAP 3D output has only {n_outputs} outputs, "
+                            f"cannot index class {pred_idx}"
                         )
+                    # take first sample, all features, predicted class
+                    shap_vec = raw_sv[0, :, pred_idx].astype(float)
                 else:
+                    # Fallback: flatten all sample dims, keep first feature-block
+                    flat = raw_sv.reshape(raw_sv.shape[0], -1)
+                    shap_vec = flat[0].astype(float)
+                # 3) Sanity check length
+                if shap_vec.shape[0] != len(FEATURES):
+                    raise ValueError(
+                        f"Unexpected SHAP vector length {shap_vec.shape[0]} "
+                        f"(expected {len(FEATURES)})"
                     )
+                # 4) Expected value (baseline) for the predicted class
+                exp_raw = EXPLAINER.expected_value
+                if isinstance(exp_raw, (list, np.ndarray)):
+                    exp_val = float(np.array(exp_raw)[pred_idx])
+                else:
+                    exp_val = float(exp_raw)
+                # 5) Map feature -> contribution
+                shap_feature_contribs = {
+                    FEATURES[i]: float(shap_vec[i])
+                    for i in range(len(FEATURES))
                 }
+                shap_out = {
+                    "explained_class": CLASSES[pred_idx],
+                    "expected_value": exp_val,
+                    "shap_values": shap_feature_contribs,
                 }
+            except Exception as e:
+                shap_out = {"error": str(e), "trace": traceback.format_exc()}
+        else:
+            shap_out = {"error": "SHAP not available on server"}
+        # ---------- RESPONSE ----------
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
+            "z_scores": z_detail,          # per feature (z-space)
+            "probabilities": probs_dict,   # per class
             "predicted_state": CLASSES[pred_idx],
+            "shap": shap_out,              # SHAP for predicted state only
             "debug": {
                 "raw_shape": list(raw_logits.shape),
+                "decode_mode": mode,
                 "raw_first_row": [float(v) for v in raw_logits[0]],
             },
         }
     except Exception as e:
         return JSONResponse(
             status_code=500,
+            content={"error": str(e), "trace": traceback.format_exc()}
         )