Spaces:

COCODEDE04
/

SF_FastAPI

Sleeping

App Files Community

COCODEDE04 committed on Nov 17, 2025

Commit

706263e

verified ·

1 Parent(s): a6c0646

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -60

app.py CHANGED Viewed

@@ -339,73 +339,87 @@ async def predict(req: Request):
     """
     Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
     Missing features are imputed if imputer present; else filled with means (if stats) or 0.
     """
     try:
         payload = await req.json()
         if not isinstance(payload, dict):
             return JSONResponse(status_code=400, content={"error": "Expected JSON object"})
-        # Build in EXACT training order
         raw = build_raw_vector(payload)            # may contain NaNs
-        raw_imp = apply_imputer_if_any(raw)        # impute
-        z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scale / z-score
-        # Predict
-        X = z_vec.reshape(1, -1).astype(np.float32)
-        raw_logits = model.predict(X, verbose=0)
-        probs, mode = decode_logits(raw_logits)
         pred_idx = int(np.argmax(probs))
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
-        # ---------- SHAP for ALL classes ----------
-        shap_payload: Dict[str, Any] = {"available": bool(EXPLAINER)}
-        if EXPLAINER is not None:
             try:
-                shap_raw = EXPLAINER.shap_values(X, nsamples=100)
-                shap_all_classes: Dict[str, Dict[str, float]] = {}
-                if isinstance(shap_raw, list):
-                    # standard KernelExplainer multi-output: list of length K, each (1, n_features)
-                    for c_idx, cls_name in enumerate(CLASSES):
-                        if c_idx >= len(shap_raw):
-                            break
-                        arr = np.array(shap_raw[c_idx])
-                        if arr.ndim == 2:
-                            vec = arr[0]
-                        else:
-                            vec = arr.reshape(-1)
-                        m = min(len(FEATURES), len(vec))
-                        shap_all_classes[cls_name] = {
-                            FEATURES[i]: float(vec[i]) for i in range(m)
-                        }
                 else:
-                    # Fallback: single ndarray, try to interpret first dim as classes
-                    arr = np.array(shap_raw)
-                    if arr.ndim == 3:
-                        # e.g. (K, 1, n_features) or (1, K, n_features)
-                        if arr.shape[1] == 1:
-                            arr2 = arr[:, 0, :]
-                        elif arr.shape[0] == 1:
-                            arr2 = arr[0, :, :]
-                        else:
-                            arr2 = arr.reshape(arr.shape[0], -1)
-                    elif arr.ndim == 2:
-                        # (K, n_features)
-                        arr2 = arr
-                    else:
-                        raise ValueError(f"Unsupported SHAP array shape: {arr.shape}")
-                    K_eff = min(arr2.shape[0], len(CLASSES))
-                    for c_idx in range(K_eff):
-                        vec = arr2[c_idx]
-                        m = min(len(FEATURES), len(vec))
-                        shap_all_classes[CLASSES[c_idx]] = {
-                            FEATURES[i]: float(vec[i]) for i in range(m)
-                        }
-                shap_payload["all_classes"] = shap_all_classes
             except Exception as e:
                 shap_payload = {
@@ -414,7 +428,7 @@ async def predict(req: Request):
                     "trace": traceback.format_exc(),
                 }
-        # ---------- final response ----------
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
@@ -423,13 +437,13 @@ async def predict(req: Request):
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
-            "z_scores": z_detail,      # per feature
-            "probabilities": probs_dict,
-            "predicted_state": CLASSES[pred_idx],
-            "shap": shap_payload,      # FULL per-class SHAP matrix
             "debug": {
                 "raw_shape": list(raw_logits.shape),
-                "decode_mode": mode,
                 "raw_first_row": [float(v) for v in raw_logits[0]],
             },
         }
@@ -437,5 +451,6 @@ async def predict(req: Request):
     except Exception as e:
         return JSONResponse(
             status_code=500,
-            content={"error": str(e), "trace": traceback.format_exc()}
-        )

     """
     Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
     Missing features are imputed if imputer present; else filled with means (if stats) or 0.
+    This endpoint ALSO computes SHAP values for the *predicted class only*,
+    returning one SHAP value per feature (21 in total).
     """
     try:
         payload = await req.json()
         if not isinstance(payload, dict):
             return JSONResponse(status_code=400, content={"error": "Expected JSON object"})
+        # ---------- 1) Build features in EXACT training order ----------
         raw = build_raw_vector(payload)            # may contain NaNs
+        raw_imp = apply_imputer_if_any(raw)        # median / training imputer
+        z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scaler or manual z-score
+        # ---------- 2) Model prediction ----------
+        X_z = z_vec.reshape(1, -1).astype(np.float32)
+        raw_logits = model.predict(X_z, verbose=0)
+        probs, decode_mode = decode_logits(raw_logits)
         pred_idx = int(np.argmax(probs))
+        pred_class = CLASSES[pred_idx]
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
+        # ---------- 3) SHAP explanation for the predicted class ----------
+        shap_payload: Dict[str, Any]
+        if not SHAP_AVAILABLE:
+            # shap library not installed in this environment
+            shap_payload = {
+                "available": False,
+                "reason": "SHAP library not installed in this environment.",
+            }
+        else:
             try:
+                # Helper: probability function in *z-space*
+                def model_proba_from_z(z_batch_np: np.ndarray) -> np.ndarray:
+                    """
+                    Takes (N, n_features) in z-space and returns (N, K) probabilities.
+                    This mirrors the normal predict pipeline but assumes we're already in z-space.
+                    """
+                    raw_local = model.predict(z_batch_np, verbose=0)
+                    return decode_logits(raw_local)[0].reshape(-1, len(CLASSES))
+                # Scalar function: probability of the *predicted* class only
+                def f_scalar(z_batch):
+                    z_batch = np.array(z_batch, dtype=np.float32)
+                    probs_batch = model_proba_from_z(z_batch)   # (N, K)
+                    return probs_batch[:, pred_idx]             # (N,)
+                # Background: 50 "average" institutions at z=0
+                background_z = np.zeros((50, len(FEATURES)), dtype=np.float32)
+                # Create a per-call KernelExplainer for this scalar output
+                explainer = shap.KernelExplainer(f_scalar, background_z)
+                # SHAP for this *one* observation (in z-space)
+                shap_vals = explainer.shap_values(X_z, nsamples=50)
+                shap_arr = np.array(shap_vals)
+                # We expect shape (1, n_features) or (n_features,)
+                if shap_arr.ndim == 2 and shap_arr.shape[0] == 1:
+                    shap_vec = shap_arr[0]
                 else:
+                    shap_vec = shap_arr.reshape(-1)
+                if shap_vec.size != len(FEATURES):
+                    raise ValueError(
+                        f"Unexpected SHAP vector length {shap_vec.size} "
+                        f"(expected {len(FEATURES)})"
+                    )
+                shap_feature_contribs = {
+                    FEATURES[i]: float(shap_vec[i]) for i in range(len(FEATURES))
+                }
+                shap_payload = {
+                    "available": True,
+                    "class": pred_class,
+                    "values": shap_feature_contribs,
+                }
             except Exception as e:
                 shap_payload = {
                     "trace": traceback.format_exc(),
                 }
+        # ---------- 4) Final JSON response ----------
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
+            "z_scores": z_detail,           # per feature (model input)
+            "probabilities": probs_dict,    # state → probability
+            "predicted_state": pred_class,
+            "shap": shap_payload,           # explanation for predicted class only
             "debug": {
                 "raw_shape": list(raw_logits.shape),
+                "decode_mode": decode_mode,
                 "raw_first_row": [float(v) for v in raw_logits[0]],
             },
         }
     except Exception as e:
         return JSONResponse(
             status_code=500,
+            content={"error": str(e), "trace": traceback.format_exc()},
+        )