Spaces:

COCODEDE04
/

SF_FastAPI

Sleeping

App Files Community

COCODEDE04 committed on Nov 17, 2025

Commit

8109a99

verified ·

1 Parent(s): c71e704

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -68

app.py CHANGED Viewed

@@ -362,95 +362,99 @@ async def predict(req: Request):
     Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
     Missing features are imputed if imputer present; else filled with means (if stats) or 0.
-    This endpoint ALSO computes SHAP values for the *predicted class only*,
-    returning one SHAP value per feature (21 in total) when SHAP is available.
     """
     try:
         payload = await req.json()
         if not isinstance(payload, dict):
-            return JSONResponse(status_code=400, content={"error": "Expected JSON object"})
-        # ---------- 1) Build features in EXACT training order ----------
-        raw = build_raw_vector(payload)            # may contain NaNs
-        raw_imp = apply_imputer_if_any(raw)        # median / training imputer
-        z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scaler or manual z-score
-        # ---------- 2) Model prediction ----------
-        X_z = z_vec.reshape(1, -1).astype(np.float32)
-        raw_logits = model.predict(X_z, verbose=0)
-        probs, decode_mode = decode_logits(raw_logits)
         pred_idx = int(np.argmax(probs))
-        pred_class = CLASSES[pred_idx]
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
-        # ---------- 3) SHAP explanation for the predicted class ----------
-        shap_payload: Dict[str, Any]
-        if not SHAP_AVAILABLE:
-            shap_payload = {
-                "available": False,
-                "reason": "SHAP library not installed in this environment.",
-            }
-        else:
             try:
-                # Scalar function: probability of the *predicted* class only
-                def f_scalar(z_batch):
-                    """
-                    z_batch: (N, D) or (D,)
-                    returns: (N,) probability of the predicted class
-                    """
-                    probs_batch = model_proba_from_z(z_batch)   # (N, K)
-                    return probs_batch[:, pred_idx]             # (N,)
-                # Background: 50 "average" institutions at z=0
-                background_z = np.zeros((50, len(FEATURES)), dtype=np.float32)
-                # KernelExplainer for a scalar-output model
-                explainer = shap.KernelExplainer(f_scalar, background_z)
-                # SHAP for this one observation (in z-space)
-                shap_vals = explainer.shap_values(X_z, nsamples=50)
-                # For scalar output, shap_vals is usually a 2D array (N, D),
-                # but some versions wrap it in a list. Handle both:
                 if isinstance(shap_vals, list):
-                    shap_mat = np.array(shap_vals[0])
                 else:
-                    shap_mat = np.array(shap_vals)
-                # Expect (1, n_features)
-                if shap_mat.ndim == 1:
-                    shap_mat = shap_mat.reshape(1, -1)
-                if shap_mat.shape[0] != 1:
-                    raise ValueError(f"Unexpected SHAP batch size {shap_mat.shape[0]} (expected 1)")
-                if shap_mat.shape[1] != len(FEATURES):
-                    raise ValueError(
-                        f"Unexpected SHAP vector length {shap_mat.shape[1]} "
-                        f"(expected {len(FEATURES)})"
                     )
-                shap_vec = shap_mat[0]  # (n_features,)
-                shap_feature_contribs = {
-                    FEATURES[i]: float(shap_vec[i]) for i in range(len(FEATURES))
-                }
-                shap_payload = {
                     "available": True,
-                    "class": pred_class,
-                    "values": shap_feature_contribs,
                 }
             except Exception as e:
-                shap_payload = {
                     "available": False,
                     "error": str(e),
                     "trace": traceback.format_exc(),
                 }
-        # ---------- 4) Final JSON response ----------
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
@@ -459,10 +463,10 @@ async def predict(req: Request):
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
-            "z_scores": z_detail,           # per feature (model input)
-            "probabilities": probs_dict,    # state → probability
-            "predicted_state": pred_class,
-            "shap": shap_payload,           # explanation for predicted class only
             "debug": {
                 "raw_shape": list(raw_logits.shape),
                 "decode_mode": decode_mode,

     Body: JSON object mapping feature -> numeric value (strings with commas/points ok).
     Missing features are imputed if imputer present; else filled with means (if stats) or 0.
+    Returns:
+      - probabilities over classes
+      - z-scores per indicator
+      - SHAP contributions for *all* classes (if SHAP is available), in z-space.
     """
     try:
         payload = await req.json()
         if not isinstance(payload, dict):
+            return JSONResponse(
+                status_code=400,
+                content={"error": "Expected JSON object"},
+            )
+        # 1) Build raw feature vector in training order
+        raw = build_raw_vector(payload)                 # may contain NaNs
+        raw_imp = apply_imputer_if_any(raw)             # impute
+        z_vec, z_detail, z_mode = apply_scaling_or_stats(raw_imp)  # scale / z-score
+        # 2) Predict
+        X_z = z_vec.reshape(1, -1).astype(np.float32)   # (1, D) in z-space
+        raw_logits = model.predict(X_z, verbose=0)      # (1, M)
+        probs, decode_mode = decode_logits(raw_logits)  # (K,)
         pred_idx = int(np.argmax(probs))
         probs_dict = {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))}
         missing = [f for i, f in enumerate(FEATURES) if np.isnan(raw[i])]
+        # 3) SHAP for ALL classes (if explainer is available)
+        shap_block: Dict[str, Any] = {"available": False}
+        if EXPLAINER is not None and SHAP_AVAILABLE:
             try:
+                # KernelExplainer built with model_proba_from_z, so we pass z-space
+                shap_vals = EXPLAINER.shap_values(X_z, nsamples=50)
+                K = len(CLASSES)
+                D = len(FEATURES)
+                all_classes: Dict[str, Dict[str, float]] = {}
+                # Case 1: vector-output model → list of length K
                 if isinstance(shap_vals, list):
+                    if len(shap_vals) != K:
+                        raise ValueError(
+                            f"Expected {K} SHAP arrays (one per class), got {len(shap_vals)}"
+                        )
+                    for c_idx, cname in enumerate(CLASSES):
+                        arr = np.asarray(shap_vals[c_idx])
+                        if arr.ndim != 2 or arr.shape[0] < 1 or arr.shape[1] != D:
+                            raise ValueError(
+                                f"Unexpected SHAP shape for class {cname}: {arr.shape}, expected (1,{D})"
+                            )
+                        vec = arr[0]  # (D,)
+                        all_classes[cname] = {
+                            FEATURES[i]: float(vec[i]) for i in range(D)
+                        }
+                # Case 2: some SHAP versions return a single (K,D) array
+                elif isinstance(shap_vals, np.ndarray):
+                    arr = np.asarray(shap_vals)
+                    if arr.ndim == 3 and arr.shape[0] == 1 and arr.shape[2] == D:
+                        # shape (1, K, D) → take [0]
+                        arr = arr[0]
+                    if arr.ndim != 2 or arr.shape[0] != K or arr.shape[1] != D:
+                        raise ValueError(
+                            f"Unexpected SHAP ndarray shape {arr.shape}; "
+                            f"expected (K,{D}) or (1,K,{D})"
+                        )
+                    for c_idx, cname in enumerate(CLASSES):
+                        vec = arr[c_idx]  # (D,)
+                        all_classes[cname] = {
+                            FEATURES[i]: float(vec[i]) for i in range(D)
+                        }
                 else:
+                    raise TypeError(
+                        f"Unsupported SHAP output type: {type(shap_vals).__name__}"
                     )
+                shap_block = {
                     "available": True,
+                    "predicted_class": CLASSES[pred_idx],
+                    "all_classes": all_classes,
                 }
             except Exception as e:
+                shap_block = {
                     "available": False,
                     "error": str(e),
                     "trace": traceback.format_exc(),
                 }
+        # 4) Final response
         return {
             "input_ok": (len(missing) == 0),
             "missing": missing,
                 "scaler": bool(scaler),
                 "z_mode": z_mode,
             },
+            "z_scores": z_detail,           # per indicator, in z-space
+            "probabilities": probs_dict,
+            "predicted_state": CLASSES[pred_idx],
+            "shap": shap_block,
             "debug": {
                 "raw_shape": list(raw_logits.shape),
                 "decode_mode": decode_mode,