Spaces:

COCODEDE04
/

SF_FastAPI

Sleeping

App Files Files Community

COCODEDE04 committed on Nov 12, 2025

Commit

6363de7

verified ·

1 Parent(s): acbe7ed

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -143

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Any, Dict
 import numpy as np
 import tensorflow as tf
@@ -11,120 +11,102 @@ from fastapi.middleware.cors import CORSMiddleware
 MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5")
 STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
 CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
 # ------------------------------------------
-# Debug & decoding control
-FORCE_CORAL = os.getenv("FORCE_CORAL", "0") in ("1", "true", "True", "YES", "yes")
-RETURN_DEBUG = os.getenv("RETURN_DEBUG", "1") in ("1", "true", "True", "YES", "yes")
 print("Loading model and stats...")
 model = tf.keras.models.load_model(MODEL_PATH, compile=False)
 with open(STATS_PATH, "r") as f:
     stats: Dict[str, Dict[str, float]] = json.load(f)
-# IMPORTANT: FEATURES order must match training!
-FEATURES = list(stats.keys())
-print("Feature order:", FEATURES)
-# ---------- robust numeric coercion ----------
 def coerce_float(val: Any) -> float:
-    """
-    Accepts numeric, or strings like:
-      "49.709,14"  -> 49709.14
-      "49,709.14"  -> 49709.14
-      "0,005"      -> 0.005
-      "  1 234 "   -> 1234
-    Returns float, or raises ValueError if impossible.
-    """
     if isinstance(val, (int, float)):
         return float(val)
     s = str(val).strip()
     if s == "":
         raise ValueError("empty")
     s = s.replace(" ", "")
     has_dot = "." in s
     has_comma = "," in s
     if has_dot and has_comma:
-        last_dot = s.rfind(".")
-        last_comma = s.rfind(",")
-        if last_comma > last_dot:
-            s = s.replace(".", "")
-            s = s.replace(",", ".")
         else:
             s = s.replace(",", "")
     elif has_comma and not has_dot:
         s = s.replace(",", ".")
-    # dots only or digits -> leave
     return float(s)
-def _z(val: Any, mean: float, sd: float) -> float:
-    try:
-        v = coerce_float(val)
-    except Exception:
-        return 0.0
-    if not sd:
         return 0.0
-    return (v - mean) / sd
-# ---------- CORAL utilities ----------
-def enforce_nonincreasing(sig_vec: np.ndarray) -> np.ndarray:
-    """
-    Given a 1D array of cumulative probs s (should be non-increasing for CORAL),
-    enforce s[0] >= s[1] >= ... >= s[K-1] using a simple PAV algorithm.
-    """
-    s = sig_vec.astype(float).copy()
-    n = len(s)
-    blocks = [[i] for i in range(n)]
-    vals = s.tolist()
-    i = 0
-    while i < len(vals) - 1:
-        if vals[i] < vals[i + 1]:  # violation: should be non-increasing
-            merged_idx = blocks[i] + blocks[i + 1]
-            avg = (
-                (vals[i] * len(blocks[i]) + vals[i + 1] * len(blocks[i + 1]))
-                / (len(blocks[i]) + len(blocks[i + 1]))
-            )
-            blocks[i] = merged_idx
-            vals[i] = avg
-            del blocks[i + 1]
-            del vals[i + 1]
-            if i > 0:
-                i -= 1
-        else:
-            i += 1
-    out = np.zeros(n, dtype=float)
-    for v, idxs in zip(vals, blocks):
-        for j in idxs:
-            out[j] = v
-    return np.clip(out, 1e-12, 1 - 1e-12)
-def coral_probs_from_logits_monotone(logits_np: np.ndarray) -> np.ndarray:
-    """
-    CORAL decoding with monotonicity enforcement so class probs are valid (sum=1, nonnegative).
-    """
-    sig = 1.0 / (1.0 + np.exp(-logits_np))  # sigmoid
-    sig_m = enforce_nonincreasing(sig[0])  # enforce order
-    left = np.concatenate([np.array([1.0], dtype=float), sig_m])
-    right = np.concatenate([sig_m, np.array([0.0], dtype=float)])
-    probs = np.clip(left - right, 1e-12, 1.0)
-    probs = probs / probs.sum()  # normalize
-    return probs
 # ------------- FastAPI app ----------------
-app = FastAPI(title="Static Fingerprint API", version="1.0.0")
-# Allow Excel / local tools to call the API
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -133,104 +115,76 @@ app.add_middleware(
     allow_headers=["*"],
 )
 @app.get("/")
 def root():
-    return {
-        "message": "Static Fingerprint API is running.",
-        "try": ["GET /health", "POST /predict"],
-    }
 @app.get("/health")
 def health():
     return {
         "status": "ok",
         "features": FEATURES,
         "classes": CLASSES,
         "model_file": MODEL_PATH,
         "stats_file": STATS_PATH,
     }
 @app.post("/echo")
 async def echo(req: Request):
     payload = await req.json()
     return {"received": payload}
 @app.post("/predict")
 async def predict(req: Request):
-    """
-    Body: a single JSON dict mapping feature -> numeric value.
-    """
     payload = await req.json()
     if not isinstance(payload, dict):
         return {"error": "Expected a JSON object mapping feature -> value."}
-    # --- Build z-scores in strict model order ---
-    z = []
-    z_detail = {}
-    missing = []
     for f in FEATURES:
-        mean = stats[f]["mean"]
-        sd = stats[f]["std"]
         if f in payload:
-            zf = _z(payload[f], mean, sd)
         else:
             missing.append(f)
-            zf = _z(0.0, mean, sd)
-        z.append(zf)
         z_detail[f] = zf
-    X = np.array([z], dtype=np.float32)
     raw = model.predict(X, verbose=0)
-    raw_shape = tuple(raw.shape)
-    # --- Decode ---
-    probs = None
-    decode_mode = "auto"
-    try:
-        if FORCE_CORAL:
-            decode_mode = "forced_coral_monotone"
-            probs = coral_probs_from_logits_monotone(raw)
-        else:
-            if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
-                decode_mode = "auto_coral_monotone"
-                probs = coral_probs_from_logits_monotone(raw)
-            else:
-                decode_mode = "auto_softmax_or_logits"
-                probs = raw[0]
-                s = float(np.sum(probs))
-                if s > 0:
-                    probs = probs / s
-    except Exception:
-        decode_mode = "fallback_raw_norm"
         probs = raw[0]
         s = float(np.sum(probs))
         if s > 0:
             probs = probs / s
     pred_idx = int(np.argmax(probs))
-    # --- Response ---
-    resp = {
         "input_ok": (len(missing) == 0),
         "missing": missing,
         "z_scores": z_detail,
         "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
         "predicted_state": CLASSES[pred_idx],
-    }
-    # --- Debug block ---
-    if RETURN_DEBUG:
-        resp["debug"] = {
-            "raw_shape": raw_shape,
             "decode_mode": decode_mode,
-            "raw_first_row": [
-                float(x)
-                for x in (raw[0].tolist() if raw.ndim >= 2 else [float(raw)])
-            ],
-        }
-    return resp

 import json
 import os
+from typing import Any, Dict, List
 import numpy as np
 import tensorflow as tf
 # Paths to the Keras model file and the per-feature statistics JSON; each can
 # be overridden through the environment variable of the same name.
 MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5")
 STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
 # Output class labels; /predict maps probability index i to CLASSES[i].
 CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
+# IMPORTANT: frozen feature order. /predict builds the model input row by
+# iterating this list, so it must match the column order used at training
+# (NOTE(review): verify against the training pipeline).
+# Every name here is also looked up in the stats JSON for its "mean"/"std".
+FEATURES: List[str] = [
+    "autosuf_oper",
+    "cov_improductiva",
+    "ing_cartera_over_ing_total",
+    "gastos_oper_over_cart",
+    "prov_over_cartera",
+    "_margen_bruto",
+    "equity_over_assets",
+    "rend_cart_over_avg_cart",
+    "_assets",
+    "roa_pre_tax",
+    "cartera_vencida_ratio",
+    "gastos_oper_over_ing_oper",
+    "_cartera_bruta",
+    "grado_absorcion",
+    "_equity",
+    "gastos_fin_over_avg_cart",
+    "improductiva",
+    "roe_pre_tax",
+    "debt_to_equity",
+    "_liab",
+    "prov_gasto_over_cart",
+]
 # ------------------------------------------
 # The model and the statistics are loaded once, when this module is imported.
 print("Loading model and stats...")
 model = tf.keras.models.load_model(MODEL_PATH, compile=False)
 # stats maps feature name -> {"mean": ..., "std": ...}; /predict reads both keys.
 with open(STATS_PATH, "r") as f:
     stats: Dict[str, Dict[str, float]] = json.load(f)
+# ---- Per-feature transforms used at training (make all 'higher = better') ----
+# transform_feature() flips the sign of every feature named in this set before
+# the value is z-scored. The selection below assumes these "larger is worse"
+# metrics were negated during dataset preparation -- TODO confirm against the
+# training pipeline.
+# NOTE(review): the z-score uses mean/std from the stats JSON, so those
+# statistics presumably have to describe the negated values too -- verify.
+NEGATE = {
+    "gastos_oper_over_cart",
+    "prov_over_cartera",
+    "cartera_vencida_ratio",
+    "gastos_oper_over_ing_oper",
+    "gastos_fin_over_avg_cart",
+    "improductiva",
+    "debt_to_equity",
+    "prov_gasto_over_cart",
+    # Enable the next line only if training also negated coverage
+    # (to align it with "higher = better"); otherwise leave it commented out.
+    # "cov_improductiva",
+}
 def coerce_float(val: Any) -> float:
     """Parse a number written with either comma or dot as the decimal mark.

     Accepts real numbers as-is, or strings such as::

         "49.709,14"  -> 49709.14   (dot thousands, comma decimal)
         "49,709.14"  -> 49709.14   (comma thousands, dot decimal)
         "0,005"      -> 0.005      (single comma is a decimal mark)
         "1,234,567"  -> 1234567.0  (repeated separator is a thousands mark)
         "  1 234 "   -> 1234.0     (whitespace is ignored)

     Args:
         val: A number, or any object whose ``str()`` is a numeric string.

     Returns:
         The parsed value as a finite ``float``.

     Raises:
         ValueError: If the text is empty, not numeric, or not finite
             (``nan`` / ``inf``).
     """
     import math  # local import: the file's import header may not be visible here

     if isinstance(val, (int, float)):
         result = float(val)
     else:
         # str.split() with no argument also drops tabs and non-breaking spaces,
         # which spreadsheet exports commonly use as a thousands separator.
         s = "".join(str(val).split())
         if s == "":
             raise ValueError("empty")
         n_dot = s.count(".")
         n_comma = s.count(",")
         if n_dot and n_comma:
             # Both present: whichever appears last is the decimal mark.
             if s.rfind(",") > s.rfind("."):
                 s = s.replace(".", "").replace(",", ".")
             else:
                 s = s.replace(",", "")
         elif n_comma > 1:
             # A decimal mark cannot repeat, so these are thousands separators.
             s = s.replace(",", "")
         elif n_comma == 1:
             s = s.replace(",", ".")
         elif n_dot > 1:
             s = s.replace(".", "")
         result = float(s)
     if not math.isfinite(result):
         # NaN/inf would propagate through the z-score into the model input.
         raise ValueError("non-finite value")
     return result
def transform_feature(name: str, raw_val: Any) -> float:
    """Coerce ``raw_val`` to a float and flip its sign if ``name`` is in NEGATE.

    Raises whatever ``coerce_float`` raises for values it cannot parse.
    """
    value = coerce_float(raw_val)
    return -value if name in NEGATE else value
def zscore(x: float, mean: float, std: float) -> float:
    """Return ``(x - mean) / std``, or 0.0 when ``std`` is falsy (e.g. zero)."""
    return (x - mean) / std if std else 0.0
def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
    """Decode CORAL logits into class probabilities.

    Args:
        logits_np: Array of shape (N, K-1) holding one logit per rank
            threshold. A 1-D array is treated as a single row.

    Returns:
        float32 array of shape (N, K); every row is non-negative and sums to 1.

    The sigmoid of threshold k is used as the cumulative value s_k, and class
    probabilities are the successive differences of [1, s_0, ..., s_{K-2}, 0].
    Those differences are only non-negative when s is NON-INCREASING, so each
    row is sorted in descending order. An ascending sort makes every interior
    difference <= 0, which the clip then flattens to ~0, leaving only the
    first and last class able to win.
    """
    logits = np.atleast_2d(np.asarray(logits_np, dtype=np.float32))
    # Overflow-safe sigmoid: sigma(x) = exp(-log(1 + exp(-x))).
    sig = np.exp(-np.logaddexp(0.0, -logits))
    # Simple monotonicity guard: order each row from largest to smallest.
    sig_desc = -np.sort(-sig, axis=1)
    edge = sig_desc[:, :1]
    left = np.concatenate([np.ones_like(edge), sig_desc], axis=1)
    right = np.concatenate([sig_desc, np.zeros_like(edge)], axis=1)
    probs = np.clip(left - right, 1e-12, 1.0)
    # Re-normalize, since clipping can nudge the row sum away from 1.
    probs = probs / probs.sum(axis=1, keepdims=True)
    return probs.astype(np.float32)
 # ------------- FastAPI app ----------------
+app = FastAPI(title="Static Fingerprint API", version="1.1.0")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
 @app.get("/")
 def root():
     """Landing endpoint: confirm the service is up and list the main routes."""
     endpoints = ["GET /health", "POST /predict"]
     return {"message": "Static Fingerprint API is running.", "try": endpoints}
 @app.get("/health")
 def health():
     """Report service status plus the active feature order and sign flips."""
     return dict(
         status="ok",
         features=FEATURES,
         negated_features=sorted(NEGATE),
         classes=CLASSES,
         model_file=MODEL_PATH,
         stats_file=STATS_PATH,
     )
 @app.post("/echo")
 async def echo(req: Request):
     """Send the parsed JSON request body back under the "received" key."""
     return {"received": await req.json()}
 @app.post("/predict")
 async def predict(req: Request):
     """Score one observation and return class probabilities.

     Body: a single JSON object mapping feature name -> value (a number, or a
     numeric string that ``coerce_float`` can parse). Features absent from the
     body are listed under ``missing`` and scored as a raw value of 0.

     Returns:
         On success, a dict with ``input_ok``, ``missing``, ``transformed``,
         ``z_scores``, ``probabilities``, ``predicted_state`` and ``debug``.
         When the request cannot be scored, a dict with an ``error`` key
         instead (malformed JSON, non-object body, unparseable feature values,
         or a model output that does not line up with ``CLASSES``).
     """
     try:
         payload = await req.json()
     except ValueError:
         # json.JSONDecodeError is a ValueError; report it like other bad input
         # rather than letting it surface as an unhandled server error.
         return {"error": "Request body must be valid JSON."}
     if not isinstance(payload, dict):
         return {"error": "Expected a JSON object mapping feature -> value."}
     transformed: Dict[str, float] = {}
     z_detail: Dict[str, float] = {}
     missing: List[str] = []
     invalid: List[str] = []
     z_row: List[float] = []
     # Iterate FEATURES (not the payload) so the row follows the model's order.
     for f in FEATURES:
         mean = float(stats[f]["mean"])
         std = float(stats[f]["std"])
         if f in payload:
             try:
                 tv = transform_feature(f, payload[f])  # same transform as training
             except ValueError:
                 invalid.append(f)
                 continue
         else:
             missing.append(f)
             tv = transform_feature(f, 0.0)  # treat missing as 0 before transform
         if not np.isfinite(tv):
             # NaN/inf would poison the prediction and cannot be JSON-encoded.
             invalid.append(f)
             continue
         transformed[f] = tv
         zf = zscore(tv, mean, std)
         z_detail[f] = zf
         z_row.append(zf)
     if invalid:
         return {
             "error": "Non-numeric value for one or more features.",
             "invalid": invalid,
             "missing": missing,
         }
     X = np.array([z_row], dtype=np.float32)
     # NOTE(review): model.predict is a synchronous call inside an async
     # handler, so it presumably blocks the event loop while it runs.
     raw = model.predict(X, verbose=0)
     # Decode: CORAL (K-1 outputs) vs softmax (K outputs)
     if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
         decode_mode = "auto_coral_monotone"
         probs = coral_probs_from_logits(raw)[0]
     else:
         decode_mode = "softmax_or_logits_norm"
         probs = raw[0]
         s = float(np.sum(probs))
         if s > 0:
             probs = probs / s
     if np.ndim(probs) != 1 or len(probs) != len(CLASSES):
         # Guard the CLASSES[i] <-> probs[i] pairing below against a model
         # whose output width does not match the configured labels.
         return {
             "error": "Model output does not match the configured classes.",
             "debug": {"raw_shape": list(raw.shape), "decode_mode": decode_mode},
         }
     pred_idx = int(np.argmax(probs))
     return {
         "input_ok": (len(missing) == 0),
         "missing": missing,
         "transformed": transformed,  # post-transform, pre-z (should match training inputs)
         "z_scores": z_detail,
         "probabilities": {label: float(p) for label, p in zip(CLASSES, probs)},
         "predicted_state": CLASSES[pred_idx],
         "debug": {
             "raw_shape": list(raw.shape),
             "decode_mode": decode_mode,
             "raw_first_row": [float(x) for x in raw[0].tolist()],
         },
     }