COCODEDE04 committed on
Commit
dfe0810
·
verified ·
1 Parent(s): 7894cc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -41
app.py CHANGED
@@ -551,9 +551,15 @@ async def predict(req: Request):
551
 
552
  # ============================================================
553
  # CORAL ORDINAL HELPERS (from training script)
 
 
554
  # ============================================================
555
 
556
  def to_cumulative_targets_tf(y_true_int, K_):
 
 
 
 
557
  y = tf.reshape(y_true_int, [-1])
558
  y = tf.cast(y, tf.int32)
559
  thresholds = tf.range(1, K_, dtype=tf.int32)
@@ -562,30 +568,37 @@ def to_cumulative_targets_tf(y_true_int, K_):
562
 
563
 
564
  def coral_loss_tf(y_true, logits):
 
 
 
 
 
565
  y_true = tf.reshape(y_true, [-1])
566
  y_true = tf.cast(y_true, tf.int32)
567
- T = to_cumulative_targets_tf(y_true, len(CLASSES))
568
  bce = tf.nn.sigmoid_cross_entropy_with_logits(labels=T, logits=logits)
569
  return tf.reduce_mean(tf.reduce_sum(bce, axis=1))
570
 
571
 
572
- # ---------- TF helper & numpy wrapper (unified version) ----------
573
  def _coral_probs_from_logits_tf(logits_tf: tf.Tensor) -> tf.Tensor:
574
- """Pure TF CORAL probability transform."""
 
 
 
 
575
  sig = tf.math.sigmoid(logits_tf)
576
  left = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
577
  right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
578
- return tf.clip_by_value(left - right, 1e-12, 1.0)
579
-
580
-
581
- def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
582
- """Numpy wrapper used by decode_logits + SHAP."""
583
- logits_tf = tf.convert_to_tensor(logits_np, dtype=tf.float32)
584
- return _coral_probs_from_logits_tf(logits_tf).numpy()
585
 
586
 
587
  @tf.function
588
  def ordinal_accuracy_metric(y_true, y_pred_logits):
 
 
 
589
  y_true = tf.reshape(y_true, [-1])
590
  y_true = tf.cast(y_true, tf.int32)
591
  probs = _coral_probs_from_logits_tf(y_pred_logits)
@@ -593,11 +606,75 @@ def ordinal_accuracy_metric(y_true, y_pred_logits):
593
  return tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32))
594
 
595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
  # ============================================================
597
  # RECREATE MODEL FROM BEST HYPERPARAMETERS
598
  # ============================================================
599
 
600
  def build_model_from_hparams(hp: dict):
 
 
 
 
601
  inputs = tf.keras.Input(shape=(len(FEATURES),))
602
  x = inputs
603
 
@@ -622,6 +699,7 @@ def build_model_from_hparams(hp: dict):
622
  if drop > 0:
623
  x = tf.keras.layers.Dropout(drop)(x)
624
 
 
625
  outputs = tf.keras.layers.Dense(len(CLASSES) - 1, activation=None)(x)
626
 
627
  model = tf.keras.Model(inputs, outputs)
@@ -637,37 +715,81 @@ def build_model_from_hparams(hp: dict):
637
  # RETRAINING LOGIC + DATASET MGMT
638
  # ============================================================
639
 
640
- FINGERPRINT_CSV = "fingerprints_db.csv"
641
- BEST_HP_JSON = "best_params_and_metrics.json"
642
 
643
 
644
  def load_best_hparams():
 
 
 
 
645
  with open(BEST_HP_JSON, "r") as f:
646
  js = json.load(f)
647
  return js["best_hyperparams"]
648
 
649
 
650
  def load_fingerprint_dataset():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
  df = pd.read_csv(FINGERPRINT_CSV)
652
 
653
- y = df["rating"].map({c: i for i, c in enumerate(CLASSES)}).astype("int32").to_numpy()
 
 
 
 
 
 
 
 
 
 
654
  X_raw = df[FEATURES].to_numpy().astype("float32")
655
 
 
656
  imp = SimpleImputer(strategy="median")
657
  sc = StandardScaler()
658
 
659
  X_imp = imp.fit_transform(X_raw)
660
- X_sc = sc.fit_transform(X_imp)
661
 
662
  return X_sc, y, imp, sc
663
 
664
 
665
  def retrain_model():
 
 
 
 
 
 
 
 
 
 
666
  hp = load_best_hparams()
667
  X, y, imp, sc = load_fingerprint_dataset()
668
 
 
669
  model_new = build_model_from_hparams(hp)
670
 
 
671
  es = tf.keras.callbacks.EarlyStopping(
672
  monitor="loss",
673
  patience=15,
@@ -683,13 +805,13 @@ def retrain_model():
683
  verbose=1,
684
  )
685
 
686
- # Update global model + preprocessors
687
  global model, imputer, scaler
688
  model = model_new
689
  imputer = imp
690
  scaler = sc
691
 
692
- # Rebuild SHAP explainer to match new model
693
  global EXPLAINER
694
  if SHAP_AVAILABLE:
695
  try:
@@ -700,6 +822,7 @@ def retrain_model():
700
  EXPLAINER = None
701
  print("⚠️ Failed to rebuild SHAP explainer:", repr(e))
702
 
 
703
  return True
704
 
705
 
@@ -710,48 +833,67 @@ def retrain_model():
710
  @app.post("/append_and_retrain")
711
  def append_and_retrain(payload: dict):
712
  """
713
- payload format:
 
 
 
714
  {
715
- "company": "...",
716
- "date": "2025-Q1",
717
- "rating": "Mid",
 
718
  "features": {
719
- "autosuf_oper": ...,
720
- "improductiva": ...,
 
 
721
  ...
 
722
  }
723
  }
 
 
 
 
724
  """
725
 
726
- company = payload.get("company")
727
- date = payload.get("date")
728
- rating = payload.get("rating")
729
- feats = payload.get("features", {})
 
730
 
731
- if not company or not date or not rating:
732
- return {"ok": False, "error": "Missing company/date/rating"}
733
 
734
  if set(feats.keys()) != set(FEATURES):
735
- return {"ok": False, "error": "Features missing or incorrect"}
736
-
737
- # Append row
738
- new_row = {
739
- "company": company,
740
- "date": date,
741
- "rating": rating,
742
- **feats
 
 
 
 
 
 
 
743
  }
744
- df_new = pd.DataFrame([new_row])
745
 
 
746
  if os.path.exists(FINGERPRINT_CSV):
747
  df = pd.read_csv(FINGERPRINT_CSV)
748
- df = pd.concat([df, df_new], ignore_index=True)
749
  else:
750
- df = df_new
751
 
752
  df.to_csv(FINGERPRINT_CSV, index=False)
753
 
754
- # Retrain model
755
  retrain_model()
756
 
757
- return {"ok": True, "message": "Fingerprint appended + model retrained"}
 
551
 
552
  # ============================================================
553
  # CORAL ORDINAL HELPERS (from training script)
554
+ # (we do NOT redefine coral_probs_from_logits here to avoid
555
+ # clashing with the one already used by decode_logits)
556
  # ============================================================
557
 
558
  def to_cumulative_targets_tf(y_true_int, K_):
559
+ """
560
+ y_true_int: (N,) integer targets 0..K-1
561
+ returns (N, K_-1) with t_k = 1[y >= k], k = 1..K-1
562
+ """
563
  y = tf.reshape(y_true_int, [-1])
564
  y = tf.cast(y, tf.int32)
565
  thresholds = tf.range(1, K_, dtype=tf.int32)
 
568
 
569
 
570
def coral_loss_tf(y_true, logits):
    """CORAL ordinal loss, implemented purely in TensorFlow.

    Args:
        y_true: (N,) or (N, 1) tensor of integer labels in 0..K-1.
        logits: (N, K-1) tensor of cumulative-threshold logits.

    Returns:
        Scalar: mean over the batch of the per-sample sum of binary
        cross-entropies against the cumulative targets.
    """
    labels = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
    targets = to_cumulative_targets_tf(labels, len(CLASSES))  # (N, K-1)
    per_threshold = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=targets, logits=logits
    )
    per_sample = tf.reduce_sum(per_threshold, axis=1)
    return tf.reduce_mean(per_sample)
581
 
582
 
583
# ---------- TF helper (pure TF CORAL probs) ----------
def _coral_probs_from_logits_tf(logits_tf: tf.Tensor) -> tf.Tensor:
    """Convert CORAL logits to class probabilities, entirely in TF.

    Args:
        logits_tf: (N, K-1) cumulative-threshold logits.

    Returns:
        (N, K) class probabilities, computed as adjacent differences of
        the cumulative sigmoids and clipped away from zero for numeric
        safety (used inside the metric, so it must stay graph-friendly).
    """
    cum = tf.math.sigmoid(logits_tf)
    ones = tf.ones_like(cum[:, :1])
    zeros = tf.zeros_like(cum[:, :1])
    upper = tf.concat([ones, cum], axis=1)
    lower = tf.concat([cum, zeros], axis=1)
    return tf.clip_by_value(upper - lower, 1e-12, 1.0)
 
 
 
 
 
595
 
596
 
597
  @tf.function
598
  def ordinal_accuracy_metric(y_true, y_pred_logits):
599
+ """
600
+ Exact class accuracy for CORAL outputs (same idea as training script).
601
+ """
602
  y_true = tf.reshape(y_true, [-1])
603
  y_true = tf.cast(y_true, tf.int32)
604
  probs = _coral_probs_from_logits_tf(y_pred_logits)
 
606
  return tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32))
607
 
608
 
609
+ # ============================================================
610
+ # IMPORTS FOR RETRAINING / DATA MGMT
611
+ # (Ok to import here; Python allows imports anywhere in file)
612
+ # ============================================================
613
+
614
+ import pandas as pd
615
+ from sklearn.impute import SimpleImputer
616
+ from sklearn.preprocessing import StandardScaler
617
+
618
+
619
+ # ============================================================
620
+ # LETTER → 5-CLASS GROUP MAPPING (same logic as training code)
621
+ # ============================================================
622
+
623
def letter_to_group(letter: str):
    """
    Converts raw rating letters (AAA, A-, BBB+, BB-, etc.)
    into the 5 ordinal groups used by the model:
        Top, Mid-Top, Mid, Mid-Low, Low

    Returns None when the input is None/empty or cannot be mapped.
    """
    if letter is None:
        return None

    # Normalise once: uppercase and drop ALL whitespace, so inputs like
    # "AA +" or " bbb- " map the same as "AA+"/"BBB-". (Previously spaces
    # were only stripped on the dual-rating path, so "AA +" fell through
    # to None.)
    s = str(letter).strip().upper().replace(" ", "")
    if s == "":
        return None

    # Duals like "AA/AA+": take the stronger (higher-ranked) side.
    if "/" in s:
        order = [
            "E", "D", "C-", "C", "C+",
            "B-", "B", "B+", "BB-", "BB", "BB+",
            "BBB-", "BBB", "BBB+",
            "A-", "A", "A+",
            "AA-", "AA", "AA+",
            "AAA-", "AAA",
        ]
        parts = [p for p in s.split("/") if p]
        if not parts:
            # e.g. "/" or "//" — nothing to map (previously raised
            # IndexError via parts[0])
            return None
        idxs = [order.index(p) for p in parts if p in order]
        if idxs:
            s = order[max(idxs)]  # stronger (higher index)
        else:
            s = parts[0]

    # Group boundaries (as in the training script)
    g1 = {"AAA", "AAA-", "AA+", "AA"}      # Top
    g2 = {"AA-", "A+", "A", "A-"}          # Mid-Top
    g3 = {"BBB+", "BBB", "BBB-", "BB+"}    # Mid
    g4 = {"BB", "BB-", "B+", "B", "B-"}    # Mid-Low
    g5 = {"C+", "C", "C-", "D", "E"}       # Low

    if s in g1: return "Top"
    if s in g2: return "Mid-Top"
    if s in g3: return "Mid"
    if s in g4: return "Mid-Low"
    if s in g5: return "Low"
    return None
667
+
668
+
669
  # ============================================================
670
  # RECREATE MODEL FROM BEST HYPERPARAMETERS
671
  # ============================================================
672
 
673
  def build_model_from_hparams(hp: dict):
674
+ """
675
+ Rebuilds the CORAL DNN with the same structure & hyperparameters
676
+ as in your training script.
677
+ """
678
  inputs = tf.keras.Input(shape=(len(FEATURES),))
679
  x = inputs
680
 
 
699
  if drop > 0:
700
  x = tf.keras.layers.Dropout(drop)(x)
701
 
702
+ # CORAL output: K-1 logits (K = len(CLASSES))
703
  outputs = tf.keras.layers.Dense(len(CLASSES) - 1, activation=None)(x)
704
 
705
  model = tf.keras.Model(inputs, outputs)
 
715
  # RETRAINING LOGIC + DATASET MGMT
716
  # ============================================================
717
 
718
+ FINGERPRINT_CSV = "fingerprints_db.csv" # master DB file
719
+ BEST_HP_JSON = "best_params_and_metrics.json" # hyperparams JSON
720
 
721
 
722
def load_best_hparams(path=None):
    """
    Loads best hyperparameters from the tuning JSON.

    Args:
        path: optional path override; defaults to BEST_HP_JSON. The
              default keeps the original zero-argument call sites working.

    Returns:
        The dict stored under the "best_hyperparams" key.

    Raises:
        FileNotFoundError if the file is missing; KeyError if the JSON
        does not contain "best_hyperparams".
    """
    if path is None:
        path = BEST_HP_JSON
    with open(path, "r") as f:
        js = json.load(f)
    return js["best_hyperparams"]
730
 
731
 
732
def load_fingerprint_dataset():
    """
    Loads the full fingerprint DB from FINGERPRINT_CSV.

    Expected columns (at minimum):
        - QTR, COMPANY, Supervisor, RATING_RAW
        - the 21 ratio features named exactly as in FEATURES
        - rating_score (ignored for training)

    Steps:
        - derive RATING_GROUP (Top/Mid-Top/...) from RATING_RAW if missing
        - drop rows whose RATING_GROUP is missing OR not a known class
        - impute missing feature values with the median
        - scale with a freshly fitted StandardScaler

    Returns:
        (X_sc, y, imp, sc): scaled float32 features, int32 labels 0..K-1,
        and the fitted SimpleImputer / StandardScaler (kept so /predict
        can reuse the exact same preprocessing).
    """
    df = pd.read_csv(FINGERPRINT_CSV)

    # Derive the 5-class group if not already present
    if "RATING_GROUP" not in df.columns:
        df["RATING_GROUP"] = df["RATING_RAW"].apply(letter_to_group)

    # Keep only rows whose group is one of CLASSES. Filtering with isin()
    # also covers NaN, and guards against a pre-existing RATING_GROUP
    # column containing unexpected labels — those would map to NaN below
    # and crash the int32 cast.
    class_to_id = {c: i for i, c in enumerate(CLASSES)}
    df = df[df["RATING_GROUP"].isin(class_to_id)].copy()

    # y labels 0..K-1
    y = df["RATING_GROUP"].map(class_to_id).astype("int32").to_numpy()

    # X features
    X_raw = df[FEATURES].to_numpy().astype("float32")

    # Fit a fresh imputer + scaler on the full dataset
    imp = SimpleImputer(strategy="median")
    sc = StandardScaler()

    X_imp = imp.fit_transform(X_raw)
    X_sc = sc.fit_transform(X_imp).astype("float32")

    return X_sc, y, imp, sc
773
 
774
 
775
  def retrain_model():
776
+ """
777
+ Retrains the model on the current fingerprints_db.csv
778
+ using the fixed best hyperparameters.
779
+
780
+ - Rebuilds the model
781
+ - Fits on full (X_sc, y)
782
+ - Updates global model/imputer/scaler
783
+ - Rebuilds SHAP explainer to stay in sync
784
+ """
785
+ print(">>> RETRAIN: loading dataset")
786
  hp = load_best_hparams()
787
  X, y, imp, sc = load_fingerprint_dataset()
788
 
789
+ print(">>> RETRAIN: building model from best hparams")
790
  model_new = build_model_from_hparams(hp)
791
 
792
+ print(">>> RETRAIN: fitting on fingerprint DB")
793
  es = tf.keras.callbacks.EarlyStopping(
794
  monitor="loss",
795
  patience=15,
 
805
  verbose=1,
806
  )
807
 
808
+ # Update global model + preprocessors used by /predict
809
  global model, imputer, scaler
810
  model = model_new
811
  imputer = imp
812
  scaler = sc
813
 
814
+ # Rebuild SHAP explainer so explanations match new model
815
  global EXPLAINER
816
  if SHAP_AVAILABLE:
817
  try:
 
822
  EXPLAINER = None
823
  print("⚠️ Failed to rebuild SHAP explainer:", repr(e))
824
 
825
+ print(">>> RETRAIN COMPLETE")
826
  return True
827
 
828
 
 
833
@app.post("/append_and_retrain")
def append_and_retrain(payload: dict):
    """
    Appends a new fingerprint row to fingerprints_db.csv
    and retrains the model.

    Expected payload:
        {
          "qtr": "2014Q4",
          "company": "COAC Ambato Ltda",
          "supervisor": "SEPS",
          "rating_raw": "B",
          "features": { "<each name in FEATURES>": <float or null>, ... }
        }

    - rating_raw is the letter rating (AAA, A-, BBB+, BB-, ...);
      it is mapped to RATING_GROUP (Top / Mid-Top / Mid / Mid-Low / Low)
      with the same logic as the training script.

    Returns:
        {"ok": True, "message": ...} on success,
        {"ok": False, "error": ...} on validation failure.
    """
    qtr = payload.get("qtr")
    company = payload.get("company")
    supervisor = payload.get("supervisor")
    rating_raw = payload.get("rating_raw")
    feats = payload.get("features", {})

    if not qtr or not company or not rating_raw:
        return {"ok": False, "error": "Missing qtr/company/rating_raw"}

    # The feature dict must match FEATURES exactly (no missing, no extras).
    # Report the offending keys instead of a hard-coded count ("21"),
    # which would silently go stale if FEATURES ever changes.
    if set(feats.keys()) != set(FEATURES):
        missing = sorted(set(FEATURES) - set(feats))
        extra = sorted(set(feats) - set(FEATURES))
        return {
            "ok": False,
            "error": (
                f"features must contain exactly the {len(FEATURES)} ratio "
                f"names; missing={missing}, unexpected={extra}"
            ),
        }

    rating_group = letter_to_group(rating_raw)
    if rating_group is None:
        return {"ok": False, "error": f"Cannot map rating_raw '{rating_raw}' to 5-class group"}

    # Build the new row matching the CSV schema
    row = {
        "QTR": qtr,
        "COMPANY": company,
        "Supervisor": supervisor,
        "RATING_RAW": rating_raw,
        "RATING_GROUP": rating_group,
        **feats,
        "rating_score": None,  # optional, can be filled later
    }

    # Append the row to the CSV (create the file on first use)
    if os.path.exists(FINGERPRINT_CSV):
        df = pd.read_csv(FINGERPRINT_CSV)
        df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    else:
        df = pd.DataFrame([row])

    df.to_csv(FINGERPRINT_CSV, index=False)

    # Retrain model on the full updated DB
    retrain_model()

    return {"ok": True, "message": "Fingerprint appended and model retrained"}