Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -551,9 +551,15 @@ async def predict(req: Request):
|
|
| 551 |
|
| 552 |
# ============================================================
|
| 553 |
# CORAL ORDINAL HELPERS (from training script)
|
|
|
|
|
|
|
| 554 |
# ============================================================
|
| 555 |
|
| 556 |
def to_cumulative_targets_tf(y_true_int, K_):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 557 |
y = tf.reshape(y_true_int, [-1])
|
| 558 |
y = tf.cast(y, tf.int32)
|
| 559 |
thresholds = tf.range(1, K_, dtype=tf.int32)
|
|
@@ -562,30 +568,37 @@ def to_cumulative_targets_tf(y_true_int, K_):
|
|
| 562 |
|
| 563 |
|
| 564 |
def coral_loss_tf(y_true, logits):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
y_true = tf.reshape(y_true, [-1])
|
| 566 |
y_true = tf.cast(y_true, tf.int32)
|
| 567 |
-
T = to_cumulative_targets_tf(y_true, len(CLASSES))
|
| 568 |
bce = tf.nn.sigmoid_cross_entropy_with_logits(labels=T, logits=logits)
|
| 569 |
return tf.reduce_mean(tf.reduce_sum(bce, axis=1))
|
| 570 |
|
| 571 |
|
| 572 |
-
# ---------- TF helper
|
| 573 |
def _coral_probs_from_logits_tf(logits_tf: tf.Tensor) -> tf.Tensor:
|
| 574 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
sig = tf.math.sigmoid(logits_tf)
|
| 576 |
left = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
|
| 577 |
right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
|
| 582 |
-
"""Numpy wrapper used by decode_logits + SHAP."""
|
| 583 |
-
logits_tf = tf.convert_to_tensor(logits_np, dtype=tf.float32)
|
| 584 |
-
return _coral_probs_from_logits_tf(logits_tf).numpy()
|
| 585 |
|
| 586 |
|
| 587 |
@tf.function
|
| 588 |
def ordinal_accuracy_metric(y_true, y_pred_logits):
|
|
|
|
|
|
|
|
|
|
| 589 |
y_true = tf.reshape(y_true, [-1])
|
| 590 |
y_true = tf.cast(y_true, tf.int32)
|
| 591 |
probs = _coral_probs_from_logits_tf(y_pred_logits)
|
|
@@ -593,11 +606,75 @@ def ordinal_accuracy_metric(y_true, y_pred_logits):
|
|
| 593 |
return tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32))
|
| 594 |
|
| 595 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 596 |
# ============================================================
|
| 597 |
# RECREATE MODEL FROM BEST HYPERPARAMETERS
|
| 598 |
# ============================================================
|
| 599 |
|
| 600 |
def build_model_from_hparams(hp: dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
inputs = tf.keras.Input(shape=(len(FEATURES),))
|
| 602 |
x = inputs
|
| 603 |
|
|
@@ -622,6 +699,7 @@ def build_model_from_hparams(hp: dict):
|
|
| 622 |
if drop > 0:
|
| 623 |
x = tf.keras.layers.Dropout(drop)(x)
|
| 624 |
|
|
|
|
| 625 |
outputs = tf.keras.layers.Dense(len(CLASSES) - 1, activation=None)(x)
|
| 626 |
|
| 627 |
model = tf.keras.Model(inputs, outputs)
|
|
@@ -637,37 +715,81 @@ def build_model_from_hparams(hp: dict):
|
|
| 637 |
# RETRAINING LOGIC + DATASET MGMT
|
| 638 |
# ============================================================
|
| 639 |
|
| 640 |
-
FINGERPRINT_CSV = "fingerprints_db.csv"
|
| 641 |
-
BEST_HP_JSON = "best_params_and_metrics.json"
|
| 642 |
|
| 643 |
|
| 644 |
def load_best_hparams():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
with open(BEST_HP_JSON, "r") as f:
|
| 646 |
js = json.load(f)
|
| 647 |
return js["best_hyperparams"]
|
| 648 |
|
| 649 |
|
| 650 |
def load_fingerprint_dataset():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
df = pd.read_csv(FINGERPRINT_CSV)
|
| 652 |
|
| 653 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
X_raw = df[FEATURES].to_numpy().astype("float32")
|
| 655 |
|
|
|
|
| 656 |
imp = SimpleImputer(strategy="median")
|
| 657 |
sc = StandardScaler()
|
| 658 |
|
| 659 |
X_imp = imp.fit_transform(X_raw)
|
| 660 |
-
X_sc = sc.fit_transform(X_imp)
|
| 661 |
|
| 662 |
return X_sc, y, imp, sc
|
| 663 |
|
| 664 |
|
| 665 |
def retrain_model():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
hp = load_best_hparams()
|
| 667 |
X, y, imp, sc = load_fingerprint_dataset()
|
| 668 |
|
|
|
|
| 669 |
model_new = build_model_from_hparams(hp)
|
| 670 |
|
|
|
|
| 671 |
es = tf.keras.callbacks.EarlyStopping(
|
| 672 |
monitor="loss",
|
| 673 |
patience=15,
|
|
@@ -683,13 +805,13 @@ def retrain_model():
|
|
| 683 |
verbose=1,
|
| 684 |
)
|
| 685 |
|
| 686 |
-
# Update global model + preprocessors
|
| 687 |
global model, imputer, scaler
|
| 688 |
model = model_new
|
| 689 |
imputer = imp
|
| 690 |
scaler = sc
|
| 691 |
|
| 692 |
-
# Rebuild SHAP explainer
|
| 693 |
global EXPLAINER
|
| 694 |
if SHAP_AVAILABLE:
|
| 695 |
try:
|
|
@@ -700,6 +822,7 @@ def retrain_model():
|
|
| 700 |
EXPLAINER = None
|
| 701 |
print("⚠️ Failed to rebuild SHAP explainer:", repr(e))
|
| 702 |
|
|
|
|
| 703 |
return True
|
| 704 |
|
| 705 |
|
|
@@ -710,48 +833,67 @@ def retrain_model():
|
|
| 710 |
@app.post("/append_and_retrain")
|
| 711 |
def append_and_retrain(payload: dict):
|
| 712 |
"""
|
| 713 |
-
|
|
|
|
|
|
|
|
|
|
| 714 |
{
|
| 715 |
-
"
|
| 716 |
-
"
|
| 717 |
-
"
|
|
|
|
| 718 |
"features": {
|
| 719 |
-
"autosuf_oper":
|
| 720 |
-
"improductiva":
|
|
|
|
|
|
|
| 721 |
...
|
|
|
|
| 722 |
}
|
| 723 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 724 |
"""
|
| 725 |
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
|
|
|
| 730 |
|
| 731 |
-
if not
|
| 732 |
-
return {"ok": False, "error": "Missing company/
|
| 733 |
|
| 734 |
if set(feats.keys()) != set(FEATURES):
|
| 735 |
-
return {"ok": False, "error": "
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
"
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
}
|
| 744 |
-
df_new = pd.DataFrame([new_row])
|
| 745 |
|
|
|
|
| 746 |
if os.path.exists(FINGERPRINT_CSV):
|
| 747 |
df = pd.read_csv(FINGERPRINT_CSV)
|
| 748 |
-
df = pd.concat([df,
|
| 749 |
else:
|
| 750 |
-
df =
|
| 751 |
|
| 752 |
df.to_csv(FINGERPRINT_CSV, index=False)
|
| 753 |
|
| 754 |
-
# Retrain model
|
| 755 |
retrain_model()
|
| 756 |
|
| 757 |
-
return {"ok": True, "message": "Fingerprint appended
|
|
|
|
| 551 |
|
| 552 |
# ============================================================
|
| 553 |
# CORAL ORDINAL HELPERS (from training script)
|
| 554 |
+
# (we do NOT redefine coral_probs_from_logits here to avoid
|
| 555 |
+
# clashing with the one already used by decode_logits)
|
| 556 |
# ============================================================
|
| 557 |
|
| 558 |
def to_cumulative_targets_tf(y_true_int, K_):
|
| 559 |
+
"""
|
| 560 |
+
y_true_int: (N,) integer targets 0..K-1
|
| 561 |
+
returns (N, K_-1) with t_k = 1[y >= k], k = 1..K-1
|
| 562 |
+
"""
|
| 563 |
y = tf.reshape(y_true_int, [-1])
|
| 564 |
y = tf.cast(y, tf.int32)
|
| 565 |
thresholds = tf.range(1, K_, dtype=tf.int32)
|
|
|
|
| 568 |
|
| 569 |
|
| 570 |
def coral_loss_tf(y_true, logits):
    """
    CORAL ordinal loss written with TensorFlow ops.

    y_true is flattened to 1-D and cast to int32, then expanded into
    cumulative binary targets by to_cumulative_targets_tf using
    len(CLASSES) as the number of classes. A sigmoid cross-entropy is
    computed per threshold against `logits`, summed along axis 1 for each
    sample, and averaged over all samples.

    Returns a scalar tensor.
    """
    labels_flat = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
    cumulative_targets = to_cumulative_targets_tf(labels_flat, len(CLASSES))
    per_threshold = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=cumulative_targets, logits=logits
    )
    per_sample = tf.reduce_sum(per_threshold, axis=1)
    return tf.reduce_mean(per_sample)
|
| 581 |
|
| 582 |
|
| 583 |
+
# ---------- TF helper (pure TF CORAL probs) ----------
|
| 584 |
def _coral_probs_from_logits_tf(logits_tf: tf.Tensor) -> tf.Tensor:
    """
    Turn CORAL threshold logits into per-class probabilities (pure TF).

    The logits are passed through a sigmoid; the result is padded with a
    leading column of ones and, separately, a trailing column of zeros.
    Subtracting the two padded tensors yields one more column than the
    input has. Values are clipped to [1e-12, 1.0].

    logits_tf is indexed as a rank-2 tensor (rows x thresholds).
    """
    cumulative = tf.math.sigmoid(logits_tf)
    edge = cumulative[:, :1]  # single column, used only as a shape template
    upper = tf.concat([tf.ones_like(edge), cumulative], axis=1)
    lower = tf.concat([cumulative, tf.zeros_like(edge)], axis=1)
    return tf.clip_by_value(upper - lower, 1e-12, 1.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
|
| 597 |
@tf.function
|
| 598 |
def ordinal_accuracy_metric(y_true, y_pred_logits):
|
| 599 |
+
"""
|
| 600 |
+
Exact class accuracy for CORAL outputs (same idea as training script).
|
| 601 |
+
"""
|
| 602 |
y_true = tf.reshape(y_true, [-1])
|
| 603 |
y_true = tf.cast(y_true, tf.int32)
|
| 604 |
probs = _coral_probs_from_logits_tf(y_pred_logits)
|
|
|
|
| 606 |
return tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32))
|
| 607 |
|
| 608 |
|
| 609 |
+
# ============================================================
|
| 610 |
+
# IMPORTS FOR RETRAINING / DATA MGMT
|
| 611 |
+
# (Ok to import here; Python allows imports anywhere in file)
|
| 612 |
+
# ============================================================
|
| 613 |
+
|
| 614 |
+
import pandas as pd
|
| 615 |
+
from sklearn.impute import SimpleImputer
|
| 616 |
+
from sklearn.preprocessing import StandardScaler
|
| 617 |
+
|
| 618 |
+
|
| 619 |
+
# ============================================================
|
| 620 |
+
# LETTER → 5-CLASS GROUP MAPPING (same logic as training code)
|
| 621 |
+
# ============================================================
|
| 622 |
+
|
| 623 |
+
def letter_to_group(letter: str):
    """
    Convert a raw rating letter (AAA, A-, BBB+, BB-, ...) into one of the
    five ordinal groups: Top, Mid-Top, Mid, Mid-Low, Low.

    The input is stringified, stripped and upper-cased first. Dual ratings
    written with a slash (e.g. "AA / AA+") resolve to the stronger of the
    recognised parts; when no part is recognised the first part is used
    as-is.

    Returns the group name, or None when the input is None, blank, made up
    only of slashes, or not a member of any group.
    """
    if letter is None:
        return None

    s = str(letter).strip().upper()
    if not s:
        return None

    # Inner spaces are removed only to split duals such as "AA / AA+".
    # NOTE(review): a single rating with inner spaces ("AA +") is still
    # looked up with its spaces and therefore maps to None -- confirm this
    # matches the training script before changing it.
    s_clean = s.replace(" ", "")
    if "/" in s_clean:
        # Ordered weakest -> strongest, so the largest index is the stronger rating
        order = [
            "E", "D", "C-", "C", "C+",
            "B-", "B", "B+", "BB-", "BB", "BB+",
            "BBB-", "BBB", "BBB+",
            "A-", "A", "A+",
            "AA-", "AA", "AA+",
            "AAA-", "AAA",
        ]
        parts = [p for p in s_clean.split("/") if p]
        if not parts:
            # Input such as "/" carries no rating on either side of the slash;
            # indexing parts[0] here used to raise IndexError.
            return None
        idxs = [order.index(p) for p in parts if p in order]
        s = order[max(idxs)] if idxs else parts[0]

    # Group boundaries, checked from strongest to weakest
    groups = (
        ("Top", {"AAA", "AAA-", "AA+", "AA"}),
        ("Mid-Top", {"AA-", "A+", "A", "A-"}),
        ("Mid", {"BBB+", "BBB", "BBB-", "BB+"}),
        ("Mid-Low", {"BB", "BB-", "B+", "B", "B-"}),
        ("Low", {"C+", "C", "C-", "D", "E"}),
    )
    for group_name, members in groups:
        if s in members:
            return group_name
    return None
|
| 667 |
+
|
| 668 |
+
|
| 669 |
# ============================================================
|
| 670 |
# RECREATE MODEL FROM BEST HYPERPARAMETERS
|
| 671 |
# ============================================================
|
| 672 |
|
| 673 |
def build_model_from_hparams(hp: dict):
|
| 674 |
+
"""
|
| 675 |
+
Rebuilds the CORAL DNN with the same structure & hyperparameters
|
| 676 |
+
as in your training script.
|
| 677 |
+
"""
|
| 678 |
inputs = tf.keras.Input(shape=(len(FEATURES),))
|
| 679 |
x = inputs
|
| 680 |
|
|
|
|
| 699 |
if drop > 0:
|
| 700 |
x = tf.keras.layers.Dropout(drop)(x)
|
| 701 |
|
| 702 |
+
# CORAL output: K-1 logits (K = len(CLASSES))
|
| 703 |
outputs = tf.keras.layers.Dense(len(CLASSES) - 1, activation=None)(x)
|
| 704 |
|
| 705 |
model = tf.keras.Model(inputs, outputs)
|
|
|
|
| 715 |
# RETRAINING LOGIC + DATASET MGMT
|
| 716 |
# ============================================================
|
| 717 |
|
| 718 |
+
FINGERPRINT_CSV = "fingerprints_db.csv" # master DB file
|
| 719 |
+
BEST_HP_JSON = "best_params_and_metrics.json" # hyperparams JSON
|
| 720 |
|
| 721 |
|
| 722 |
def load_best_hparams():
    """
    Read the tuning results file at BEST_HP_JSON and return the value
    stored under its "best_hyperparams" key.

    Raises KeyError if that key is absent from the JSON document.
    """
    with open(BEST_HP_JSON, "r") as handle:
        tuning_results = json.load(handle)
        return tuning_results["best_hyperparams"]
|
| 730 |
|
| 731 |
|
| 732 |
def load_fingerprint_dataset():
    """
    Load the fingerprint database from FINGERPRINT_CSV and prepare it for
    training.

    Steps:
      - if the CSV has no RATING_GROUP column, derive it from RATING_RAW
        via letter_to_group
      - keep only rows whose RATING_GROUP is not NaN
      - encode RATING_GROUP as int32 ids using the position of each name
        in CLASSES
      - take the FEATURES columns as float32, impute missing values with
        the median, then standardise

    The imputer and scaler are fitted fresh on the loaded data.

    Returns (X_sc, y, imp, sc): the scaled float32 feature matrix, the
    int32 label array, the fitted SimpleImputer and the fitted
    StandardScaler.
    """
    frame = pd.read_csv(FINGERPRINT_CSV)

    has_group_column = "RATING_GROUP" in frame.columns
    if not has_group_column:
        frame["RATING_GROUP"] = frame["RATING_RAW"].apply(letter_to_group)

    labelled_mask = frame["RATING_GROUP"].notna()
    frame = frame[labelled_mask].copy()

    # Integer label = index of the group name within CLASSES
    label_ids = {name: idx for idx, name in enumerate(CLASSES)}
    encoded = frame["RATING_GROUP"].map(label_ids)
    y = encoded.astype("int32").to_numpy()

    feature_matrix = frame[FEATURES].to_numpy().astype("float32")

    imp = SimpleImputer(strategy="median")
    sc = StandardScaler()

    X_sc = sc.fit_transform(imp.fit_transform(feature_matrix)).astype("float32")

    return X_sc, y, imp, sc
|
| 773 |
|
| 774 |
|
| 775 |
def retrain_model():
|
| 776 |
+
"""
|
| 777 |
+
Retrains the model on the current fingerprints_db.csv
|
| 778 |
+
using the fixed best hyperparameters.
|
| 779 |
+
|
| 780 |
+
- Rebuilds the model
|
| 781 |
+
- Fits on full (X_sc, y)
|
| 782 |
+
- Updates global model/imputer/scaler
|
| 783 |
+
- Rebuilds SHAP explainer to stay in sync
|
| 784 |
+
"""
|
| 785 |
+
print(">>> RETRAIN: loading dataset")
|
| 786 |
hp = load_best_hparams()
|
| 787 |
X, y, imp, sc = load_fingerprint_dataset()
|
| 788 |
|
| 789 |
+
print(">>> RETRAIN: building model from best hparams")
|
| 790 |
model_new = build_model_from_hparams(hp)
|
| 791 |
|
| 792 |
+
print(">>> RETRAIN: fitting on fingerprint DB")
|
| 793 |
es = tf.keras.callbacks.EarlyStopping(
|
| 794 |
monitor="loss",
|
| 795 |
patience=15,
|
|
|
|
| 805 |
verbose=1,
|
| 806 |
)
|
| 807 |
|
| 808 |
+
# Update global model + preprocessors used by /predict
|
| 809 |
global model, imputer, scaler
|
| 810 |
model = model_new
|
| 811 |
imputer = imp
|
| 812 |
scaler = sc
|
| 813 |
|
| 814 |
+
# Rebuild SHAP explainer so explanations match new model
|
| 815 |
global EXPLAINER
|
| 816 |
if SHAP_AVAILABLE:
|
| 817 |
try:
|
|
|
|
| 822 |
EXPLAINER = None
|
| 823 |
print("⚠️ Failed to rebuild SHAP explainer:", repr(e))
|
| 824 |
|
| 825 |
+
print(">>> RETRAIN COMPLETE")
|
| 826 |
return True
|
| 827 |
|
| 828 |
|
|
|
|
| 833 |
@app.post("/append_and_retrain")
def append_and_retrain(payload: dict):
    """
    Append a new fingerprint row to the CSV at FINGERPRINT_CSV and retrain
    the model on the updated file.

    Expected payload:
    {
        "qtr": "2014Q4",
        "company": "COAC Ambato Ltda",
        "supervisor": "SEPS",
        "rating_raw": "B",
        "features": {
            "autosuf_oper": 0.536154555,
            "improductiva": null,
            ...
            "roa_pre_tax": 1.580296249
        }
    }

    - qtr, company and rating_raw are required; supervisor is optional.
    - features must be an object whose keys are exactly the names in
      FEATURES; individual values may be null.
    - rating_raw is the letter rating (AAA, A-, BBB+, BB-, ...); the
      RATING_GROUP stored with the row is derived via letter_to_group.

    Returns {"ok": True, "message": ...} on success, or
    {"ok": False, "error": ...} when validation fails. Validation failures
    leave the CSV untouched. Exceptions raised by retrain_model() are not
    caught here, and by then the row has already been written.
    """
    qtr = payload.get("qtr")
    company = payload.get("company")
    supervisor = payload.get("supervisor")
    rating_raw = payload.get("rating_raw")
    feats = payload.get("features")

    if not qtr or not company or not rating_raw:
        return {"ok": False, "error": "Missing qtr/company/rating_raw"}

    # "features" may be absent, JSON null, or a non-object (list/number);
    # none of those have .keys(), so reject them with the same structured
    # error instead of letting an AttributeError escape.
    if not isinstance(feats, dict) or set(feats.keys()) != set(FEATURES):
        return {"ok": False, "error": "features must contain all 21 ratio names"}

    rating_group = letter_to_group(rating_raw)
    if rating_group is None:
        return {"ok": False, "error": f"Cannot map rating_raw '{rating_raw}' to 5-class group"}

    # Build the new row: metadata columns, the feature values, and an
    # empty rating_score placeholder
    row = {
        "QTR": qtr,
        "COMPANY": company,
        "Supervisor": supervisor,
        "RATING_RAW": rating_raw,
        "RATING_GROUP": rating_group,
        **feats,
        "rating_score": None,  # optional, can be filled later
    }

    # Append to the existing CSV, or start a new one with this single row
    if os.path.exists(FINGERPRINT_CSV):
        df = pd.read_csv(FINGERPRINT_CSV)
        df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    else:
        df = pd.DataFrame([row])

    df.to_csv(FINGERPRINT_CSV, index=False)

    # Retrain on the full updated database
    retrain_model()

    return {"ok": True, "message": "Fingerprint appended and model retrained"}
|