Spaces:

Synav
/

Explainable-Acute-Leukemia-Mortality-Predictor

Running

App Files Community

Synav committed on Jan 20

Commit

b330c61

verified ·

1 Parent(s): 7d7e2cb

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -57

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ from sklearn.preprocessing import OneHotEncoder, StandardScaler
 from sklearn.impute import SimpleImputer
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
-from sklearn.metrics import roc_auc_score, accuracy_score
 # ============================================================
@@ -35,7 +35,7 @@ from sklearn.metrics import roc_auc_score, accuracy_score
 LABEL_COL = "AA"
 N_FEATURES = 26
 N_NUM = 13  # first 13 numeric, last 13 categorical
-LABEL_COL = "AA"
 def get_feature_cols_from_df(df: pd.DataFrame):
     """
@@ -103,33 +103,39 @@ def coerce_binary_label(y: pd.Series):
     pos = uniq_str[-1]
     return y.astype(str).eq(pos).astype(int).to_numpy(), pos
-def infer_schema_from_df(df: pd.DataFrame):
-    """
-    Uses the Excel header row (df.columns) as variable names.
-    Assumptions:
-      - First 26 columns are features (in order)
-      - Column 'AA' is the binary label and must exist
-      - Numeric = first 13 features; Categorical = remaining 13
-    """
-    if LABEL_COL not in df.columns:
-        raise ValueError("Missing required label column 'AA'.")
-    # Keep original column order, exclude AA
-    feature_cols_all = [c for c in df.columns if c != LABEL_COL]
-    if len(feature_cols_all) < N_FEATURES:
-        raise ValueError(f"Need at least {N_FEATURES} feature columns (excluding AA). Found {len(feature_cols_all)}.")
-    feature_cols = feature_cols_all[:N_FEATURES]
-    num_cols = feature_cols[:N_NUM]
-    cat_cols = feature_cols[N_NUM:]
-    return feature_cols, num_cols, cat_cols
 # ============================================================
 # Training + persistence
 # ============================================================
 def train_and_save(df: pd.DataFrame, feature_cols, num_cols, cat_cols):
     X = df[feature_cols].copy()
     y_raw = df[LABEL_COL].copy()
@@ -211,32 +217,9 @@ def train_and_save(df: pd.DataFrame, feature_cols, num_cols, cat_cols):
     with open("meta.json", "w", encoding="utf-8") as f:
         json.dump(meta, f, indent=2)
-    return pipe, meta, X
-def compute_classification_metrics(y_true, y_proba, threshold: float = 0.5):
-    y_pred = (y_proba >= threshold).astype(int)
-    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
-    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0  # recall, TPR
-    specificity = tn / (tn + fp) if (tn + fp) else 0.0  # TNR
-    precision = precision_score(y_true, y_pred, zero_division=0)
-    recall = recall_score(y_true, y_pred, zero_division=0)
-    f1 = f1_score(y_true, y_pred, zero_division=0)
-    acc = accuracy_score(y_true, y_pred)
-    bacc = balanced_accuracy_score(y_true, y_pred)
-    return {
-        "threshold": float(threshold),
-        "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
-        "sensitivity": float(sensitivity),
-        "specificity": float(specificity),
-        "precision": float(precision),
-        "recall": float(recall),
-        "f1": float(f1),
-        "accuracy": float(acc),
-        "balanced_accuracy": float(bacc),
-    }
@@ -446,7 +429,7 @@ with tab_train:
             if st.button("Train model"):
                 with st.spinner("Training model..."):
-                    pipe, meta, X_bg = train_and_save(df, feature_cols, num_cols, cat_cols)
                     explainer = build_shap_explainer(pipe, X_bg)
                     st.session_state.pipe = pipe
@@ -533,15 +516,15 @@ with tab_train:
                 help="Used as releases/<version>/ in the model repository",
             )
-        if st.button("Publish model.joblib + meta.json to Model Repo"):
-                try:
-                    with st.spinner("Uploading to Hugging Face Model repo..."):
-                        paths = publish_to_hub(MODEL_REPO_ID, version_tag)
-                    st.success("Uploaded successfully to your model repository.")
-                    st.json(paths)
-                except Exception as e:
-                    st.error(f"Upload failed: {e}")
 # ---------------- PREDICT ----------------

 from sklearn.impute import SimpleImputer
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 # ============================================================
 LABEL_COL = "AA"
 N_FEATURES = 26
 N_NUM = 13  # first 13 numeric, last 13 categorical
 def get_feature_cols_from_df(df: pd.DataFrame):
     """
     pos = uniq_str[-1]
     return y.astype(str).eq(pos).astype(int).to_numpy(), pos
 # ============================================================
 # Training + persistence
 # ============================================================
+def compute_classification_metrics(y_true, y_proba, threshold: float = 0.5):
+    y_pred = (y_proba >= threshold).astype(int)
+    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
+    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0  # recall, TPR
+    specificity = tn / (tn + fp) if (tn + fp) else 0.0  # TNR
+    precision = precision_score(y_true, y_pred, zero_division=0)
+    recall = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    acc = accuracy_score(y_true, y_pred)
+    bacc = balanced_accuracy_score(y_true, y_pred)
+    return {
+        "threshold": float(threshold),
+        "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
+        "sensitivity": float(sensitivity),
+        "specificity": float(specificity),
+        "precision": float(precision),
+        "recall": float(recall),
+        "f1": float(f1),
+        "accuracy": float(acc),
+        "balanced_accuracy": float(bacc),
+    }
 def train_and_save(df: pd.DataFrame, feature_cols, num_cols, cat_cols):
     X = df[feature_cols].copy()
     y_raw = df[LABEL_COL].copy()
     with open("meta.json", "w", encoding="utf-8") as f:
         json.dump(meta, f, indent=2)
+    return pipe, meta, X, y_test, proba
             if st.button("Train model"):
                 with st.spinner("Training model..."):
+                    pipe, meta, X_bg, y_test, proba = train_and_save(df, feature_cols, num_cols, cat_cols)
                     explainer = build_shap_explainer(pipe, X_bg)
                     st.session_state.pipe = pipe
                 help="Used as releases/<version>/ in the model repository",
             )
+            if st.button("Publish model.joblib + meta.json to Model Repo"):
+                    try:
+                        with st.spinner("Uploading to Hugging Face Model repo..."):
+                            paths = publish_to_hub(MODEL_REPO_ID, version_tag)
+                        st.success("Uploaded successfully to your model repository.")
+                        st.json(paths)
+                    except Exception as e:
+                        st.error(f"Upload failed: {e}")
 # ---------------- PREDICT ----------------