Spaces:

GDMProjects
/

GTT

Sleeping

App Files Files Community

GDMProjects committed on Sep 1, 2025

Commit

93a36b7

verified ·

1 Parent(s): 6430c95

Update app.py

Browse files

Files changed (1) hide show

app.py +414 -409

app.py CHANGED Viewed

@@ -1,409 +1,414 @@
-# app.py
-# pip install "pycaret>=3.3,<4" gradio pandas shap matplotlib
-# --- FORCE NON-INTERACTIVE MATPLOTLIB BACKEND (must be first!) ---
-import os
-os.environ["MPLBACKEND"] = "Agg"   # prevents Tk backend init
-import matplotlib
-matplotlib.use("Agg", force=True)
-import json
-import numpy as np
-import pandas as pd
-import gradio as gr
-import matplotlib.pyplot as plt
-import shap
-from pathlib import Path
-from pycaret.classification import load_model
-# --- config ---
-MODEL_BASENAME = "subset_best_model"
-SAMPLES_CSV    = "GTT.csv"   # fixed hidden file
-TARGET_COL     = "gtt"
-POS_LABEL      = 1
-# subset features used by the model (normalized names)
-SUBSET_FEATURES = [
-    "age",
-    "bmi",
-    "history_of_htn",
-    "history_infectious_cardiovascular_diseae",
-    "previos_obsteric_history_ab",
-    "fbs_first_trimester",
-    "hb",
-    "hct",
-    "cr",
-    "plt",
-    "vit_d3",
-    "sono_nt_nt",
-    "sono_nt_crl",
-]
-# ---------- utils ----------
-def normalize_cols(df: pd.DataFrame) -> pd.DataFrame:
-    out = df.copy()
-    out.columns = (
-        out.columns.str.strip()
-        .str.replace(r"[\s/\\\.\-]+", "_", regex=True)
-        .str.replace(r"__+", "_", regex=True)
-        .str.lower()
-    )
-    return out
-def load_samples():
-    if not Path(SAMPLES_CSV).exists():
-        return None
-    df = pd.read_csv(SAMPLES_CSV)
-    df = normalize_cols(df)
-    needed = set(["id", TARGET_COL] + SUBSET_FEATURES)
-    if not needed.issubset(df.columns):
-        missing = needed - set(df.columns)
-        print(f"[WARN] samples file missing columns: {sorted(missing)}")
-        return None
-    df = df.reset_index(drop=False).rename(columns={"index": "_rid"})  # stable row id for dropdown
-    return df
-def pretty_json(d):
-    return json.dumps(d, ensure_ascii=False, indent=2)
-def as_bool(x, default=False):
-    if x is None or (isinstance(x, float) and pd.isna(x)):
-        return default
-    if isinstance(x, bool):
-        return x
-    if isinstance(x, (int,)):
-        return bool(x)
-    s = str(x).strip().lower()
-    yes = {"1","true","t","yes","y","on","pos","positive"}
-    no  = {"0","false","f","no","n","off","neg","negative"}
-    if s in yes: return True
-    if s in no:  return False
-    try:
-        return bool(int(float(s)))
-    except Exception:
-        return default
-def f_or_none(v):
-    return float(v) if (v is not None and not (isinstance(v, float) and pd.isna(v))) else None
-def build_row_dict(
-    age, bmi, ab_count,
-    htn, cvd,
-    fbs1, hb, hct, cr, plt, vitd3, sono_nt, sono_crl
-):
-    return {
-        "age": age,
-        "bmi": bmi,
-        "previos_obsteric_history_ab": ab_count,
-        "history_of_htn": 1 if htn else 0,
-        "history_infectious_cardiovascular_diseae": 1 if cvd else 0,
-        "fbs_first_trimester": fbs1,
-        "hb": hb,
-        "hct": hct,
-        "cr": cr,
-        "plt": plt,
-        "vit_d3": vitd3,
-        "sono_nt_nt": sono_nt,
-        "sono_nt_crl": sono_crl,
-    }
-def _get_pos_index_and_classes(pipe, pos_label=1):
-    est = None
-    try:
-        est = getattr(pipe, "named_steps", {}).get("trained_model", None)
-    except Exception:
-        est = None
-    if est is None:
-        est = pipe
-    classes = getattr(est, "classes_", None)
-    if classes is not None and pos_label in list(classes):
-        return list(classes).index(pos_label), list(classes)
-    return -1, list(classes) if classes is not None else None
-# ---------- model & samples ----------
-model = load_model(MODEL_BASENAME)
-samples_df = load_samples()
-# ---------- SHAP: background + explainer (built once) ----------
-def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
-    if df_samples is None:
-        # if no CSV, make a tiny synthetic background of zeros
-        bg = pd.DataFrame([{k: 0.0 for k in SUBSET_FEATURES} for _ in range(50)])
-    else:
-        bg = df_samples[SUBSET_FEATURES].copy()
-    # numeric coercion + median impute
-    for c in SUBSET_FEATURES:
-        if c not in bg.columns:
-            bg[c] = np.nan
-    bg = bg.apply(pd.to_numeric, errors="coerce")
-    bg = bg.fillna(bg.median(numeric_only=True))
-    if len(bg) > max_rows:
-        bg = bg.sample(max_rows, random_state=42)
-    return bg.reset_index(drop=True)
-BACKGROUND = _prepare_background(samples_df)
-POS_IDX, _ = _get_pos_index_and_classes(model, POS_LABEL)
-def _f_proba_pos(X_np: np.ndarray) -> np.ndarray:
-    """Model function returning P(class==1) for SHAP. X_np is numpy; convert to DataFrame with right columns."""
-    X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
-    return model.predict_proba(X_df)[:, POS_IDX]
-# SHAP Explainer (KernelExplainer via unified interface)
-try:
-    EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
-except Exception as e:
-    print("[WARN] SHAP explainer init failed:", e)
-    EXPLAINER = None
-def _plot_local_shap(row_dict: dict):
-    """Returns a matplotlib Figure with local SHAP bar chart for the given row."""
-    if EXPLAINER is None:
-        return None
-    X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
-    exp = EXPLAINER(X.values)  # exp.values shape: (1, n_features)
-    vals = exp.values[0]
-    order = np.argsort(np.abs(vals))
-    fig, ax = plt.subplots(figsize=(7, 4.5))
-    ax.barh(np.array(SUBSET_FEATURES)[order], vals[order])
-    ax.axvline(0, linewidth=1)
-    ax.set_title("Local SHAP values (current input)")
-    ax.set_xlabel("Impact on P(class==1)")
-    fig.tight_layout()
-    return fig
-def _plot_global_shap():
-    """Returns a matplotlib Figure with global mean(|SHAP|) bar chart over BACKGROUND."""
-    if EXPLAINER is None:
-        return None
-    exp = EXPLAINER(BACKGROUND.values)
-    mean_abs = np.mean(np.abs(exp.values), axis=0)
-    order = np.argsort(mean_abs)
-    fig, ax = plt.subplots(figsize=(7, 4.5))
-    ax.barh(np.array(SUBSET_FEATURES)[order], mean_abs[order])
-    ax.set_title("Global feature importance (mean |SHAP|)")
-    ax.set_xlabel("Mean |impact on P(class==1)|")
-    fig.tight_layout()
-    return fig
-GLOBAL_FIG = _plot_global_shap()
-# ---------- prediction ----------
-def predict_manual(
-    threshold,
-    age, bmi, ab_count,
-    htn, cvd,
-    fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
-):
-    row = build_row_dict(
-        age, bmi, ab_count,
-        htn, cvd,
-        fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
-    )
-    df = pd.DataFrame([row], columns=SUBSET_FEATURES)
-    proba = model.predict_proba(df)
-    p1 = float(proba[0][POS_IDX])
-    decision = 1 if p1 >= float(threshold) else 0
-    return int(decision), round(p1, 4), ("Positive" if decision==1 else "Negative"), pretty_json(row)
-def explain_local(
-    age, bmi, ab_count,
-    htn, cvd,
-    fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
-):
-    row = build_row_dict(
-        age, bmi, ab_count,
-        htn, cvd,
-        fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
-    )
-    fig = _plot_local_shap(row)
-    return fig
-def explain_global():
-    return GLOBAL_FIG
-def filter_sample_options(filter_target):
-    if samples_df is None:
-        return gr.update(choices=[], value=None)
-    df = samples_df
-    if filter_target in ("0", "1"):
-        df = df[df[TARGET_COL] == int(filter_target)]
-    opts = [ (f"{int(r['_rid'])}: y={int(r[TARGET_COL])}", int(r["_rid"])) for _, r in df.iterrows() ]
-    return gr.update(choices=opts, value=(opts[0][1] if opts else None))
-def load_sample(rid):
-    if samples_df is None or rid is None:
-        return [gr.update()]*13 + [gr.update(value="")]
-    r = samples_df.loc[samples_df["_rid"] == int(rid)]
-    if r.empty:
-        return [gr.update()]*13 + [gr.update(value="")]
-    r = r.iloc[0]
-    updates = [
-        gr.update(value=f_or_none(r.get("age"))),
-        gr.update(value=f_or_none(r.get("bmi"))),
-        gr.update(value=int(r.get("previos_obsteric_history_ab", 0)) if pd.notna(r.get("previos_obsteric_history_ab")) else 0),
-        gr.update(value=as_bool(r.get("history_of_htn"))),
-        gr.update(value=as_bool(r.get("history_infectious_cardiovascular_diseae"))),
-        gr.update(value=f_or_none(r.get("fbs_first_trimester"))),
-        gr.update(value=f_or_none(r.get("hb"))),
-        gr.update(value=f_or_none(r.get("hct"))),
-        gr.update(value=f_or_none(r.get("cr"))),
-        gr.update(value=f_or_none(r.get("plt"))),
-        gr.update(value=f_or_none(r.get("vit_d3"))),
-        gr.update(value=f_or_none(r.get("sono_nt_nt"))),
-        gr.update(value=f_or_none(r.get("sono_nt_crl"))),
-        gr.update(value=str(int(r.get(TARGET_COL))) if pd.notna(r.get(TARGET_COL)) else "")
-    ]
-    return updates
-def compare_correctness(gt_text, decision_label):
-    if gt_text is None or gt_text == "":
-        return "—"
-    try:
-        gt = int(float(gt_text))
-    except Exception:
-        return "—"
-    return "✅ Correct" if gt == int(decision_label) else "❌ Incorrect"
-def get_feature_importance_text():
-    # Keep textual fallback if SHAP not available
-    est = None
-    try:
-        est = getattr(model, "named_steps", {}).get("trained_model", None)
-    except Exception:
-        est = None
-    if est is None:
-        est = model
-    fi = None
-    if hasattr(est, "feature_importances_"):
-        fi = list(est.feature_importances_)
-    elif hasattr(est, "coef_"):
-        coef = est.coef_
-        if coef is not None:
-            fi = list(coef.reshape(-1))
-    if not fi or len(fi) != len(SUBSET_FEATURES):
-        return "Not available for this model."
-    pairs = sorted(zip(SUBSET_FEATURES, fi), key=lambda x: abs(x[1]), reverse=True)
-    return "\n".join([f"- {k}: {v:.4f}" for k, v in pairs])
-GLOBAL_FI_TEXT = get_feature_importance_text()
-# ---------- theme ----------
-theme = gr.themes.Soft(
-    primary_hue="violet",
-    neutral_hue="slate",
-).set(
-    body_background_fill_dark="#0b0f19",
-    block_border_width="1px"
-)
-# ---------- UI ----------
-with gr.Blocks(theme=theme, title="GTT Classifier — Manual + Fixed Samples") as demo:
-    gr.Markdown("## GTT Prediction (Subset Features)\n**PyCaret pipeline · Auto-preprocessing · Thresholdable**")
-    with gr.Row():
-        # (1) Manual input
-        with gr.Column(scale=1):
-            gr.Markdown("### 1) Manual input")
-            age = gr.Number(label="Age (years)", value=0)
-            bmi = gr.Number(label="BMI", value=0)
-            ab_count = gr.Number(label="Previos Obsteric History of Abortion (count)", value=0, precision=0)
-            gr.Markdown("---\n**Clinical flags**")
-            htn = gr.Checkbox(label="History of Hypertension", value=False)
-            cvd = gr.Checkbox(label="History of Cardiovascular disease", value=False)
-            with gr.Accordion("More numeric features (optional)", open=False):
-                fbs1   = gr.Number(label="FBS of First trimester")
-                hb     = gr.Number(label="HB")
-                hct    = gr.Number(label="HCT")
-                cr     = gr.Number(label="CR")
-                plt_v  = gr.Number(label="PLT")
-                vitd3  = gr.Number(label="Vit D3")
-                sono_nt  = gr.Number(label="Sonographic NT")
-                sono_crl = gr.Number(label="Sonographic CRL")
-            with gr.Row():
-                threshold = gr.Slider(0.05, 0.95, value=0.50, step=0.01, label="Decision threshold for class '1'")
-                reset_thr = gr.Button("↻", size="sm")
-            predict_btn = gr.Button("🚀 Predict (manual)", variant="primary")
-            explain_btn = gr.Button("🧠 Explain (SHAP for current input)")
-        # (2) Sample picker
-        with gr.Column(scale=1):
-            gr.Markdown("### 2) Sample picker (from fixed file)")
-            filt = gr.Dropdown(choices=["All", "0", "1"], value="All", label="Filter by target")
-            sample_dd = gr.Dropdown(choices=[], value=None, label="Choose sample row")
-            load_ok = gr.Button("Load sample into manual inputs", variant="secondary")
-        # (3) Results
-        with gr.Column(scale=1):
-            gr.Markdown("### 3) Results")
-            pred_label = gr.Number(label="Predicted label (with threshold decision)", interactive=False)
-            with gr.Row():
-                pred_prob = gr.Number(label="P(class==1)", value=0, interactive=False)
-                decision_text = gr.Textbox(label="Decision @ threshold", interactive=False)
-            gt_box = gr.Textbox(label="Ground truth (sample)", interactive=False)
-            correctness = gr.Textbox(label="Correct vs. ground truth?", interactive=False)
-            with gr.Accordion("Echoed input (row sent to model)", open=False):
-                echoed = gr.Code(label="", language="json")
-            with gr.Accordion("Global feature importance (SHAP)", open=False):
-                global_plot = gr.Plot(value=GLOBAL_FIG)
-                gr.Markdown("> Text fallback (native model importances):")
-                gr.Markdown(GLOBAL_FI_TEXT)
-            with gr.Accordion("Local explanation (SHAP) for current input", open=False):
-                local_plot = gr.Plot()
-    # events
-    demo.load(lambda: filter_sample_options("All"), inputs=None, outputs=[sample_dd], queue=False)
-    filt.change(filter_sample_options, inputs=[filt], outputs=[sample_dd])
-    reset_thr.click(fn=lambda: 0.5, inputs=None, outputs=[threshold])
-    load_ok.click(
-        fn=load_sample,
-        inputs=[sample_dd],
-        outputs=[
-            age, bmi, ab_count,
-            htn, cvd,
-            fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl,
-            gt_box
-        ],
-    )
-    predict_btn.click(
-        fn=predict_manual,
-        inputs=[
-            threshold,
-            age, bmi, ab_count,
-            htn, cvd,
-            fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
-        ],
-        outputs=[pred_label, pred_prob, decision_text, echoed],
-    ).then(
-        fn=compare_correctness,
-        inputs=[gt_box, pred_label],
-        outputs=[correctness]
-    )
-    explain_btn.click(
-        fn=explain_local,
-        inputs=[age, bmi, ab_count, htn, cvd, fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl],
-        outputs=[local_plot]
-    )
-if __name__ == "__main__":
-    os.environ["NO_PROXY"] = "127.0.0.1,localhost"
-    os.environ["no_proxy"] = "127.0.0.1,localhost"
-    demo.launch()

+# app.py
+# pip install "pycaret>=3.3,<4" gradio pandas shap matplotlib
+# --- FORCE NON-INTERACTIVE MATPLOTLIB BACKEND (must be first!) ---
+import os
+os.environ["MPLBACKEND"] = "Agg"   # prevents Tk backend init
+import matplotlib
+matplotlib.use("Agg", force=True)
+import json
+import numpy as np
+import pandas as pd
+import gradio as gr
+import matplotlib.pyplot as plt
+import shap
+from pathlib import Path
+from pycaret.classification import load_model
+from huggingface_hub import hf_hub_download
+# --- config ---
+# NOTE(review): MODEL_BASENAME is not referenced by any other added line in this
+# diff (the model is fetched through REPO/FNAME below) — confirm it is still needed.
+MODEL_BASENAME = "subset_best_model"
+SAMPLES_CSV    = "GTT.csv"   # fixed hidden file
+# Ground-truth column expected in the samples CSV (after column-name normalization).
+TARGET_COL     = "gtt"
+# Class label whose probability column is reported as the "positive" probability.
+POS_LABEL      = 1
+# Hub location of the pickled model; each value can be overridden through the
+# environment. TOKEN is None when HF_TOKEN is not set.
+REPO = os.getenv("MODEL_REPO", "GDMProjects/my-private-model")
+FNAME = os.getenv("MODEL_FILE", "subset_best_model.pkl")
+TOKEN = os.getenv("HF_TOKEN")
+# subset features used by the model (normalized names)
+# The order matters: this list is used as the column order of every DataFrame and
+# array handed to the model and to SHAP.
+SUBSET_FEATURES = [
+    "age",
+    "bmi",
+    "history_of_htn",
+    "history_infectious_cardiovascular_diseae",
+    "previos_obsteric_history_ab",
+    "fbs_first_trimester",
+    "hb",
+    "hct",
+    "cr",
+    "plt",
+    "vit_d3",
+    "sono_nt_nt",
+    "sono_nt_crl",
+]
+# ---------- utils ----------
+def normalize_cols(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of *df* with trimmed, underscore-separated, lower-case column names."""
+    names = df.columns.str.strip()
+    # Runs of whitespace, slashes, backslashes, dots and dashes become one underscore.
+    names = names.str.replace(r"[\s/\\\.\-]+", "_", regex=True)
+    # Collapse any repeated underscores left over from the previous step.
+    names = names.str.replace(r"__+", "_", regex=True)
+    result = df.copy()
+    result.columns = names.str.lower()
+    return result
+def load_samples():
+    """Load the fixed samples CSV, or return None when it is absent or lacks required columns.
+    The returned frame has normalized column names plus a leading "_rid" column holding
+    the original row position, which the sample dropdown uses as a stable identifier.
+    """
+    csv_path = Path(SAMPLES_CSV)
+    if not csv_path.exists():
+        return None
+    df = normalize_cols(pd.read_csv(SAMPLES_CSV))
+    required = {"id", TARGET_COL, *SUBSET_FEATURES}
+    absent = required.difference(df.columns)
+    if absent:
+        print(f"[WARN] samples file missing columns: {sorted(absent)}")
+        return None
+    with_index = df.reset_index(drop=False)
+    return with_index.rename(columns={"index": "_rid"})  # stable row id for dropdown
+def pretty_json(d):
+    """Serialize *d* as JSON indented by two spaces, leaving non-ASCII characters unescaped."""
+    text = json.dumps(d, indent=2, ensure_ascii=False)
+    return text
+def as_bool(x, default=False):
+    """Interpret *x* as a boolean, returning *default* when it is missing or unparseable.
+    Accepts real booleans, integers, common yes/no words and numeric strings.
+    """
+    if x is None:
+        return default
+    if isinstance(x, float) and pd.isna(x):
+        return default
+    # bool must be tested before int because bool is a subclass of int.
+    if isinstance(x, bool):
+        return x
+    if isinstance(x, int):
+        return bool(x)
+    token = str(x).strip().lower()
+    if token in {"1", "true", "t", "yes", "y", "on", "pos", "positive"}:
+        return True
+    if token in {"0", "false", "f", "no", "n", "off", "neg", "negative"}:
+        return False
+    # Last resort: numeric text such as "2.0" is truthy when its integer part is non-zero.
+    try:
+        number = float(token)
+        return bool(int(number))
+    except Exception:
+        return default
+def f_or_none(v):
+    """Convert *v* to float, mapping None and float NaN to None."""
+    if v is None:
+        return None
+    if isinstance(v, float) and pd.isna(v):
+        return None
+    return float(v)
+def build_row_dict(
+    age, bmi, ab_count,
+    htn, cvd,
+    fbs1, hb, hct, cr, plt, vitd3, sono_nt, sono_crl
+):
+    """Map the form values onto the model's feature names.
+    The two history flags are stored as 1/0 integers; every other value is passed
+    through unchanged. Key order is the order shown in the echoed JSON.
+    """
+    htn_flag = int(bool(htn))
+    cvd_flag = int(bool(cvd))
+    row = {}
+    row["age"] = age
+    row["bmi"] = bmi
+    row["previos_obsteric_history_ab"] = ab_count
+    row["history_of_htn"] = htn_flag
+    row["history_infectious_cardiovascular_diseae"] = cvd_flag
+    row["fbs_first_trimester"] = fbs1
+    row["hb"] = hb
+    row["hct"] = hct
+    row["cr"] = cr
+    row["plt"] = plt
+    row["vit_d3"] = vitd3
+    row["sono_nt_nt"] = sono_nt
+    row["sono_nt_crl"] = sono_crl
+    return row
+def _get_pos_index_and_classes(pipe, pos_label=1):
+    """Locate *pos_label* among the estimator's classes.
+    Looks for a "trained_model" step in pipe.named_steps and falls back to *pipe*
+    itself. Returns (index, classes); the index is -1 when the label or the
+    classes_ attribute is missing, and classes is None when classes_ is missing.
+    """
+    try:
+        final_est = getattr(pipe, "named_steps", {}).get("trained_model", None)
+    except Exception:
+        final_est = None
+    if final_est is None:
+        final_est = pipe
+    labels = getattr(final_est, "classes_", None)
+    if labels is None:
+        return -1, None
+    labels = list(labels)
+    if pos_label in labels:
+        return labels.index(pos_label), labels
+    return -1, labels
+# ---------- model & samples ----------
+# Download (or reuse the cached copy of) the pickled pipeline from the Hub.
+local_path = hf_hub_download(repo_id=REPO, filename=FNAME, token=TOKEN)
+# PyCaret's load_model() appends ".pkl" to the name it receives, so hand it the
+# downloaded path without that suffix; otherwise it would look for "*.pkl.pkl".
+_model_path = Path(local_path)
+if _model_path.suffix == ".pkl":
+    _model_path = _model_path.with_suffix("")
+model = load_model(str(_model_path))
+samples_df = load_samples()
+# ---------- SHAP: background + explainer (built once) ----------
+def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
+    """Build the numeric reference frame used as SHAP background data.
+    Uses the SUBSET_FEATURES columns of *df_samples*, or 50 all-zero rows when no
+    samples are available. Values are coerced to numeric, gaps are filled with the
+    column median, and at most *max_rows* rows are kept (seeded random sample).
+    """
+    if df_samples is not None:
+        bg = df_samples[SUBSET_FEATURES].copy()
+    else:
+        # if no CSV, make a tiny synthetic background of zeros
+        bg = pd.DataFrame([dict.fromkeys(SUBSET_FEATURES, 0.0) for _ in range(50)])
+    # Guarantee every expected feature column exists before coercion.
+    absent = [name for name in SUBSET_FEATURES if name not in bg.columns]
+    for name in absent:
+        bg[name] = np.nan
+    # numeric coercion + median impute
+    bg = bg.apply(pd.to_numeric, errors="coerce")
+    medians = bg.median(numeric_only=True)
+    bg = bg.fillna(medians)
+    if len(bg) > max_rows:
+        bg = bg.sample(max_rows, random_state=42)
+    return bg.reset_index(drop=True)
+# Reference data handed to SHAP; built once at import time.
+BACKGROUND = _prepare_background(samples_df)
+# Column of predict_proba() that corresponds to POS_LABEL. The helper returns -1
+# (i.e. the last column) when the label or classes_ cannot be found.
+POS_IDX, _ = _get_pos_index_and_classes(model, POS_LABEL)
+def _f_proba_pos(X_np: np.ndarray) -> np.ndarray:
+    """Model function for SHAP: return the POS_IDX column of predict_proba for each row of X_np."""
+    # SHAP passes bare arrays, so re-attach the feature names in SUBSET_FEATURES order.
+    X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
+    return model.predict_proba(X_df)[:, POS_IDX]
+# SHAP explainer built from a plain callable plus the background array.
+# NOTE(review): this comment used to say "KernelExplainer via unified interface";
+# shap.Explainer chooses the algorithm itself for a callable — confirm which one runs.
+try:
+    EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
+except Exception as e:
+    # Best effort: the app keeps working without SHAP plots when this fails.
+    print("[WARN] SHAP explainer init failed:", e)
+    EXPLAINER = None
+def _plot_local_shap(row_dict: dict):
+    """Build a horizontal bar chart of the SHAP values for one input row.
+    Args:
+        row_dict: Mapping of feature name to value, keyed like SUBSET_FEATURES.
+    Returns:
+        A matplotlib Figure, or None when the SHAP explainer is unavailable.
+    """
+    if EXPLAINER is None:
+        return None
+    # Imported here so the block does not depend on a new file-level import.
+    from matplotlib.figure import Figure
+    X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
+    exp = EXPLAINER(X.values)  # exp.values shape: (1, n_features)
+    vals = exp.values[0]
+    # Ascending |value| so the most influential feature is drawn at the top.
+    order = np.argsort(np.abs(vals))
+    # Create the Figure directly rather than through pyplot: pyplot keeps a global
+    # reference to every figure it creates until it is closed, so one figure per
+    # click would accumulate for the lifetime of the server process.
+    fig = Figure(figsize=(7, 4.5))
+    ax = fig.subplots()
+    ax.barh(np.array(SUBSET_FEATURES)[order], vals[order])
+    ax.axvline(0, linewidth=1)
+    ax.set_title("Local SHAP values (current input)")
+    ax.set_xlabel("Impact on P(class==1)")
+    fig.tight_layout()
+    return fig
+def _plot_global_shap():
+    """Draw the mean absolute SHAP value per feature over BACKGROUND; None without an explainer."""
+    if EXPLAINER is None:
+        return None
+    explanation = EXPLAINER(BACKGROUND.values)
+    importance = np.abs(explanation.values).mean(axis=0)
+    # Ascending order so the most important feature ends up at the top of the chart.
+    ranking = np.argsort(importance)
+    labels = np.array(SUBSET_FEATURES)[ranking]
+    fig, ax = plt.subplots(figsize=(7, 4.5))
+    ax.barh(labels, importance[ranking])
+    ax.set_title("Global feature importance (mean |SHAP|)")
+    ax.set_xlabel("Mean |impact on P(class==1)|")
+    fig.tight_layout()
+    return fig
+# Computed once at import time and reused by the UI.
+GLOBAL_FIG = _plot_global_shap()
+# ---------- prediction ----------
+def predict_manual(
+    threshold,
+    age, bmi, ab_count,
+    htn, cvd,
+    fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
+):
+    """Score one manually entered row and apply the decision threshold.
+    Returns (label, probability rounded to 4 places, "Positive"/"Negative",
+    JSON echo of the row sent to the model).
+    """
+    features = build_row_dict(
+        age, bmi, ab_count,
+        htn, cvd,
+        fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
+    )
+    frame = pd.DataFrame([features], columns=SUBSET_FEATURES)
+    p1 = float(model.predict_proba(frame)[0][POS_IDX])
+    # The positive class wins ties: p1 equal to the threshold counts as positive.
+    if p1 >= float(threshold):
+        decision, verdict = 1, "Positive"
+    else:
+        decision, verdict = 0, "Negative"
+    return int(decision), round(p1, 4), verdict, pretty_json(features)
+def explain_local(
+    age, bmi, ab_count,
+    htn, cvd,
+    fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
+):
+    """Return the local SHAP figure for the values currently in the form."""
+    current_row = build_row_dict(
+        age, bmi, ab_count,
+        htn, cvd,
+        fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
+    )
+    return _plot_local_shap(current_row)
+def explain_global():
+    """Return the global SHAP figure that was computed at import time."""
+    figure = GLOBAL_FIG
+    return figure
+def filter_sample_options(filter_target):
+    """Rebuild the sample dropdown for the chosen target filter.
+    Args:
+        filter_target: "0" or "1" to keep only rows with that target; any other
+            value (e.g. "All") keeps every row.
+    Returns:
+        A gr.update() carrying (label, row id) choices and selecting the first one,
+        or empty choices when no samples are loaded.
+    """
+    if samples_df is None:
+        return gr.update(choices=[], value=None)
+    df = samples_df
+    if filter_target in ("0", "1"):
+        df = df[df[TARGET_COL] == int(filter_target)]
+    opts = []
+    for rid, target in zip(df["_rid"], df[TARGET_COL]):
+        # A missing target must not break the whole dropdown; load_sample() already
+        # tolerates such rows, so show them with a placeholder instead.
+        shown = int(target) if pd.notna(target) else "?"
+        opts.append((f"{int(rid)}: y={shown}", int(rid)))
+    return gr.update(choices=opts, value=(opts[0][1] if opts else None))
+def load_sample(rid):
+    """Return 14 component updates that copy sample row *rid* into the form.
+    The first 13 updates follow the manual-input order; the last one carries the
+    ground-truth text. When there is nothing to load, the inputs are left as they
+    are and the ground-truth box is cleared.
+    """
+    if samples_df is None or rid is None:
+        return [gr.update()]*13 + [gr.update(value="")]
+    matches = samples_df.loc[samples_df["_rid"] == int(rid)]
+    if matches.empty:
+        return [gr.update()]*13 + [gr.update(value="")]
+    rec = matches.iloc[0]
+    def _as_float_update(column):
+        return gr.update(value=f_or_none(rec.get(column)))
+    ab_raw = rec.get("previos_obsteric_history_ab")
+    ab_value = int(ab_raw) if pd.notna(ab_raw) else 0
+    gt_raw = rec.get(TARGET_COL)
+    gt_value = str(int(gt_raw)) if pd.notna(gt_raw) else ""
+    updates = [_as_float_update("age"), _as_float_update("bmi")]
+    updates.append(gr.update(value=ab_value))
+    updates.append(gr.update(value=as_bool(rec.get("history_of_htn"))))
+    updates.append(gr.update(value=as_bool(rec.get("history_infectious_cardiovascular_diseae"))))
+    for column in (
+        "fbs_first_trimester", "hb", "hct", "cr",
+        "plt", "vit_d3", "sono_nt_nt", "sono_nt_crl",
+    ):
+        updates.append(_as_float_update(column))
+    updates.append(gr.update(value=gt_value))
+    return updates
+def compare_correctness(gt_text, decision_label):
+    """Compare the predicted label with the sample's ground truth.
+    Args:
+        gt_text: Ground-truth label as text; may be empty or None when no sample is loaded.
+        decision_label: Predicted label; may be None when no prediction is available.
+    Returns:
+        "✅ Correct" or "❌ Incorrect", or "—" when either side is missing or not numeric.
+    """
+    if gt_text is None or gt_text == "" or decision_label is None:
+        return "—"
+    try:
+        gt = int(float(gt_text))
+        predicted = int(decision_label)
+    except (TypeError, ValueError, OverflowError):
+        # Non-numeric text, NaN or infinity: there is nothing meaningful to compare.
+        return "—"
+    return "✅ Correct" if gt == predicted else "❌ Incorrect"
+def get_feature_importance_text():
+    """Render the estimator's native importances as a Markdown bullet list.
+    Reads feature_importances_ or, failing that, the flattened coef_ from the
+    "trained_model" step (or from the model itself). Returns a fixed fallback
+    message when neither exists or the count differs from SUBSET_FEATURES.
+    """
+    # Keep textual fallback if SHAP not available
+    try:
+        est = getattr(model, "named_steps", {}).get("trained_model", None)
+    except Exception:
+        est = None
+    if est is None:
+        est = model
+    weights = None
+    if hasattr(est, "feature_importances_"):
+        weights = list(est.feature_importances_)
+    elif hasattr(est, "coef_"):
+        raw = est.coef_
+        weights = None if raw is None else list(raw.reshape(-1))
+    if not weights or len(weights) != len(SUBSET_FEATURES):
+        return "Not available for this model."
+    # Largest absolute weight first.
+    ranked = sorted(zip(SUBSET_FEATURES, weights), key=lambda kv: abs(kv[1]), reverse=True)
+    lines = [f"- {k}: {v:.4f}" for k, v in ranked]
+    return "\n".join(lines)
+# Computed once at import time and shown under the global SHAP plot.
+GLOBAL_FI_TEXT = get_feature_importance_text()
+# ---------- theme ----------
+# Soft base theme with violet/slate hues, then two overrides: the dark-mode page
+# background colour and a 1px border on blocks.
+theme = gr.themes.Soft(
+    primary_hue="violet",
+    neutral_hue="slate",
+).set(
+    body_background_fill_dark="#0b0f19",
+    block_border_width="1px"
+)
+# ---------- UI ----------
+# Three equal-width columns: manual inputs, sample picker, results. The event
+# wiring at the bottom relies on the component order used in the inputs/outputs lists.
+with gr.Blocks(theme=theme, title="GTT Classifier — Manual + Fixed Samples") as demo:
+    gr.Markdown("## GTT Prediction (Subset Features)\n**PyCaret pipeline · Auto-preprocessing · Thresholdable**")
+    with gr.Row():
+        # (1) Manual input
+        with gr.Column(scale=1):
+            gr.Markdown("### 1) Manual input")
+            age = gr.Number(label="Age (years)", value=0)
+            bmi = gr.Number(label="BMI", value=0)
+            ab_count = gr.Number(label="Previos Obsteric History of Abortion (count)", value=0, precision=0)
+            gr.Markdown("---\n**Clinical flags**")
+            htn = gr.Checkbox(label="History of Hypertension", value=False)
+            cvd = gr.Checkbox(label="History of Cardiovascular disease", value=False)
+            # These fields have no default value, unlike the inputs above.
+            with gr.Accordion("More numeric features (optional)", open=False):
+                fbs1   = gr.Number(label="FBS of First trimester")
+                hb     = gr.Number(label="HB")
+                hct    = gr.Number(label="HCT")
+                cr     = gr.Number(label="CR")
+                plt_v  = gr.Number(label="PLT")
+                vitd3  = gr.Number(label="Vit D3")
+                sono_nt  = gr.Number(label="Sonographic NT")
+                sono_crl = gr.Number(label="Sonographic CRL")
+            with gr.Row():
+                threshold = gr.Slider(0.05, 0.95, value=0.50, step=0.01, label="Decision threshold for class '1'")
+                reset_thr = gr.Button("↻", size="sm")
+            predict_btn = gr.Button("🚀 Predict (manual)", variant="primary")
+            explain_btn = gr.Button("🧠 Explain (SHAP for current input)")
+        # (2) Sample picker
+        with gr.Column(scale=1):
+            gr.Markdown("### 2) Sample picker (from fixed file)")
+            filt = gr.Dropdown(choices=["All", "0", "1"], value="All", label="Filter by target")
+            # Choices are filled in by filter_sample_options() on page load and on filter change.
+            sample_dd = gr.Dropdown(choices=[], value=None, label="Choose sample row")
+            load_ok = gr.Button("Load sample into manual inputs", variant="secondary")
+        # (3) Results
+        with gr.Column(scale=1):
+            gr.Markdown("### 3) Results")
+            pred_label = gr.Number(label="Predicted label (with threshold decision)", interactive=False)
+            with gr.Row():
+                pred_prob = gr.Number(label="P(class==1)", value=0, interactive=False)
+                decision_text = gr.Textbox(label="Decision @ threshold", interactive=False)
+            gt_box = gr.Textbox(label="Ground truth (sample)", interactive=False)
+            correctness = gr.Textbox(label="Correct vs. ground truth?", interactive=False)
+            with gr.Accordion("Echoed input (row sent to model)", open=False):
+                echoed = gr.Code(label="", language="json")
+            with gr.Accordion("Global feature importance (SHAP)", open=False):
+                global_plot = gr.Plot(value=GLOBAL_FIG)
+                gr.Markdown("> Text fallback (native model importances):")
+                gr.Markdown(GLOBAL_FI_TEXT)
+            with gr.Accordion("Local explanation (SHAP) for current input", open=False):
+                local_plot = gr.Plot()
+    # events
+    # Populate the sample dropdown when the page loads and whenever the filter changes.
+    demo.load(lambda: filter_sample_options("All"), inputs=None, outputs=[sample_dd], queue=False)
+    filt.change(filter_sample_options, inputs=[filt], outputs=[sample_dd])
+    # Reset the slider to the 0.5 threshold.
+    reset_thr.click(fn=lambda: 0.5, inputs=None, outputs=[threshold])
+    # The outputs must stay in the same order as the 14 updates returned by load_sample().
+    load_ok.click(
+        fn=load_sample,
+        inputs=[sample_dd],
+        outputs=[
+            age, bmi, ab_count,
+            htn, cvd,
+            fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl,
+            gt_box
+        ],
+    )
+    # Predict first, then compare the new label with the loaded ground truth.
+    predict_btn.click(
+        fn=predict_manual,
+        inputs=[
+            threshold,
+            age, bmi, ab_count,
+            htn, cvd,
+            fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl
+        ],
+        outputs=[pred_label, pred_prob, decision_text, echoed],
+    ).then(
+        fn=compare_correctness,
+        inputs=[gt_box, pred_label],
+        outputs=[correctness]
+    )
+    explain_btn.click(
+        fn=explain_local,
+        inputs=[age, bmi, ab_count, htn, cvd, fbs1, hb, hct, cr, plt_v, vitd3, sono_nt, sono_crl],
+        outputs=[local_plot]
+    )
+if __name__ == "__main__":
+    # List the loopback addresses in both spellings of the no-proxy variable
+    # before launching the app.
+    # NOTE(review): presumably so requests to the local server bypass a configured
+    # HTTP proxy — confirm.
+    os.environ["NO_PROXY"] = "127.0.0.1,localhost"
+    os.environ["no_proxy"] = "127.0.0.1,localhost"
+    demo.launch()