Spaces:

GDMProjects
/

Insulin

Sleeping

App Files Files Community

GDMProjects committed on Sep 1, 2025

Commit

f664075

verified ·

1 Parent(s): e9731f0

Upload 2 files

Browse files

Files changed (2) hide show

insulin.py +300 -0
requirements.txt +5 -0

insulin.py ADDED Viewed

	@@ -0,0 +1,300 @@

+#!/usr/bin/env python3
+# Gradio app for PyCaret insulin classifier
+# - Manual inputs (fixed 11 features)
+# - Fixed sample file loaded at startup (Excel/CSV)
+# - User selects a sample from dropdown and "Predict & Compare"
+# - No upload and no "load into form" section
+# ---------- Fixed config ----------
+# Base name of the saved PyCaret model; a trailing ".pkl" (if any) is stripped before load_model is called.
+MODEL_BASE   = "best_insulin_model"          # expects ./best_insulin_model.pkl
+# Machine-specific absolute Windows path. When the file cannot be loaded the app still starts,
+# but with an empty sample picker and a warning banner in the UI.
+SAMPLE_FILE  = r"C:\Users\A\Desktop\My_Projects\0-AI\GDM\Insulin.xlsx"  # <- EDIT to your path
+# Name of the ground-truth column; matched after lower-casing and dropping non-alphanumerics.
+TARGET_NAME  = "insulin"                     # case-insensitive in the sample file
+POS_CLASS    = 1                             # positive class label for thresholding (binary)
+# NOTE(review): these three values are defined here but are not passed to demo.launch()
+# in the launch section of this file — confirm whether they are meant to be used.
+HOST, PORT, SHARE = "0.0.0.0", 7860, True
+# ---------- Env hygiene ----------
+import os
+# List the loopback hosts in NO_PROXY (both spellings of the variable are set).
+# Presumably this keeps Gradio's local self-requests away from a proxy — TODO confirm.
+os.environ["NO_PROXY"] = "127.0.0.1,localhost,::1"
+os.environ["no_proxy"] = "127.0.0.1,localhost,::1"
+# Drop any HTTP(S) proxy settings inherited from the parent environment.
+for _k in ("HTTP_PROXY","http_proxy","HTTPS_PROXY","https_proxy"):
+    os.environ.pop(_k, None)
+# setdefault: an explicit GRADIO_OPEN_BROWSER value from the environment is respected.
+os.environ.setdefault("GRADIO_OPEN_BROWSER", "false")
+# Unconditionally overrides whatever value the environment had for the analytics flag.
+os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
+# ---------- Imports ----------
+from typing import Any, Dict, Optional, Tuple, List
+import re
+import numpy as np
+import pandas as pd
+import gradio as gr
+from pycaret.classification import load_model, predict_model
+# ---------- Feature space (exactly as trained) ----------
+# Column names, in the order used to build every row sent to the model.
+# NOTE(review): the unusual spellings ("diseae", "obsteric", "Previos") look like typos, but per
+# the section header these names mirror the training data — do not "fix" them without retraining.
+FEATURES = [
+    "age",
+    "BMI",
+    "history_of_htn",
+    "history_infectious_endocrine_metabolic_disease",
+    "history_infectious_digestive_disease",
+    "history_infectious_cardiovascular_diseae",
+    "family_history_dm",
+    "family_history_htn",
+    "Current_history_obsteric",
+    "Previos_Obsteric_History_AB",
+    "infertility",
+]
+# Features entered as numbers in the UI; every other feature is rendered as a checkbox.
+NUMERIC_INPUTS = {"age", "BMI", "Previos_Obsteric_History_AB"}
+# Order follows FEATURES; the predict callbacks rely on this order when pairing checkbox values.
+BOOL_FEATURES  = [f for f in FEATURES if f not in NUMERIC_INPUTS]  # 8 flags
+# ---------- Utilities ----------
+def strip_pkl(x: str) -> str:
+    return x[:-4] if x.lower().endswith(".pkl") else x
+def normalize(s: str) -> str:
+    return re.sub(r"[^a-z0-9]+", "", str(s).lower())
+def coerce_numeric(val: Any) -> Optional[float]:
+    if val in ("", None) or (isinstance(val, float) and np.isnan(val)): return None
+    try: return float(val)
+    except: return None
+def truthy(val: Any) -> bool:
+    if pd.isna(val): return False
+    s = str(val).strip().lower()
+    return s in {"1","true","yes","y","t"} or val is True or val == 1
+def extract_probability_for_positive(preds: pd.DataFrame, positive_label=1) -> Optional[float]:
+    str_pos = str(positive_label)
+    if str_pos in preds.columns:
+        return float(preds.iloc[0][str_pos])
+    for c in preds.columns:
+        if str_pos == str(c) or str(c).endswith("_"+str_pos):
+            try: return float(preds.iloc[0][c])
+            except: pass
+    for cname in ("prediction_score","Score"):
+        if cname in preds.columns:
+            try: return float(preds.iloc[0][cname])
+            except: pass
+    return None
+def get_global_importance_table(model) -> Optional[pd.DataFrame]:
+    try:
+        if hasattr(model, "named_steps"):
+            est = model.named_steps.get("trained_model", list(model.named_steps.values())[-1])
+        elif hasattr(model, "steps"):
+            est = model.steps[-1][1]
+        else:
+            est = model
+    except Exception:
+        est = model
+    X_cols = getattr(model, "feature_names_in_", None)
+    if hasattr(est, "feature_importances_"):
+        vals = np.asarray(est.feature_importances_)
+        if X_cols is not None and len(vals) == len(X_cols):
+            df_imp = pd.DataFrame({"feature": list(X_cols), "importance": vals})
+        else:
+            df_imp = pd.DataFrame({"feature": [f"f{i}" for i in range(len(vals))], "importance": vals})
+        return df_imp.sort_values("importance", ascending=False).reset_index(drop=True)
+    if hasattr(est, "coef_"):
+        coef = np.array(est.coef_)
+        if coef.ndim > 1: coef = coef[0]
+        coef = np.ravel(coef)
+        if X_cols is not None and len(coef) == len(X_cols):
+            df_coef = pd.DataFrame({"feature": list(X_cols), "coefficient": coef})
+        else:
+            df_coef = pd.DataFrame({"feature": [f"f{i}" for i in range(len(coef))], "coefficient": coef})
+        return df_coef.reindex(df_coef.iloc[:, -1].abs().sort_values(ascending=False).index).reset_index(drop=True)
+    return None
+# ---------- Load model ----------
+# Runs at import time: a missing or unreadable model file aborts startup (there is no fallback here).
+# NOTE(review): the model is a pickle (see MODEL_BASE); only load files from a trusted source.
+BASE  = strip_pkl(MODEL_BASE)
+MODEL = load_model(BASE)
+# ---------- Load fixed sample file ----------
+def load_sample_dataframe(path: str) -> Tuple[pd.DataFrame, str]:
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"Sample file not found: {path}")
+    if path.lower().endswith((".xlsx",".xls")):
+        sdf = pd.read_excel(path)
+    else:
+        sdf = pd.read_csv(path)
+    # Find target col case-insensitively
+    cols_norm = {normalize(c): c for c in sdf.columns}
+    target_col = cols_norm.get(normalize(TARGET_NAME))
+    if target_col is None:
+        raise ValueError(f"Target column '{TARGET_NAME}' not found in sample file (case-insensitive).")
+    # Map to exact FEATURES (case-insensitive)
+    rename_map, missing = {}, []
+    for f in FEATURES:
+        src = cols_norm.get(normalize(f))
+        if src is None:
+            missing.append(f)
+        else:
+            rename_map[src] = f
+    if missing:
+        raise ValueError(f"Missing required feature columns in sample file: {missing}")
+    sdf2 = sdf.rename(columns=rename_map)[FEATURES + [target_col]]
+    return sdf2, target_col
+# Load the sample file once at import time. Any failure (missing file, unreadable format,
+# missing columns) is downgraded to a warning string that the UI shows in a banner.
+try:
+    SAMPLE_DF, SAMPLE_TARGET = load_sample_dataframe(SAMPLE_FILE)
+except Exception as e:
+    # Fall back to empty DF but keep the app alive with a warning in UI
+    SAMPLE_DF, SAMPLE_TARGET = pd.DataFrame(columns=FEATURES+[TARGET_NAME]), TARGET_NAME
+    SAMPLE_ERROR = f"⚠️ Could not load sample file: {e}"
+else:
+    # Empty string means "no banner" (the UI tests SAMPLE_ERROR for truthiness).
+    SAMPLE_ERROR = ""
+# Build initial dropdown choices
+def build_sample_choices(df: pd.DataFrame, tgt: str, flt: str = "All") -> List[str]:
+    if df.empty: return []
+    if flt == "All":
+        idxs = list(range(len(df)))
+    else:
+        want = int(flt)
+        idxs = [i for i in range(len(df)) if str(df.iloc[i][tgt]) == str(want)]
+    return [f"{i}: y={df.iloc[i][tgt]}" for i in idxs]
+# ---------- Gradio UI ----------
+# Everything in this `with` block defines the page; `demo` is what the launch section starts.
+# Widgets are created in display order, so statement order here determines the layout.
+with gr.Blocks(theme=gr.themes.Soft(), css="""
+* { font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI; }
+.gradio-container { max-width: 1040px !important; margin: 0 auto; }
+.card { border: 1px solid #e5e7eb; border-radius: 16px; padding: 16px; background: white; box-shadow: 0 1px 8px rgba(0,0,0,0.04); }
+h1.title { font-size: 28px; font-weight: 800; margin: 10px 0 2px; }
+.badge { display:inline-block; padding: 2px 10px; border-radius: 999px; background:#eef2ff; color:#3730a3; font-size: 12px; font-weight:700; }
+.small { font-size: 12px; color:#6b7280; }
+hr.sep { border: none; border-top: 1px solid #e5e7eb; margin: 8px 0 14px; }
+""") as demo:
+    gr.Markdown(
+        "<h1 class='title'>Insulin Classifier — Manual + Fixed Samples</h1>"
+        "<div class='badge'>PyCaret pipeline · Auto-preprocessing · Thresholdable</div>"
+    )
+    # Banner shown only when the sample file failed to load at startup.
+    if SAMPLE_ERROR:
+        gr.Markdown(f"<div class='card small'>{SAMPLE_ERROR}</div>")
+    with gr.Row():
+        # -------- Left: Manual inputs + Sample picker --------
+        with gr.Column(scale=1):
+            gr.Markdown("### 1) Manual input")
+            # The three numeric features (NUMERIC_INPUTS); they start empty (value=None).
+            age_in  = gr.Number(label="age (years)", value=None, precision=2)
+            bmi_in  = gr.Number(label="BMI", value=None, precision=3)
+            prev_ab = gr.Number(label="Previos_Obsteric_History_AB (count)", value=None, precision=0)
+            gr.Markdown("<hr class='sep'/>")
+            gr.Markdown("#### Clinical flags")
+            # One checkbox per boolean feature, keyed by feature name and created in BOOL_FEATURES order.
+            checkbox_map: Dict[str, gr.Checkbox] = {}
+            for feat in BOOL_FEATURES:
+                checkbox_map[feat] = gr.Checkbox(label=feat, value=False)
+            gr.Markdown("<hr class='sep'/>")
+            # Threshold shared by both predict buttons.
+            thr = gr.Slider(0.05, 0.95, value=0.50, step=0.01, label="Decision threshold for class '1'")
+            run_btn = gr.Button("🚀 Predict (manual)", variant="primary")
+            # -------- Sample picker (fixed file) --------
+            gr.Markdown("<hr class='sep'/>")
+            gr.Markdown("### 2) Sample picker (from fixed file)")
+            grp_dd   = gr.Dropdown(label="Filter by target", choices=["All","0","1"], value="All")
+            # Initial (unfiltered) row list, computed once while the page is being built.
+            choices0 = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, "All")
+            sample_dd= gr.Dropdown(label="Choose sample row", choices=choices0, value=(choices0[0] if choices0 else None))
+            pred_btn = gr.Button("🎯 Predict & compare (sample)", variant="primary")
+        # -------- Right: Results --------
+        with gr.Column(scale=1):
+            gr.Markdown("### 3) Results")
+            # These six outputs are filled, in this order, by both predict callbacks.
+            pred_label = gr.Textbox(label="Predicted label (with threshold decision)", interactive=False)
+            with gr.Row():
+                prob_out = gr.Number(label="P(class==1)", interactive=False, precision=6)
+                decision = gr.Textbox(label="Decision @ threshold", interactive=False)
+            with gr.Row():
+                gt_out   = gr.Textbox(label="Ground truth (sample)", interactive=False)
+                match_out= gr.Textbox(label="Correct vs. ground truth?", interactive=False)
+            with gr.Accordion("Echoed input (row sent to model)", open=False):
+                echoed = gr.Dataframe(wrap=True)
+            # Static table computed once from the loaded model; replaced by a note when unavailable.
+            GI = get_global_importance_table(MODEL)
+            if GI is not None and not GI.empty:
+                with gr.Accordion("Global feature importance / coefficients", open=False):
+                    gr.Dataframe(value=GI, interactive=False, wrap=True)
+            else:
+                gr.Markdown("<div class='card small'>No native importances/coefficients available for this estimator.</div>")
+    # -------- Manual predict --------
+    def do_predict_manual(age, bmi, prev_ab_cnt, threshold, *flag_values):
+        row = {c: None for c in FEATURES}
+        row["age"]  = coerce_numeric(age)
+        row["BMI"]  = coerce_numeric(bmi)
+        row["Previos_Obsteric_History_AB"] = coerce_numeric(prev_ab_cnt)
+        for feat, val in zip(BOOL_FEATURES, flag_values):
+            row[feat] = 1.0 if bool(val) else 0.0
+        df_row = pd.DataFrame([row], columns=FEATURES)
+        preds = predict_model(MODEL, data=df_row.copy())
+        label_col = next((c for c in preds.columns if c.lower() in ("prediction_label","label")), None)
+        label = preds.iloc[0][label_col] if label_col else None
+        p = extract_probability_for_positive(preds, positive_label=POS_CLASS)
+        if p is not None:
+            dec = 1 if float(p) >= float(threshold) else 0
+            pretty = f"{label} (threshold {threshold:.2f} ⇒ decision={dec})"
+            return pretty, float(p), str(dec), "", "", df_row
+        else:
+            return str(label), float("nan"), str(label), "", "", df_row
+    run_btn.click(
+        do_predict_manual,
+        inputs=[age_in, bmi_in, prev_ab, thr] + [checkbox_map[f] for f in BOOL_FEATURES],
+        outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
+    )
+    # -------- Update sample choices on filter change --------
+    def update_choices(group_value):
+        ch = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, group_value)
+        return gr.Dropdown(choices=ch, value=(ch[0] if ch else None))
+    grp_dd.change(update_choices, inputs=[grp_dd], outputs=[sample_dd])
+    # -------- Predict & compare for selected sample --------
+    def predict_sample(sample_choice, threshold):
+        if SAMPLE_DF.empty or sample_choice is None or str(sample_choice).strip() == "":
+            raise gr.Error("Sample file is empty or no row selected. Check SAMPLE_FILE path.")
+        idx = int(str(sample_choice).split(":")[0])
+        srow = SAMPLE_DF.iloc[idx]
+        row = {c: None for c in FEATURES}
+        row["age"]  = coerce_numeric(srow["age"])
+        row["BMI"]  = coerce_numeric(srow["BMI"])
+        row["Previos_Obsteric_History_AB"] = coerce_numeric(srow["Previos_Obsteric_History_AB"])
+        for feat in BOOL_FEATURES:
+            row[feat] = 1.0 if truthy(srow[feat]) else 0.0
+        df_row = pd.DataFrame([row], columns=FEATURES)
+        preds = predict_model(MODEL, data=df_row.copy())
+        label_col = next((c for c in preds.columns if c.lower() in ("prediction_label","label")), None)
+        label = preds.iloc[0][label_col] if label_col else None
+        p = extract_probability_for_positive(preds, positive_label=POS_CLASS)
+        # Decision & compare
+        if p is not None:
+            dec = 1 if float(p) >= float(threshold) else 0
+            pretty = f"{label} (threshold {threshold:.2f} ⇒ decision={dec})"
+        else:
+            dec, pretty = label, str(label)
+        gt = srow[SAMPLE_TARGET]
+        match = "✅ Correct" if gt == label else "❌ Incorrect"
+        return pretty, (float(p) if p is not None else float("nan")), str(dec), str(gt), match, df_row
+    pred_btn.click(
+        predict_sample,
+        inputs=[sample_dd, thr],
+        outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
+    )
+# ---------- Launch ----------
+# Only starts the server when the file is run as a script; importing it just builds `demo`.
+# NOTE(review): launch() is called without arguments, so the HOST / PORT / SHARE constants
+# from the config section have no effect — confirm whether they should be passed here.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+pycaret>=3.3,<4
+gradio
+pandas
+shap
+matplotlib