GDMProjects committed on
Commit
5ed58ef
·
verified ·
1 Parent(s): 610765c

Update insulin.py

Browse files
Files changed (1) hide show
  1. insulin.py +295 -299
insulin.py CHANGED
@@ -1,300 +1,296 @@
1
- #!/usr/bin/env python3
2
- # Gradio app for PyCaret insulin classifier
3
- # - Manual inputs (fixed 11 features)
4
- # - Fixed sample file loaded at startup (Excel/CSV)
5
- # - User selects a sample from dropdown and "Predict & Compare"
6
- # - No upload and no "load into form" section
7
-
8
- # ---------- Fixed config ----------
9
- MODEL_BASE = "best_insulin_model" # expects ./best_insulin_model.pkl
10
- SAMPLE_FILE = r"C:\Users\A\Desktop\My_Projects\0-AI\GDM\Insulin.xlsx" # <- EDIT to your path
11
- TARGET_NAME = "insulin" # case-insensitive in the sample file
12
- POS_CLASS = 1 # positive class label for thresholding (binary)
13
- HOST, PORT, SHARE = "0.0.0.0", 7860, True
14
-
15
- # ---------- Env hygiene ----------
16
- import os
17
- os.environ["NO_PROXY"] = "127.0.0.1,localhost,::1"
18
- os.environ["no_proxy"] = "127.0.0.1,localhost,::1"
19
- for _k in ("HTTP_PROXY","http_proxy","HTTPS_PROXY","https_proxy"):
20
- os.environ.pop(_k, None)
21
- os.environ.setdefault("GRADIO_OPEN_BROWSER", "false")
22
- os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
23
-
24
- # ---------- Imports ----------
25
- from typing import Any, Dict, Optional, Tuple, List
26
- import re
27
- import numpy as np
28
- import pandas as pd
29
- import gradio as gr
30
- from pycaret.classification import load_model, predict_model
31
-
32
- # ---------- Feature space (exactly as trained) ----------
33
- FEATURES = [
34
- "age",
35
- "BMI",
36
- "history_of_htn",
37
- "history_infectious_endocrine_metabolic_disease",
38
- "history_infectious_digestive_disease",
39
- "history_infectious_cardiovascular_diseae",
40
- "family_history_dm",
41
- "family_history_htn",
42
- "Current_history_obsteric",
43
- "Previos_Obsteric_History_AB",
44
- "infertility",
45
- ]
46
- NUMERIC_INPUTS = {"age", "BMI", "Previos_Obsteric_History_AB"}
47
- BOOL_FEATURES = [f for f in FEATURES if f not in NUMERIC_INPUTS] # 8 flags
48
-
49
- # ---------- Utilities ----------
50
- def strip_pkl(x: str) -> str:
51
- return x[:-4] if x.lower().endswith(".pkl") else x
52
-
53
- def normalize(s: str) -> str:
54
- return re.sub(r"[^a-z0-9]+", "", str(s).lower())
55
-
56
- def coerce_numeric(val: Any) -> Optional[float]:
57
- if val in ("", None) or (isinstance(val, float) and np.isnan(val)): return None
58
- try: return float(val)
59
- except: return None
60
-
61
- def truthy(val: Any) -> bool:
62
- if pd.isna(val): return False
63
- s = str(val).strip().lower()
64
- return s in {"1","true","yes","y","t"} or val is True or val == 1
65
-
66
- def extract_probability_for_positive(preds: pd.DataFrame, positive_label=1) -> Optional[float]:
67
- str_pos = str(positive_label)
68
- if str_pos in preds.columns:
69
- return float(preds.iloc[0][str_pos])
70
- for c in preds.columns:
71
- if str_pos == str(c) or str(c).endswith("_"+str_pos):
72
- try: return float(preds.iloc[0][c])
73
- except: pass
74
- for cname in ("prediction_score","Score"):
75
- if cname in preds.columns:
76
- try: return float(preds.iloc[0][cname])
77
- except: pass
78
- return None
79
-
80
- def get_global_importance_table(model) -> Optional[pd.DataFrame]:
81
- try:
82
- if hasattr(model, "named_steps"):
83
- est = model.named_steps.get("trained_model", list(model.named_steps.values())[-1])
84
- elif hasattr(model, "steps"):
85
- est = model.steps[-1][1]
86
- else:
87
- est = model
88
- except Exception:
89
- est = model
90
- X_cols = getattr(model, "feature_names_in_", None)
91
- if hasattr(est, "feature_importances_"):
92
- vals = np.asarray(est.feature_importances_)
93
- if X_cols is not None and len(vals) == len(X_cols):
94
- df_imp = pd.DataFrame({"feature": list(X_cols), "importance": vals})
95
- else:
96
- df_imp = pd.DataFrame({"feature": [f"f{i}" for i in range(len(vals))], "importance": vals})
97
- return df_imp.sort_values("importance", ascending=False).reset_index(drop=True)
98
- if hasattr(est, "coef_"):
99
- coef = np.array(est.coef_)
100
- if coef.ndim > 1: coef = coef[0]
101
- coef = np.ravel(coef)
102
- if X_cols is not None and len(coef) == len(X_cols):
103
- df_coef = pd.DataFrame({"feature": list(X_cols), "coefficient": coef})
104
- else:
105
- df_coef = pd.DataFrame({"feature": [f"f{i}" for i in range(len(coef))], "coefficient": coef})
106
- return df_coef.reindex(df_coef.iloc[:, -1].abs().sort_values(ascending=False).index).reset_index(drop=True)
107
- return None
108
-
109
- # ---------- Load model ----------
110
- BASE = strip_pkl(MODEL_BASE)
111
- MODEL = load_model(BASE)
112
-
113
- # ---------- Load fixed sample file ----------
114
- def load_sample_dataframe(path: str) -> Tuple[pd.DataFrame, str]:
115
- if not os.path.exists(path):
116
- raise FileNotFoundError(f"Sample file not found: {path}")
117
- if path.lower().endswith((".xlsx",".xls")):
118
- sdf = pd.read_excel(path)
119
- else:
120
- sdf = pd.read_csv(path)
121
-
122
- # Find target col case-insensitively
123
- cols_norm = {normalize(c): c for c in sdf.columns}
124
- target_col = cols_norm.get(normalize(TARGET_NAME))
125
- if target_col is None:
126
- raise ValueError(f"Target column '{TARGET_NAME}' not found in sample file (case-insensitive).")
127
-
128
- # Map to exact FEATURES (case-insensitive)
129
- rename_map, missing = {}, []
130
- for f in FEATURES:
131
- src = cols_norm.get(normalize(f))
132
- if src is None:
133
- missing.append(f)
134
- else:
135
- rename_map[src] = f
136
- if missing:
137
- raise ValueError(f"Missing required feature columns in sample file: {missing}")
138
-
139
- sdf2 = sdf.rename(columns=rename_map)[FEATURES + [target_col]]
140
- return sdf2, target_col
141
-
142
- try:
143
- SAMPLE_DF, SAMPLE_TARGET = load_sample_dataframe(SAMPLE_FILE)
144
- except Exception as e:
145
- # Fall back to empty DF but keep the app alive with a warning in UI
146
- SAMPLE_DF, SAMPLE_TARGET = pd.DataFrame(columns=FEATURES+[TARGET_NAME]), TARGET_NAME
147
- SAMPLE_ERROR = f"⚠️ Could not load sample file: {e}"
148
- else:
149
- SAMPLE_ERROR = ""
150
-
151
- # Build initial dropdown choices
152
- def build_sample_choices(df: pd.DataFrame, tgt: str, flt: str = "All") -> List[str]:
153
- if df.empty: return []
154
- if flt == "All":
155
- idxs = list(range(len(df)))
156
- else:
157
- want = int(flt)
158
- idxs = [i for i in range(len(df)) if str(df.iloc[i][tgt]) == str(want)]
159
- return [f"{i}: y={df.iloc[i][tgt]}" for i in idxs]
160
-
161
- # ---------- Gradio UI ----------
162
- with gr.Blocks(theme=gr.themes.Soft(), css="""
163
- * { font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI; }
164
- .gradio-container { max-width: 1040px !important; margin: 0 auto; }
165
- .card { border: 1px solid #e5e7eb; border-radius: 16px; padding: 16px; background: white; box-shadow: 0 1px 8px rgba(0,0,0,0.04); }
166
- h1.title { font-size: 28px; font-weight: 800; margin: 10px 0 2px; }
167
- .badge { display:inline-block; padding: 2px 10px; border-radius: 999px; background:#eef2ff; color:#3730a3; font-size: 12px; font-weight:700; }
168
- .small { font-size: 12px; color:#6b7280; }
169
- hr.sep { border: none; border-top: 1px solid #e5e7eb; margin: 8px 0 14px; }
170
- """) as demo:
171
-
172
- gr.Markdown(
173
- "<h1 class='title'>Insulin Classifier — Manual + Fixed Samples</h1>"
174
- "<div class='badge'>PyCaret pipeline · Auto-preprocessing · Thresholdable</div>"
175
- )
176
- if SAMPLE_ERROR:
177
- gr.Markdown(f"<div class='card small'>{SAMPLE_ERROR}</div>")
178
-
179
- with gr.Row():
180
- # -------- Left: Manual inputs + Sample picker --------
181
- with gr.Column(scale=1):
182
- gr.Markdown("### 1) Manual input")
183
- age_in = gr.Number(label="age (years)", value=None, precision=2)
184
- bmi_in = gr.Number(label="BMI", value=None, precision=3)
185
- prev_ab = gr.Number(label="Previos_Obsteric_History_AB (count)", value=None, precision=0)
186
-
187
- gr.Markdown("<hr class='sep'/>")
188
- gr.Markdown("#### Clinical flags")
189
- checkbox_map: Dict[str, gr.Checkbox] = {}
190
- for feat in BOOL_FEATURES:
191
- checkbox_map[feat] = gr.Checkbox(label=feat, value=False)
192
-
193
- gr.Markdown("<hr class='sep'/>")
194
- thr = gr.Slider(0.05, 0.95, value=0.50, step=0.01, label="Decision threshold for class '1'")
195
- run_btn = gr.Button("🚀 Predict (manual)", variant="primary")
196
-
197
- # -------- Sample picker (fixed file) --------
198
- gr.Markdown("<hr class='sep'/>")
199
- gr.Markdown("### 2) Sample picker (from fixed file)")
200
- grp_dd = gr.Dropdown(label="Filter by target", choices=["All","0","1"], value="All")
201
- choices0 = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, "All")
202
- sample_dd= gr.Dropdown(label="Choose sample row", choices=choices0, value=(choices0[0] if choices0 else None))
203
- pred_btn = gr.Button("🎯 Predict & compare (sample)", variant="primary")
204
-
205
- # -------- Right: Results --------
206
- with gr.Column(scale=1):
207
- gr.Markdown("### 3) Results")
208
- pred_label = gr.Textbox(label="Predicted label (with threshold decision)", interactive=False)
209
- with gr.Row():
210
- prob_out = gr.Number(label="P(class==1)", interactive=False, precision=6)
211
- decision = gr.Textbox(label="Decision @ threshold", interactive=False)
212
- with gr.Row():
213
- gt_out = gr.Textbox(label="Ground truth (sample)", interactive=False)
214
- match_out= gr.Textbox(label="Correct vs. ground truth?", interactive=False)
215
- with gr.Accordion("Echoed input (row sent to model)", open=False):
216
- echoed = gr.Dataframe(wrap=True)
217
-
218
- GI = get_global_importance_table(MODEL)
219
- if GI is not None and not GI.empty:
220
- with gr.Accordion("Global feature importance / coefficients", open=False):
221
- gr.Dataframe(value=GI, interactive=False, wrap=True)
222
- else:
223
- gr.Markdown("<div class='card small'>No native importances/coefficients available for this estimator.</div>")
224
-
225
- # -------- Manual predict --------
226
- def do_predict_manual(age, bmi, prev_ab_cnt, threshold, *flag_values):
227
- row = {c: None for c in FEATURES}
228
- row["age"] = coerce_numeric(age)
229
- row["BMI"] = coerce_numeric(bmi)
230
- row["Previos_Obsteric_History_AB"] = coerce_numeric(prev_ab_cnt)
231
- for feat, val in zip(BOOL_FEATURES, flag_values):
232
- row[feat] = 1.0 if bool(val) else 0.0
233
-
234
- df_row = pd.DataFrame([row], columns=FEATURES)
235
- preds = predict_model(MODEL, data=df_row.copy())
236
- label_col = next((c for c in preds.columns if c.lower() in ("prediction_label","label")), None)
237
- label = preds.iloc[0][label_col] if label_col else None
238
- p = extract_probability_for_positive(preds, positive_label=POS_CLASS)
239
- if p is not None:
240
- dec = 1 if float(p) >= float(threshold) else 0
241
- pretty = f"{label} (threshold {threshold:.2f} ⇒ decision={dec})"
242
- return pretty, float(p), str(dec), "", "", df_row
243
- else:
244
- return str(label), float("nan"), str(label), "", "", df_row
245
-
246
- run_btn.click(
247
- do_predict_manual,
248
- inputs=[age_in, bmi_in, prev_ab, thr] + [checkbox_map[f] for f in BOOL_FEATURES],
249
- outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
250
- )
251
-
252
- # -------- Update sample choices on filter change --------
253
- def update_choices(group_value):
254
- ch = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, group_value)
255
- return gr.Dropdown(choices=ch, value=(ch[0] if ch else None))
256
-
257
- grp_dd.change(update_choices, inputs=[grp_dd], outputs=[sample_dd])
258
-
259
- # -------- Predict & compare for selected sample --------
260
- def predict_sample(sample_choice, threshold):
261
- if SAMPLE_DF.empty or sample_choice is None or str(sample_choice).strip() == "":
262
- raise gr.Error("Sample file is empty or no row selected. Check SAMPLE_FILE path.")
263
-
264
- idx = int(str(sample_choice).split(":")[0])
265
- srow = SAMPLE_DF.iloc[idx]
266
-
267
- row = {c: None for c in FEATURES}
268
- row["age"] = coerce_numeric(srow["age"])
269
- row["BMI"] = coerce_numeric(srow["BMI"])
270
- row["Previos_Obsteric_History_AB"] = coerce_numeric(srow["Previos_Obsteric_History_AB"])
271
- for feat in BOOL_FEATURES:
272
- row[feat] = 1.0 if truthy(srow[feat]) else 0.0
273
-
274
- df_row = pd.DataFrame([row], columns=FEATURES)
275
- preds = predict_model(MODEL, data=df_row.copy())
276
- label_col = next((c for c in preds.columns if c.lower() in ("prediction_label","label")), None)
277
- label = preds.iloc[0][label_col] if label_col else None
278
- p = extract_probability_for_positive(preds, positive_label=POS_CLASS)
279
-
280
- # Decision & compare
281
- if p is not None:
282
- dec = 1 if float(p) >= float(threshold) else 0
283
- pretty = f"{label} (threshold {threshold:.2f} ⇒ decision={dec})"
284
- else:
285
- dec, pretty = label, str(label)
286
-
287
- gt = srow[SAMPLE_TARGET]
288
- match = "✅ Correct" if gt == label else "❌ Incorrect"
289
-
290
- return pretty, (float(p) if p is not None else float("nan")), str(dec), str(gt), match, df_row
291
-
292
- pred_btn.click(
293
- predict_sample,
294
- inputs=[sample_dd, thr],
295
- outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
296
- )
297
-
298
- # ---------- Launch ----------
299
- if __name__ == "__main__":
300
  demo.launch()
 
1
+
2
+ HOST, PORT, SHARE = "0.0.0.0", 7860, True
3
+
4
+ # ---------- Env hygiene ----------
5
+ import os
6
+ os.environ["NO_PROXY"] = "127.0.0.1,localhost,::1"
7
+ os.environ["no_proxy"] = "127.0.0.1,localhost,::1"
8
+ for _k in ("HTTP_PROXY","http_proxy","HTTPS_PROXY","https_proxy"):
9
+ os.environ.pop(_k, None)
10
+ os.environ.setdefault("GRADIO_OPEN_BROWSER", "false")
11
+ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
12
+
13
+ # ---------- Imports ----------
14
+ from typing import Any, Dict, Optional, Tuple, List
15
+ import re
16
+ import numpy as np
17
+ import pandas as pd
18
+ import gradio as gr
19
+ from pycaret.classification import load_model, predict_model
20
+ from huggingface_hub import hf_hub_download
21
+ REPO = os.getenv("MODEL_REPO", "GDMProjects/my-private-model")
22
+ FNAME = os.getenv("MODEL_FILE", "best_insulin_model.pkl")
23
+ TOKEN = os.getenv("HF_TOKEN")
24
+
25
+
26
+ SAMPLE_FILE = "Insulin.xlsx"
27
+ TARGET_NAME = "insulin"
28
+ POS_CLASS = 1
29
+ FEATURES = [
30
+ "age",
31
+ "BMI",
32
+ "history_of_htn",
33
+ "history_infectious_endocrine_metabolic_disease",
34
+ "history_infectious_digestive_disease",
35
+ "history_infectious_cardiovascular_diseae",
36
+ "family_history_dm",
37
+ "family_history_htn",
38
+ "Current_history_obsteric",
39
+ "Previos_Obsteric_History_AB",
40
+ "infertility",
41
+ ]
42
+ NUMERIC_INPUTS = {"age", "BMI", "Previos_Obsteric_History_AB"}
43
+ BOOL_FEATURES = [f for f in FEATURES if f not in NUMERIC_INPUTS] # 8 flags
44
+
45
+ # ---------- Utilities ----------
46
+ def strip_pkl(x: str) -> str:
47
+ return x[:-4] if x.lower().endswith(".pkl") else x
48
+
49
+ def normalize(s: str) -> str:
50
+ return re.sub(r"[^a-z0-9]+", "", str(s).lower())
51
+
52
def coerce_numeric(val: Any) -> Optional[float]:
    """Convert a form or spreadsheet value to ``float``; ``None`` means missing.

    Args:
        val: Raw value (number, string, ``None``, NaN, ``pd.NA``, ...).

    Returns:
        The value as a float, or ``None`` when it is ``None``, a blank string,
        NaN/NA, or cannot be parsed as a number.
    """
    if val is None:
        return None
    if isinstance(val, str):
        val = val.strip()
        if not val:
            return None
    try:
        num = float(val)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers pd.NA and other non-numeric objects; ValueError
        # covers unparsable text; OverflowError covers huge ints.
        return None
    # Check NaN after conversion so NumPy scalars and the text "nan" are
    # treated as missing too, not only Python floats.
    return None if np.isnan(num) else num
56
+
57
+ def truthy(val: Any) -> bool:
58
+ if pd.isna(val): return False
59
+ s = str(val).strip().lower()
60
+ return s in {"1","true","yes","y","t"} or val is True or val == 1
61
+
62
def extract_probability_for_positive(preds: pd.DataFrame, positive_label=1) -> Optional[float]:
    """Return P(class == ``positive_label``) for the first row of ``preds``.

    Lookup order:
      1. A column named exactly like the positive label (e.g. ``"1"``).
      2. A column whose name ends with ``"_<label>"`` (e.g. the per-class
         ``prediction_score_1`` column).
      3. The single ``prediction_score`` / ``Score`` column. That score belongs
         to the *predicted* label, so when the predicted label is not the
         positive one the complement ``1 - score`` is returned. This is only
         valid for binary problems.

    Args:
        preds: Prediction frame; only its first row is read.
        positive_label: Label of the positive class.

    Returns:
        The probability as a float, or ``None`` if ``preds`` has no rows or no
        usable score column.
    """
    if len(preds) == 0:
        return None
    str_pos = str(positive_label)

    def _first_as_float(col) -> Optional[float]:
        # Column access (not row access) keeps the column's own dtype.
        try:
            return float(preds[col].iloc[0])
        except (TypeError, ValueError):
            return None

    def _is_positive(label) -> bool:
        if str(label) == str_pos:
            return True
        try:
            return float(label) == float(positive_label)
        except (TypeError, ValueError):
            return False

    exact = [c for c in preds.columns if str(c) == str_pos]
    suffixed = [c for c in preds.columns if str(c).endswith("_" + str_pos)]
    for col in exact + suffixed:
        prob = _first_as_float(col)
        if prob is not None:
            return prob

    label_col = next(
        (c for c in preds.columns if str(c).lower() in ("prediction_label", "label")),
        None,
    )
    for score_col in ("prediction_score", "Score"):
        if score_col not in preds.columns:
            continue
        score = _first_as_float(score_col)
        if score is None:
            continue
        if label_col is not None and not _is_positive(preds[label_col].iloc[0]):
            # Score describes the predicted (non-positive) class: flip it.
            return 1.0 - score
        return score
    return None
75
+
76
+ def get_global_importance_table(model) -> Optional[pd.DataFrame]:
77
+ try:
78
+ if hasattr(model, "named_steps"):
79
+ est = model.named_steps.get("trained_model", list(model.named_steps.values())[-1])
80
+ elif hasattr(model, "steps"):
81
+ est = model.steps[-1][1]
82
+ else:
83
+ est = model
84
+ except Exception:
85
+ est = model
86
+ X_cols = getattr(model, "feature_names_in_", None)
87
+ if hasattr(est, "feature_importances_"):
88
+ vals = np.asarray(est.feature_importances_)
89
+ if X_cols is not None and len(vals) == len(X_cols):
90
+ df_imp = pd.DataFrame({"feature": list(X_cols), "importance": vals})
91
+ else:
92
+ df_imp = pd.DataFrame({"feature": [f"f{i}" for i in range(len(vals))], "importance": vals})
93
+ return df_imp.sort_values("importance", ascending=False).reset_index(drop=True)
94
+ if hasattr(est, "coef_"):
95
+ coef = np.array(est.coef_)
96
+ if coef.ndim > 1: coef = coef[0]
97
+ coef = np.ravel(coef)
98
+ if X_cols is not None and len(coef) == len(X_cols):
99
+ df_coef = pd.DataFrame({"feature": list(X_cols), "coefficient": coef})
100
+ else:
101
+ df_coef = pd.DataFrame({"feature": [f"f{i}" for i in range(len(coef))], "coefficient": coef})
102
+ return df_coef.reindex(df_coef.iloc[:, -1].abs().sort_values(ascending=False).index).reset_index(drop=True)
103
+ return None
104
+
105
+ # ---------- Load model ----------
106
# hf_hub_download returns the local cache path of the downloaded .pkl file.
# PyCaret's load_model appends ".pkl" itself, so the extension is removed first.
# strip_pkl is used because pathlib.Path is not imported in this module and
# would raise NameError at start-up.
local_path = hf_hub_download(repo_id=REPO, filename=FNAME, token=TOKEN)
MODEL = load_model(strip_pkl(local_path))
108
+
109
+ # ---------- Load fixed sample file ----------
110
+ def load_sample_dataframe(path: str) -> Tuple[pd.DataFrame, str]:
111
+ if not os.path.exists(path):
112
+ raise FileNotFoundError(f"Sample file not found: {path}")
113
+ if path.lower().endswith((".xlsx",".xls")):
114
+ sdf = pd.read_excel(path)
115
+ else:
116
+ sdf = pd.read_csv(path)
117
+
118
+ # Find target col case-insensitively
119
+ cols_norm = {normalize(c): c for c in sdf.columns}
120
+ target_col = cols_norm.get(normalize(TARGET_NAME))
121
+ if target_col is None:
122
+ raise ValueError(f"Target column '{TARGET_NAME}' not found in sample file (case-insensitive).")
123
+
124
+ # Map to exact FEATURES (case-insensitive)
125
+ rename_map, missing = {}, []
126
+ for f in FEATURES:
127
+ src = cols_norm.get(normalize(f))
128
+ if src is None:
129
+ missing.append(f)
130
+ else:
131
+ rename_map[src] = f
132
+ if missing:
133
+ raise ValueError(f"Missing required feature columns in sample file: {missing}")
134
+
135
+ sdf2 = sdf.rename(columns=rename_map)[FEATURES + [target_col]]
136
+ return sdf2, target_col
137
+
138
+ try:
139
+ SAMPLE_DF, SAMPLE_TARGET = load_sample_dataframe(SAMPLE_FILE)
140
+ except Exception as e:
141
+ # Fall back to empty DF but keep the app alive with a warning in UI
142
+ SAMPLE_DF, SAMPLE_TARGET = pd.DataFrame(columns=FEATURES+[TARGET_NAME]), TARGET_NAME
143
+ SAMPLE_ERROR = f"⚠️ Could not load sample file: {e}"
144
+ else:
145
+ SAMPLE_ERROR = ""
146
+
147
+ # Build initial dropdown choices
148
def build_sample_choices(df: pd.DataFrame, tgt: str, flt: str = "All") -> List[str]:
    """Build the dropdown labels ``"<row>: y=<target>"`` for the sample picker.

    Args:
        df: Sample frame containing the target column ``tgt``.
        tgt: Name of the target column.
        flt: ``"All"`` (also ``None`` or ``""``) for every row, otherwise an
            integer class label as text (e.g. ``"0"`` or ``"1"``).

    Returns:
        One label per selected row; the text before the colon is the row's
        positional index in ``df``. Empty list when ``df`` is empty.

    Raises:
        ValueError: If ``flt`` is neither "All"/blank nor an integer string.
    """
    if df.empty:
        return []

    # Read the target column directly. Going through df.iloc[i] builds a row
    # Series whose dtype is upcast by float features (e.g. BMI), so an int
    # target would read as 1.0 and never equal "1".
    targets = df[tgt].tolist()

    if flt in (None, "", "All"):
        return [f"{i}: y={y}" for i, y in enumerate(targets)]

    want = int(flt)

    def _matches(y) -> bool:
        try:
            return float(y) == want
        except (TypeError, ValueError):
            return str(y) == str(want)

    return [f"{i}: y={y}" for i, y in enumerate(targets) if _matches(y)]
156
+
157
+ # ---------- Gradio UI ----------
158
+ with gr.Blocks(theme=gr.themes.Soft(), css="""
159
+ * { font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI; }
160
+ .gradio-container { max-width: 1040px !important; margin: 0 auto; }
161
+ .card { border: 1px solid #e5e7eb; border-radius: 16px; padding: 16px; background: white; box-shadow: 0 1px 8px rgba(0,0,0,0.04); }
162
+ h1.title { font-size: 28px; font-weight: 800; margin: 10px 0 2px; }
163
+ .badge { display:inline-block; padding: 2px 10px; border-radius: 999px; background:#eef2ff; color:#3730a3; font-size: 12px; font-weight:700; }
164
+ .small { font-size: 12px; color:#6b7280; }
165
+ hr.sep { border: none; border-top: 1px solid #e5e7eb; margin: 8px 0 14px; }
166
+ """) as demo:
167
+
168
+ gr.Markdown(
169
+ "<h1 class='title'>Insulin Classifier Manual + Fixed Samples</h1>"
170
+ "<div class='badge'>PyCaret pipeline · Auto-preprocessing · Thresholdable</div>"
171
+ )
172
+ if SAMPLE_ERROR:
173
+ gr.Markdown(f"<div class='card small'>{SAMPLE_ERROR}</div>")
174
+
175
+ with gr.Row():
176
+ # -------- Left: Manual inputs + Sample picker --------
177
+ with gr.Column(scale=1):
178
+ gr.Markdown("### 1) Manual input")
179
+ age_in = gr.Number(label="age (years)", value=None, precision=2)
180
+ bmi_in = gr.Number(label="BMI", value=None, precision=3)
181
+ prev_ab = gr.Number(label="Previos_Obsteric_History_AB (count)", value=None, precision=0)
182
+
183
+ gr.Markdown("<hr class='sep'/>")
184
+ gr.Markdown("#### Clinical flags")
185
+ checkbox_map: Dict[str, gr.Checkbox] = {}
186
+ for feat in BOOL_FEATURES:
187
+ checkbox_map[feat] = gr.Checkbox(label=feat, value=False)
188
+
189
+ gr.Markdown("<hr class='sep'/>")
190
+ thr = gr.Slider(0.05, 0.95, value=0.50, step=0.01, label="Decision threshold for class '1'")
191
+ run_btn = gr.Button("🚀 Predict (manual)", variant="primary")
192
+
193
+ # -------- Sample picker (fixed file) --------
194
+ gr.Markdown("<hr class='sep'/>")
195
+ gr.Markdown("### 2) Sample picker (from fixed file)")
196
+ grp_dd = gr.Dropdown(label="Filter by target", choices=["All","0","1"], value="All")
197
+ choices0 = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, "All")
198
+ sample_dd= gr.Dropdown(label="Choose sample row", choices=choices0, value=(choices0[0] if choices0 else None))
199
+ pred_btn = gr.Button("🎯 Predict & compare (sample)", variant="primary")
200
+
201
+ # -------- Right: Results --------
202
+ with gr.Column(scale=1):
203
+ gr.Markdown("### 3) Results")
204
+ pred_label = gr.Textbox(label="Predicted label (with threshold decision)", interactive=False)
205
+ with gr.Row():
206
+ prob_out = gr.Number(label="P(class==1)", interactive=False, precision=6)
207
+ decision = gr.Textbox(label="Decision @ threshold", interactive=False)
208
+ with gr.Row():
209
+ gt_out = gr.Textbox(label="Ground truth (sample)", interactive=False)
210
+ match_out= gr.Textbox(label="Correct vs. ground truth?", interactive=False)
211
+ with gr.Accordion("Echoed input (row sent to model)", open=False):
212
+ echoed = gr.Dataframe(wrap=True)
213
+
214
+ GI = get_global_importance_table(MODEL)
215
+ if GI is not None and not GI.empty:
216
+ with gr.Accordion("Global feature importance / coefficients", open=False):
217
+ gr.Dataframe(value=GI, interactive=False, wrap=True)
218
+ else:
219
+ gr.Markdown("<div class='card small'>No native importances/coefficients available for this estimator.</div>")
220
+
221
+ # -------- Manual predict --------
222
+ def do_predict_manual(age, bmi, prev_ab_cnt, threshold, *flag_values):
223
+ row = {c: None for c in FEATURES}
224
+ row["age"] = coerce_numeric(age)
225
+ row["BMI"] = coerce_numeric(bmi)
226
+ row["Previos_Obsteric_History_AB"] = coerce_numeric(prev_ab_cnt)
227
+ for feat, val in zip(BOOL_FEATURES, flag_values):
228
+ row[feat] = 1.0 if bool(val) else 0.0
229
+
230
+ df_row = pd.DataFrame([row], columns=FEATURES)
231
+ preds = predict_model(MODEL, data=df_row.copy())
232
+ label_col = next((c for c in preds.columns if c.lower() in ("prediction_label","label")), None)
233
+ label = preds.iloc[0][label_col] if label_col else None
234
+ p = extract_probability_for_positive(preds, positive_label=POS_CLASS)
235
+ if p is not None:
236
+ dec = 1 if float(p) >= float(threshold) else 0
237
+ pretty = f"{label} (threshold {threshold:.2f} decision={dec})"
238
+ return pretty, float(p), str(dec), "", "", df_row
239
+ else:
240
+ return str(label), float("nan"), str(label), "", "", df_row
241
+
242
+ run_btn.click(
243
+ do_predict_manual,
244
+ inputs=[age_in, bmi_in, prev_ab, thr] + [checkbox_map[f] for f in BOOL_FEATURES],
245
+ outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
246
+ )
247
+
248
+ # -------- Update sample choices on filter change --------
249
+ def update_choices(group_value):
250
+ ch = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, group_value)
251
+ return gr.Dropdown(choices=ch, value=(ch[0] if ch else None))
252
+
253
+ grp_dd.change(update_choices, inputs=[grp_dd], outputs=[sample_dd])
254
+
255
+ # -------- Predict & compare for selected sample --------
256
+ def predict_sample(sample_choice, threshold):
257
+ if SAMPLE_DF.empty or sample_choice is None or str(sample_choice).strip() == "":
258
+ raise gr.Error("Sample file is empty or no row selected. Check SAMPLE_FILE path.")
259
+
260
+ idx = int(str(sample_choice).split(":")[0])
261
+ srow = SAMPLE_DF.iloc[idx]
262
+
263
+ row = {c: None for c in FEATURES}
264
+ row["age"] = coerce_numeric(srow["age"])
265
+ row["BMI"] = coerce_numeric(srow["BMI"])
266
+ row["Previos_Obsteric_History_AB"] = coerce_numeric(srow["Previos_Obsteric_History_AB"])
267
+ for feat in BOOL_FEATURES:
268
+ row[feat] = 1.0 if truthy(srow[feat]) else 0.0
269
+
270
+ df_row = pd.DataFrame([row], columns=FEATURES)
271
+ preds = predict_model(MODEL, data=df_row.copy())
272
+ label_col = next((c for c in preds.columns if c.lower() in ("prediction_label","label")), None)
273
+ label = preds.iloc[0][label_col] if label_col else None
274
+ p = extract_probability_for_positive(preds, positive_label=POS_CLASS)
275
+
276
+ # Decision & compare
277
+ if p is not None:
278
+ dec = 1 if float(p) >= float(threshold) else 0
279
+ pretty = f"{label} (threshold {threshold:.2f} ⇒ decision={dec})"
280
+ else:
281
+ dec, pretty = label, str(label)
282
+
283
+ gt = srow[SAMPLE_TARGET]
284
+ match = "✅ Correct" if gt == label else "❌ Incorrect"
285
+
286
+ return pretty, (float(p) if p is not None else float("nan")), str(dec), str(gt), match, df_row
287
+
288
+ pred_btn.click(
289
+ predict_sample,
290
+ inputs=[sample_dd, thr],
291
+ outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
292
+ )
293
+
294
+ # ---------- Launch ----------
295
+ if __name__ == "__main__":
 
 
 
 
296
  demo.launch()