GDMProjects committed on
Commit
f664075
·
verified ·
1 Parent(s): e9731f0

Upload 2 files

Browse files
Files changed (2) hide show
  1. insulin.py +300 -0
  2. requirements.txt +5 -0
insulin.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Gradio app for PyCaret insulin classifier
3
+ # - Manual inputs (fixed 11 features)
4
+ # - Fixed sample file loaded at startup (Excel/CSV)
5
+ # - User selects a sample from dropdown and "Predict & Compare"
6
+ # - No upload and no "load into form" section
7
+
8
+ # ---------- Fixed config ----------
9
# Base name of the saved PyCaret pipeline; ./best_insulin_model.pkl is expected on disk.
MODEL_BASE = "best_insulin_model"
# Excel/CSV file holding the fixed sample rows -- EDIT this to your own path.
SAMPLE_FILE = r"C:\Users\A\Desktop\My_Projects\0-AI\GDM\Insulin.xlsx"
# Name of the target column; looked up case-insensitively in the sample file.
TARGET_NAME = "insulin"
# Label of the positive class used for thresholding (binary problem).
POS_CLASS = 1
# Network settings for the Gradio server.
HOST = "0.0.0.0"
PORT = 7860
SHARE = True
14
+
15
+ # ---------- Env hygiene ----------
16
+ import os
17
# Keep loopback traffic off any proxy and switch Gradio analytics off.
os.environ.update(
    {
        "NO_PROXY": "127.0.0.1,localhost,::1",
        "no_proxy": "127.0.0.1,localhost,::1",
        "GRADIO_ANALYTICS_ENABLED": "False",
    }
)
# Drop any proxy settings inherited from the parent environment.
for _k in ("HTTP_PROXY", "http_proxy", "HTTPS_PROXY", "https_proxy"):
    if _k in os.environ:
        del os.environ[_k]
# Only provide a default here so an explicit user setting is respected.
os.environ.setdefault("GRADIO_OPEN_BROWSER", "false")
23
+
24
+ # ---------- Imports ----------
25
+ from typing import Any, Dict, Optional, Tuple, List
26
+ import re
27
+ import numpy as np
28
+ import pandas as pd
29
+ import gradio as gr
30
+ from pycaret.classification import load_model, predict_model
31
+
32
+ # ---------- Feature space (exactly as trained) ----------
33
# The 11 model inputs, in order. The names (including their original
# spellings) must stay exactly as they were at training time.
FEATURES = [
    "age",
    "BMI",
    "history_of_htn",
    "history_infectious_endocrine_metabolic_disease",
    "history_infectious_digestive_disease",
    "history_infectious_cardiovascular_diseae",
    "family_history_dm",
    "family_history_htn",
    "Current_history_obsteric",
    "Previos_Obsteric_History_AB",
    "infertility",
]
# Inputs entered as numbers in the form.
NUMERIC_INPUTS = {"age", "BMI", "Previos_Obsteric_History_AB"}
# Everything else is a yes/no flag (8 of them), kept in FEATURES order.
BOOL_FEATURES = list(filter(lambda name: name not in NUMERIC_INPUTS, FEATURES))
48
+
49
+ # ---------- Utilities ----------
50
def strip_pkl(x: str) -> str:
    """Return *x* without a trailing ".pkl" extension (matched case-insensitively)."""
    has_suffix = x.lower().endswith(".pkl")
    if not has_suffix:
        return x
    # The suffix is always four characters long.
    return x[: len(x) - 4]
52
+
53
def normalize(s: str) -> str:
    """Lower-case *s* (after ``str()``) and drop every character outside a-z / 0-9."""
    lowered = str(s).lower()
    return re.compile(r"[^a-z0-9]+").sub("", lowered)
55
+
56
def coerce_numeric(val: Any) -> Optional[float]:
    """Convert *val* to ``float``, or return ``None`` when it is missing or not numeric.

    Missing means ``None``, a blank string, NaN in any form (float, NumPy
    scalar, the text "nan") or ``pd.NA``. Anything ``float()`` rejects also
    yields ``None`` instead of raising.
    """
    # Identity/type checks only: an equality test such as ``val == ""`` is
    # ambiguous for pd.NA and array-likes and would raise.
    if val is None or (isinstance(val, str) and not val.strip()):
        return None
    try:
        number = float(val)
    except (TypeError, ValueError, OverflowError):
        return None
    # NaN is the only float that differs from itself.
    return None if number != number else number
60
+
61
def truthy(val: Any) -> bool:
    """Interpret a spreadsheet cell as a yes/no flag.

    Missing values are False. True values are ``True``, anything equal to 1,
    the case-insensitive texts "1", "true", "yes", "y", "t", and numeric text
    equal to one such as "1.0" (seen when a flag column is read as text).
    """
    if pd.isna(val):
        return False
    text = str(val).strip().lower()
    if text in {"1", "true", "yes", "y", "t"} or val is True or val == 1:
        return True
    # Accept numeric spellings of one ("1.0", "1e0"); anything else is False.
    try:
        return float(text) == 1.0
    except ValueError:
        return False
65
+
66
def _labels_match(left: Any, right: Any) -> bool:
    """Compare two class labels numerically when possible, else as text."""
    try:
        return float(left) == float(right)
    except (TypeError, ValueError):
        return str(left) == str(right)


def extract_probability_for_positive(preds: pd.DataFrame, positive_label=1) -> Optional[float]:
    """Return the positive-class probability for the first row of *preds*.

    Lookup order:
      1. a column named exactly like the positive label;
      2. a column whose name ends in ``_<label>`` (per-class score columns);
      3. the single ``prediction_score`` / ``Score`` column.

    The single score column holds the probability of the *predicted* class,
    so when the predicted label is not the positive one the value is turned
    into ``1 - score`` (binary problems). Returns ``None`` when *preds* has
    no rows or no usable column.
    """
    if preds is None or preds.empty:
        return None
    first = preds.iloc[0]
    str_pos = str(positive_label)

    # Exact-name columns take priority over suffix matches.
    exact = [c for c in preds.columns if str(c) == str_pos]
    suffixed = [
        c for c in preds.columns
        if str(c) != str_pos and str(c).endswith("_" + str_pos)
    ]
    for col in exact + suffixed:
        try:
            return float(first[col])
        except (TypeError, ValueError):
            continue

    label_col = next(
        (c for c in preds.columns if str(c).lower() in ("prediction_label", "label")),
        None,
    )
    for cname in ("prediction_score", "Score"):
        if cname not in preds.columns:
            continue
        try:
            score = float(first[cname])
        except (TypeError, ValueError):
            continue
        # The score belongs to the predicted class; flip it when that class
        # is not the positive one.
        if label_col is not None and not _labels_match(first[label_col], positive_label):
            return 1.0 - score
        return score
    return None
79
+
80
def get_global_importance_table(model) -> Optional[pd.DataFrame]:
    """Build a table of the final estimator's importances or coefficients.

    The final estimator is the pipeline's ``trained_model`` step (or its last
    step); a bare estimator is used as is. Returns a frame with columns
    ``feature`` + ``importance`` (sorted descending) or ``feature`` +
    ``coefficient`` (sorted by absolute value, descending), or ``None`` when
    the estimator exposes neither attribute.

    Feature names come from ``feature_names_in_`` of the pipeline or, failing
    that, of the final estimator, whichever has as many names as there are
    values. Otherwise placeholder names ``f0, f1, ...`` are used.
    """
    # Best effort: any surprise while unwrapping falls back to the object itself.
    try:
        if hasattr(model, "named_steps"):
            steps = model.named_steps
            est = steps.get("trained_model", list(steps.values())[-1])
        elif hasattr(model, "steps"):
            est = model.steps[-1][1]
        else:
            est = model
    except Exception:
        est = model

    def _names_for(n_values: int) -> list:
        # Preprocessing may change the column count, so the pipeline's input
        # names are only usable when their length matches the values.
        for owner in (model, est):
            names = getattr(owner, "feature_names_in_", None)
            if names is not None and len(names) == n_values:
                return list(names)
        return [f"f{i}" for i in range(n_values)]

    if hasattr(est, "feature_importances_"):
        vals = np.asarray(est.feature_importances_)
        table = pd.DataFrame({"feature": _names_for(len(vals)), "importance": vals})
        return table.sort_values("importance", ascending=False).reset_index(drop=True)

    if hasattr(est, "coef_"):
        coef = np.array(est.coef_)
        if coef.ndim > 1:
            # Multi-row coefficient matrices: only the first row is reported.
            coef = coef[0]
        coef = np.ravel(coef)
        table = pd.DataFrame({"feature": _names_for(len(coef)), "coefficient": coef})
        order = table["coefficient"].abs().sort_values(ascending=False).index
        return table.reindex(order).reset_index(drop=True)

    return None
108
+
109
+ # ---------- Load model ----------
110
# Drop a trailing ".pkl" from the configured name before handing it to PyCaret.
BASE = strip_pkl(MODEL_BASE)
MODEL = load_model(model_name=BASE)
112
+
113
+ # ---------- Load fixed sample file ----------
114
def load_sample_dataframe(path: str) -> Tuple[pd.DataFrame, str]:
    """Read the fixed sample file and align its columns with FEATURES.

    Files ending in .xlsx/.xls are read as Excel, anything else as CSV.
    Columns are matched to TARGET_NAME and to each entry of FEATURES through
    ``normalize`` (case and punctuation ignored), and matched feature columns
    are renamed to the exact FEATURES spelling.

    Returns the frame restricted to FEATURES plus the target column, and the
    target column's name as it appears in the file.

    Raises:
        FileNotFoundError: *path* does not exist.
        ValueError: the target column or any feature column is missing.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Sample file not found: {path}")

    is_excel = path.lower().endswith((".xlsx", ".xls"))
    reader = pd.read_excel if is_excel else pd.read_csv
    raw = reader(path)

    # Normalized column name -> column name as it appears in the file.
    by_norm = {}
    for col in raw.columns:
        by_norm[normalize(col)] = col

    target_col = by_norm.get(normalize(TARGET_NAME))
    if target_col is None:
        raise ValueError(f"Target column '{TARGET_NAME}' not found in sample file (case-insensitive).")

    # Source column for every required feature (None when absent).
    sources = {feat: by_norm.get(normalize(feat)) for feat in FEATURES}
    absent = [feat for feat, src in sources.items() if src is None]
    if absent:
        raise ValueError(f"Missing required feature columns in sample file: {absent}")

    renames = {src: feat for feat, src in sources.items()}
    wanted = FEATURES + [target_col]
    return raw.rename(columns=renames)[wanted], target_col
141
+
142
# Load the fixed sample file once at startup. Any failure is tolerated: the
# app keeps running with an empty sample table and shows a warning in the UI.
SAMPLE_ERROR = ""
try:
    SAMPLE_DF, SAMPLE_TARGET = load_sample_dataframe(SAMPLE_FILE)
except Exception as e:
    SAMPLE_DF = pd.DataFrame(columns=FEATURES + [TARGET_NAME])
    SAMPLE_TARGET = TARGET_NAME
    SAMPLE_ERROR = f"⚠️ Could not load sample file: {e}"
150
+
151
+ # Build initial dropdown choices
152
def build_sample_choices(df: pd.DataFrame, tgt: str, flt: str = "All") -> List[str]:
    """Return dropdown labels ``"<row position>: y=<target>"`` for the sample rows.

    Args:
        df: sample table; an empty table yields an empty list.
        tgt: name of the target column in *df*.
        flt: ``"All"`` keeps every row; otherwise an integer class (as text)
            and only rows whose target equals it numerically are kept.

    Raises:
        ValueError: *flt* is neither ``"All"`` nor an integer.
    """
    if df.empty:
        return []
    # Read the target column directly: going through a row Series upcasts
    # integer targets to float in mixed frames ("1.0"), which broke the
    # "0"/"1" filters.
    target = df[tgt]
    values = target.tolist()
    if flt == "All":
        keep = [True] * len(values)
    else:
        want = int(flt)
        keep = (pd.to_numeric(target, errors="coerce") == want).tolist()
    return [
        f"{pos}: y={val}"
        for pos, (val, ok) in enumerate(zip(values, keep))
        if ok
    ]
160
+
161
+ # ---------- Gradio UI ----------
162
# Gradio UI: manual form and sample picker on the left, results on the right.
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting below (sample picker in the left column, importance table in
# the right column, callbacks at Blocks level) is the most plausible reading
# -- confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(), css="""
* { font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI; }
.gradio-container { max-width: 1040px !important; margin: 0 auto; }
.card { border: 1px solid #e5e7eb; border-radius: 16px; padding: 16px; background: white; box-shadow: 0 1px 8px rgba(0,0,0,0.04); }
h1.title { font-size: 28px; font-weight: 800; margin: 10px 0 2px; }
.badge { display:inline-block; padding: 2px 10px; border-radius: 999px; background:#eef2ff; color:#3730a3; font-size: 12px; font-weight:700; }
.small { font-size: 12px; color:#6b7280; }
hr.sep { border: none; border-top: 1px solid #e5e7eb; margin: 8px 0 14px; }
""") as demo:

    gr.Markdown(
        "<h1 class='title'>Insulin Classifier — Manual + Fixed Samples</h1>"
        "<div class='badge'>PyCaret pipeline · Auto-preprocessing · Thresholdable</div>"
    )
    if SAMPLE_ERROR:
        gr.Markdown(f"<div class='card small'>{SAMPLE_ERROR}</div>")

    with gr.Row():
        # -------- Left: Manual inputs + Sample picker --------
        with gr.Column(scale=1):
            gr.Markdown("### 1) Manual input")
            age_in = gr.Number(label="age (years)", value=None, precision=2)
            bmi_in = gr.Number(label="BMI", value=None, precision=3)
            prev_ab = gr.Number(label="Previos_Obsteric_History_AB (count)", value=None, precision=0)

            gr.Markdown("<hr class='sep'/>")
            gr.Markdown("#### Clinical flags")
            # One checkbox per boolean feature, keyed by feature name.
            checkbox_map: Dict[str, gr.Checkbox] = {}
            for feat in BOOL_FEATURES:
                checkbox_map[feat] = gr.Checkbox(label=feat, value=False)

            gr.Markdown("<hr class='sep'/>")
            thr = gr.Slider(0.05, 0.95, value=0.50, step=0.01, label="Decision threshold for class '1'")
            run_btn = gr.Button("🚀 Predict (manual)", variant="primary")

            # -------- Sample picker (fixed file) --------
            gr.Markdown("<hr class='sep'/>")
            gr.Markdown("### 2) Sample picker (from fixed file)")
            grp_dd = gr.Dropdown(label="Filter by target", choices=["All", "0", "1"], value="All")
            choices0 = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, "All")
            sample_dd = gr.Dropdown(
                label="Choose sample row",
                choices=choices0,
                value=(choices0[0] if choices0 else None),
            )
            pred_btn = gr.Button("🎯 Predict & compare (sample)", variant="primary")

        # -------- Right: Results --------
        with gr.Column(scale=1):
            gr.Markdown("### 3) Results")
            pred_label = gr.Textbox(label="Predicted label (with threshold decision)", interactive=False)
            with gr.Row():
                prob_out = gr.Number(label="P(class==1)", interactive=False, precision=6)
                decision = gr.Textbox(label="Decision @ threshold", interactive=False)
            with gr.Row():
                gt_out = gr.Textbox(label="Ground truth (sample)", interactive=False)
                match_out = gr.Textbox(label="Correct vs. ground truth?", interactive=False)
            with gr.Accordion("Echoed input (row sent to model)", open=False):
                echoed = gr.Dataframe(wrap=True)

            GI = get_global_importance_table(MODEL)
            if GI is not None and not GI.empty:
                with gr.Accordion("Global feature importance / coefficients", open=False):
                    gr.Dataframe(value=GI, interactive=False, wrap=True)
            else:
                gr.Markdown("<div class='card small'>No native importances/coefficients available for this estimator.</div>")

    def _score_row(row, threshold):
        """Run the pipeline on one feature row.

        Returns ``(label, probability, decision, pretty, frame)``.
        ``probability`` is None when no positive-class probability could be
        extracted; the decision then falls back to the predicted label.
        """
        df_row = pd.DataFrame([row], columns=FEATURES)
        preds = predict_model(MODEL, data=df_row.copy())
        # str() guards against non-string column names.
        label_col = next(
            (c for c in preds.columns if str(c).lower() in ("prediction_label", "label")),
            None,
        )
        label = preds.iloc[0][label_col] if label_col is not None else None
        p = extract_probability_for_positive(preds, positive_label=POS_CLASS)
        if p is None:
            return label, None, label, str(label), df_row
        dec = 1 if float(p) >= float(threshold) else 0
        pretty = f"{label} (threshold {threshold:.2f} ⇒ decision={dec})"
        return label, float(p), dec, pretty, df_row

    def _same_class(left, right) -> bool:
        """Compare two class labels numerically when possible, else as text."""
        try:
            return float(left) == float(right)
        except (TypeError, ValueError):
            return str(left) == str(right)

    # -------- Manual predict --------
    def do_predict_manual(age, bmi, prev_ab_cnt, threshold, *flag_values):
        """Predict from the manual form; the ground-truth outputs stay blank."""
        row = {c: None for c in FEATURES}
        row["age"] = coerce_numeric(age)
        row["BMI"] = coerce_numeric(bmi)
        row["Previos_Obsteric_History_AB"] = coerce_numeric(prev_ab_cnt)
        # flag_values arrive in BOOL_FEATURES order (see the inputs list below).
        for feat, val in zip(BOOL_FEATURES, flag_values):
            row[feat] = 1.0 if bool(val) else 0.0

        _label, p, dec, pretty, df_row = _score_row(row, threshold)
        prob = p if p is not None else float("nan")
        return pretty, prob, str(dec), "", "", df_row

    run_btn.click(
        do_predict_manual,
        inputs=[age_in, bmi_in, prev_ab, thr] + [checkbox_map[f] for f in BOOL_FEATURES],
        outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
    )

    # -------- Update sample choices on filter change --------
    def update_choices(group_value):
        """Rebuild the sample dropdown for the selected target filter."""
        ch = build_sample_choices(SAMPLE_DF, SAMPLE_TARGET, group_value)
        return gr.Dropdown(choices=ch, value=(ch[0] if ch else None))

    grp_dd.change(update_choices, inputs=[grp_dd], outputs=[sample_dd])

    # -------- Predict & compare for selected sample --------
    def predict_sample(sample_choice, threshold):
        """Predict for the chosen sample row and compare with its ground truth."""
        if SAMPLE_DF.empty or sample_choice is None or str(sample_choice).strip() == "":
            raise gr.Error("Sample file is empty or no row selected. Check SAMPLE_FILE path.")

        # Dropdown labels look like "<row position>: y=<target>".
        try:
            idx = int(str(sample_choice).split(":")[0])
        except ValueError:
            raise gr.Error("Sample file is empty or no row selected. Check SAMPLE_FILE path.")
        if not 0 <= idx < len(SAMPLE_DF):
            raise gr.Error("Sample file is empty or no row selected. Check SAMPLE_FILE path.")
        srow = SAMPLE_DF.iloc[idx]

        row = {c: None for c in FEATURES}
        row["age"] = coerce_numeric(srow["age"])
        row["BMI"] = coerce_numeric(srow["BMI"])
        row["Previos_Obsteric_History_AB"] = coerce_numeric(srow["Previos_Obsteric_History_AB"])
        for feat in BOOL_FEATURES:
            row[feat] = 1.0 if truthy(srow[feat]) else 0.0

        label, p, dec, pretty, df_row = _score_row(row, threshold)

        # Read the target from its column: the row Series may have been upcast
        # to float, which would display "1.0" instead of "1".
        gt = SAMPLE_DF[SAMPLE_TARGET].iloc[idx]
        # NOTE(review): correctness is judged against the model's own label,
        # not the thresholded decision -- confirm this is the intent.
        match = "✅ Correct" if _same_class(gt, label) else "❌ Incorrect"

        prob = p if p is not None else float("nan")
        return pretty, prob, str(dec), str(gt), match, df_row

    pred_btn.click(
        predict_sample,
        inputs=[sample_dd, thr],
        outputs=[pred_label, prob_out, decision, gt_out, match_out, echoed],
    )
297
+
298
+ # ---------- Launch ----------
299
if __name__ == "__main__":
    # Apply the network settings from the fixed-config section; they were
    # declared there but never passed to the server.
    demo.launch(server_name=HOST, server_port=PORT, share=SHARE)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pycaret>=3.3,<4
2
+ gradio
3
+ pandas
4
+ shap
5
+ matplotlib