GDMProjects committed on
Commit
3425736
·
verified ·
1 Parent(s): 91e7bac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -12
app.py CHANGED
@@ -1,9 +1,6 @@
1
- # app.py
2
- # pip install "pycaret>=3.3,<4" gradio pandas shap matplotlib
3
 
4
- # --- FORCE NON-INTERACTIVE MATPLOTLIB BACKEND (must be first!) ---
5
  import os
6
- os.environ["MPLBACKEND"] = "Agg" # prevents Tk backend init
7
  import matplotlib
8
  matplotlib.use("Agg", force=True)
9
 
@@ -19,7 +16,7 @@ from pycaret.classification import load_model
19
  from huggingface_hub import hf_hub_download
20
  # --- config ---
21
  MODEL_BASENAME = "subset_best_model"
22
- SAMPLES_CSV = "GTT.csv" # fixed hidden file
23
  TARGET_COL = "gtt"
24
  POS_LABEL = 1
25
 
@@ -27,7 +24,6 @@ REPO = os.getenv("MODEL_REPO", "GDMProjects/my-private-model")
27
  FNAME = os.getenv("MODEL_FILE", "subset_best_model.pkl")
28
  TOKEN = os.getenv("HF_TOKEN")
29
 
30
- # subset features used by the model (normalized names)
31
  SUBSET_FEATURES = [
32
  "age",
33
  "bmi",
@@ -65,7 +61,7 @@ def load_samples():
65
  missing = needed - set(df.columns)
66
  print(f"[WARN] samples file missing columns: {sorted(missing)}")
67
  return None
68
- df = df.reset_index(drop=False).rename(columns={"index": "_rid"}) # stable row id for dropdown
69
  return df
70
 
71
  def pretty_json(d):
@@ -133,11 +129,9 @@ samples_df = load_samples()
133
  # ---------- SHAP: background + explainer (built once) ----------
134
  def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
135
  if df_samples is None:
136
- # if no CSV, make a tiny synthetic background of zeros
137
  bg = pd.DataFrame([{k: 0.0 for k in SUBSET_FEATURES} for _ in range(50)])
138
  else:
139
  bg = df_samples[SUBSET_FEATURES].copy()
140
- # numeric coercion + median impute
141
  for c in SUBSET_FEATURES:
142
  if c not in bg.columns:
143
  bg[c] = np.nan
@@ -155,7 +149,7 @@ def _f_proba_pos(X_np: np.ndarray) -> np.ndarray:
155
  X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
156
  return model.predict_proba(X_df)[:, POS_IDX]
157
 
158
- # SHAP Explainer (KernelExplainer via unified interface)
159
  try:
160
  EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
161
  except Exception as e:
@@ -167,7 +161,7 @@ def _plot_local_shap(row_dict: dict):
167
  if EXPLAINER is None:
168
  return None
169
  X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
170
- exp = EXPLAINER(X.values) # exp.values shape: (1, n_features)
171
  vals = exp.values[0]
172
  order = np.argsort(np.abs(vals))
173
  fig, ax = plt.subplots(figsize=(7, 4.5))
@@ -276,7 +270,6 @@ def compare_correctness(gt_text, decision_label):
276
  return "✅ Correct" if gt == int(decision_label) else "❌ Incorrect"
277
 
278
  def get_feature_importance_text():
279
- # Keep textual fallback if SHAP not available
280
  est = None
281
  try:
282
  est = getattr(model, "named_steps", {}).get("trained_model", None)
 
 
 
1
 
 
2
  import os
3
+ os.environ["MPLBACKEND"] = "Agg"
4
  import matplotlib
5
  matplotlib.use("Agg", force=True)
6
 
 
16
  from huggingface_hub import hf_hub_download
17
  # --- config ---
18
  MODEL_BASENAME = "subset_best_model"
19
+ SAMPLES_CSV = "GTT.csv"
20
  TARGET_COL = "gtt"
21
  POS_LABEL = 1
22
 
 
24
  FNAME = os.getenv("MODEL_FILE", "subset_best_model.pkl")
25
  TOKEN = os.getenv("HF_TOKEN")
26
 
 
27
  SUBSET_FEATURES = [
28
  "age",
29
  "bmi",
 
61
  missing = needed - set(df.columns)
62
  print(f"[WARN] samples file missing columns: {sorted(missing)}")
63
  return None
64
+ df = df.reset_index(drop=False).rename(columns={"index": "_rid"})
65
  return df
66
 
67
  def pretty_json(d):
 
129
  # ---------- SHAP: background + explainer (built once) ----------
130
  def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
131
  if df_samples is None:
 
132
  bg = pd.DataFrame([{k: 0.0 for k in SUBSET_FEATURES} for _ in range(50)])
133
  else:
134
  bg = df_samples[SUBSET_FEATURES].copy()
 
135
  for c in SUBSET_FEATURES:
136
  if c not in bg.columns:
137
  bg[c] = np.nan
 
149
  X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
150
  return model.predict_proba(X_df)[:, POS_IDX]
151
 
152
+ # SHAP Explainer
153
  try:
154
  EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
155
  except Exception as e:
 
161
  if EXPLAINER is None:
162
  return None
163
  X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
164
+ exp = EXPLAINER(X.values)
165
  vals = exp.values[0]
166
  order = np.argsort(np.abs(vals))
167
  fig, ax = plt.subplots(figsize=(7, 4.5))
 
270
  return "✅ Correct" if gt == int(decision_label) else "❌ Incorrect"
271
 
272
  def get_feature_importance_text():
 
273
  est = None
274
  try:
275
  est = getattr(model, "named_steps", {}).get("trained_model", None)