Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,6 @@
|
|
| 1 |
-
# app.py
|
| 2 |
-
# pip install "pycaret>=3.3,<4" gradio pandas shap matplotlib
|
| 3 |
|
| 4 |
-
# --- FORCE NON-INTERACTIVE MATPLOTLIB BACKEND (must be first!) ---
|
| 5 |
import os
|
| 6 |
-
os.environ["MPLBACKEND"] = "Agg"
|
| 7 |
import matplotlib
|
| 8 |
matplotlib.use("Agg", force=True)
|
| 9 |
|
|
@@ -19,7 +16,7 @@ from pycaret.classification import load_model
|
|
| 19 |
from huggingface_hub import hf_hub_download
|
| 20 |
# --- config ---
|
| 21 |
MODEL_BASENAME = "subset_best_model"
|
| 22 |
-
SAMPLES_CSV = "GTT.csv"
|
| 23 |
TARGET_COL = "gtt"
|
| 24 |
POS_LABEL = 1
|
| 25 |
|
|
@@ -27,7 +24,6 @@ REPO = os.getenv("MODEL_REPO", "GDMProjects/my-private-model")
|
|
| 27 |
FNAME = os.getenv("MODEL_FILE", "subset_best_model.pkl")
|
| 28 |
TOKEN = os.getenv("HF_TOKEN")
|
| 29 |
|
| 30 |
-
# subset features used by the model (normalized names)
|
| 31 |
SUBSET_FEATURES = [
|
| 32 |
"age",
|
| 33 |
"bmi",
|
|
@@ -65,7 +61,7 @@ def load_samples():
|
|
| 65 |
missing = needed - set(df.columns)
|
| 66 |
print(f"[WARN] samples file missing columns: {sorted(missing)}")
|
| 67 |
return None
|
| 68 |
-
df = df.reset_index(drop=False).rename(columns={"index": "_rid"})
|
| 69 |
return df
|
| 70 |
|
| 71 |
def pretty_json(d):
|
|
@@ -133,11 +129,9 @@ samples_df = load_samples()
|
|
| 133 |
# ---------- SHAP: background + explainer (built once) ----------
|
| 134 |
def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
|
| 135 |
if df_samples is None:
|
| 136 |
-
# if no CSV, make a tiny synthetic background of zeros
|
| 137 |
bg = pd.DataFrame([{k: 0.0 for k in SUBSET_FEATURES} for _ in range(50)])
|
| 138 |
else:
|
| 139 |
bg = df_samples[SUBSET_FEATURES].copy()
|
| 140 |
-
# numeric coercion + median impute
|
| 141 |
for c in SUBSET_FEATURES:
|
| 142 |
if c not in bg.columns:
|
| 143 |
bg[c] = np.nan
|
|
@@ -155,7 +149,7 @@ def _f_proba_pos(X_np: np.ndarray) -> np.ndarray:
|
|
| 155 |
X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
|
| 156 |
return model.predict_proba(X_df)[:, POS_IDX]
|
| 157 |
|
| 158 |
-
# SHAP Explainer
|
| 159 |
try:
|
| 160 |
EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
|
| 161 |
except Exception as e:
|
|
@@ -167,7 +161,7 @@ def _plot_local_shap(row_dict: dict):
|
|
| 167 |
if EXPLAINER is None:
|
| 168 |
return None
|
| 169 |
X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
|
| 170 |
-
exp = EXPLAINER(X.values)
|
| 171 |
vals = exp.values[0]
|
| 172 |
order = np.argsort(np.abs(vals))
|
| 173 |
fig, ax = plt.subplots(figsize=(7, 4.5))
|
|
@@ -276,7 +270,6 @@ def compare_correctness(gt_text, decision_label):
|
|
| 276 |
return "✅ Correct" if gt == int(decision_label) else "❌ Incorrect"
|
| 277 |
|
| 278 |
def get_feature_importance_text():
|
| 279 |
-
# Keep textual fallback if SHAP not available
|
| 280 |
est = None
|
| 281 |
try:
|
| 282 |
est = getattr(model, "named_steps", {}).get("trained_model", None)
|
|
|
|
|
|
|
|
|
|
| 1 |
|
|
|
|
| 2 |
import os
|
| 3 |
+
os.environ["MPLBACKEND"] = "Agg"
|
| 4 |
import matplotlib
|
| 5 |
matplotlib.use("Agg", force=True)
|
| 6 |
|
|
|
|
| 16 |
from huggingface_hub import hf_hub_download
|
| 17 |
# --- config ---
|
| 18 |
MODEL_BASENAME = "subset_best_model"
|
| 19 |
+
SAMPLES_CSV = "GTT.csv"
|
| 20 |
TARGET_COL = "gtt"
|
| 21 |
POS_LABEL = 1
|
| 22 |
|
|
|
|
| 24 |
FNAME = os.getenv("MODEL_FILE", "subset_best_model.pkl")
|
| 25 |
TOKEN = os.getenv("HF_TOKEN")
|
| 26 |
|
|
|
|
| 27 |
SUBSET_FEATURES = [
|
| 28 |
"age",
|
| 29 |
"bmi",
|
|
|
|
| 61 |
missing = needed - set(df.columns)
|
| 62 |
print(f"[WARN] samples file missing columns: {sorted(missing)}")
|
| 63 |
return None
|
| 64 |
+
df = df.reset_index(drop=False).rename(columns={"index": "_rid"})
|
| 65 |
return df
|
| 66 |
|
| 67 |
def pretty_json(d):
|
|
|
|
| 129 |
# ---------- SHAP: background + explainer (built once) ----------
|
| 130 |
def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
|
| 131 |
if df_samples is None:
|
|
|
|
| 132 |
bg = pd.DataFrame([{k: 0.0 for k in SUBSET_FEATURES} for _ in range(50)])
|
| 133 |
else:
|
| 134 |
bg = df_samples[SUBSET_FEATURES].copy()
|
|
|
|
| 135 |
for c in SUBSET_FEATURES:
|
| 136 |
if c not in bg.columns:
|
| 137 |
bg[c] = np.nan
|
|
|
|
| 149 |
X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
|
| 150 |
return model.predict_proba(X_df)[:, POS_IDX]
|
| 151 |
|
| 152 |
+
# SHAP Explainer
|
| 153 |
try:
|
| 154 |
EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
|
| 155 |
except Exception as e:
|
|
|
|
| 161 |
if EXPLAINER is None:
|
| 162 |
return None
|
| 163 |
X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
|
| 164 |
+
exp = EXPLAINER(X.values)
|
| 165 |
vals = exp.values[0]
|
| 166 |
order = np.argsort(np.abs(vals))
|
| 167 |
fig, ax = plt.subplots(figsize=(7, 4.5))
|
|
|
|
| 270 |
return "✅ Correct" if gt == int(decision_label) else "❌ Incorrect"
|
| 271 |
|
| 272 |
def get_feature_importance_text():
|
|
|
|
| 273 |
est = None
|
| 274 |
try:
|
| 275 |
est = getattr(model, "named_steps", {}).get("trained_model", None)
|