Spaces:

GDMProjects
/

GTT

Sleeping

App Files Community

GDMProjects committed on Sep 1, 2025

Commit

3425736

verified ·

1 Parent(s): 91e7bac

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -12

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
-# app.py
-# pip install "pycaret>=3.3,<4" gradio pandas shap matplotlib
-# --- FORCE NON-INTERACTIVE MATPLOTLIB BACKEND (must be first!) ---
 import os
-os.environ["MPLBACKEND"] = "Agg"   # prevents Tk backend init
 import matplotlib
 matplotlib.use("Agg", force=True)
@@ -19,7 +16,7 @@ from pycaret.classification import load_model
 from huggingface_hub import hf_hub_download
 # --- config ---
 MODEL_BASENAME = "subset_best_model"
-SAMPLES_CSV    = "GTT.csv"   # fixed hidden file
 TARGET_COL     = "gtt"
 POS_LABEL      = 1
@@ -27,7 +24,6 @@ REPO = os.getenv("MODEL_REPO", "GDMProjects/my-private-model")
 FNAME = os.getenv("MODEL_FILE", "subset_best_model.pkl")
 TOKEN = os.getenv("HF_TOKEN")
-# subset features used by the model (normalized names)
 SUBSET_FEATURES = [
     "age",
     "bmi",
@@ -65,7 +61,7 @@ def load_samples():
         missing = needed - set(df.columns)
         print(f"[WARN] samples file missing columns: {sorted(missing)}")
         return None
-    df = df.reset_index(drop=False).rename(columns={"index": "_rid"})  # stable row id for dropdown
     return df
 def pretty_json(d):
@@ -133,11 +129,9 @@ samples_df = load_samples()
 # ---------- SHAP: background + explainer (built once) ----------
 def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
     if df_samples is None:
-        # if no CSV, make a tiny synthetic background of zeros
         bg = pd.DataFrame([{k: 0.0 for k in SUBSET_FEATURES} for _ in range(50)])
     else:
         bg = df_samples[SUBSET_FEATURES].copy()
-    # numeric coercion + median impute
     for c in SUBSET_FEATURES:
         if c not in bg.columns:
             bg[c] = np.nan
@@ -155,7 +149,7 @@ def _f_proba_pos(X_np: np.ndarray) -> np.ndarray:
     X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
     return model.predict_proba(X_df)[:, POS_IDX]
-# SHAP Explainer (KernelExplainer via unified interface)
 try:
     EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
 except Exception as e:
@@ -167,7 +161,7 @@ def _plot_local_shap(row_dict: dict):
     if EXPLAINER is None:
         return None
     X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
-    exp = EXPLAINER(X.values)  # exp.values shape: (1, n_features)
     vals = exp.values[0]
     order = np.argsort(np.abs(vals))
     fig, ax = plt.subplots(figsize=(7, 4.5))
@@ -276,7 +270,6 @@ def compare_correctness(gt_text, decision_label):
     return "✅ Correct" if gt == int(decision_label) else "❌ Incorrect"
 def get_feature_importance_text():
-    # Keep textual fallback if SHAP not available
     est = None
     try:
         est = getattr(model, "named_steps", {}).get("trained_model", None)

 import os
+os.environ["MPLBACKEND"] = "Agg"
 import matplotlib
 matplotlib.use("Agg", force=True)
 from huggingface_hub import hf_hub_download
 # --- config ---
 MODEL_BASENAME = "subset_best_model"
+SAMPLES_CSV    = "GTT.csv"
 TARGET_COL     = "gtt"
 POS_LABEL      = 1
 FNAME = os.getenv("MODEL_FILE", "subset_best_model.pkl")
 TOKEN = os.getenv("HF_TOKEN")
 SUBSET_FEATURES = [
     "age",
     "bmi",
         missing = needed - set(df.columns)
         print(f"[WARN] samples file missing columns: {sorted(missing)}")
         return None
+    df = df.reset_index(drop=False).rename(columns={"index": "_rid"})
     return df
 def pretty_json(d):
 # ---------- SHAP: background + explainer (built once) ----------
 def _prepare_background(df_samples: pd.DataFrame | None, max_rows: int = 200) -> pd.DataFrame:
     if df_samples is None:
         bg = pd.DataFrame([{k: 0.0 for k in SUBSET_FEATURES} for _ in range(50)])
     else:
         bg = df_samples[SUBSET_FEATURES].copy()
     for c in SUBSET_FEATURES:
         if c not in bg.columns:
             bg[c] = np.nan
     X_df = pd.DataFrame(X_np, columns=SUBSET_FEATURES)
     return model.predict_proba(X_df)[:, POS_IDX]
+# SHAP Explainer
 try:
     EXPLAINER = shap.Explainer(_f_proba_pos, BACKGROUND.values)
 except Exception as e:
     if EXPLAINER is None:
         return None
     X = pd.DataFrame([row_dict], columns=SUBSET_FEATURES)
+    exp = EXPLAINER(X.values)
     vals = exp.values[0]
     order = np.argsort(np.abs(vals))
     fig, ax = plt.subplots(figsize=(7, 4.5))
     return "✅ Correct" if gt == int(decision_label) else "❌ Incorrect"
 def get_feature_importance_text():
     est = None
     try:
         est = getattr(model, "named_steps", {}).get("trained_model", None)