Spaces:

Synav
/

Explainable-Acute-Leukemia-Mortality-Predictor

Running

App Files Files Community

Synav committed on Jan 19

Commit

bb3bf4d

verified ·

1 Parent(s): ddb0ad4

Update app.py

Browse files

Files changed (1) hide show

app.py +189 -83

app.py CHANGED Viewed

@@ -7,7 +7,8 @@ import joblib
 import shap
 import matplotlib.pyplot as plt
 import os
-from huggingface_hub import HfApi
 from sklearn.pipeline import Pipeline
@@ -231,6 +232,70 @@ def publish_to_hub(model_repo_id: str, version_tag: str):
         "latest_meta_path": "latest/meta.json",
     }
 # ============================================================
 # Streamlit UI
 # ============================================================
@@ -311,89 +376,130 @@ with tab_train:
 # ---------------- PREDICT ----------------
 with tab_predict:
     if st.session_state.pipe is None:
-        st.warning("Train a model first.")
-    else:
-        infer_file = st.file_uploader("Upload inference Excel (.xlsx)", type=["xlsx"])
-        if infer_file:
-            df_inf = pd.read_excel(infer_file, engine="openpyxl")
-            X_inf = df_inf[FEATURE_COLS].copy()
-            X_inf = X_inf.replace({pd.NA: np.nan})
-            for c in CAT_COLS:
-                X_inf[c] = X_inf[c].astype("object")
-                X_inf.loc[X_inf[c].isna(), c] = np.nan
-                X_inf[c] = X_inf[c].map(lambda v: v if pd.isna(v) else str(v))
-            for c in NUM_COLS:
-                X_inf[c] = pd.to_numeric(X_inf[c], errors="coerce")
-            for c in CAT_COLS:
-                X_inf[c] = X_inf[c].astype("object")
-            pipe = st.session_state.pipe
-            proba = pipe.predict_proba(X_inf)[:, 1]
-            df_out = df_inf.copy()
-            df_out["predicted_probability"] = proba
-            st.dataframe(df_out.head())
-            st.download_button(
-                "Download predictions",
-                df_out.to_csv(index=False).encode(),
-                "predictions.csv",
-                "text/csv"
             )
-            st.subheader("SHAP explanation")
-            with st.form("shap_form"):
-                row = st.number_input("Row index", 0, len(X_inf) - 1, 0)
-                explain_btn = st.form_submit_button("Generate SHAP explanation")
-            if explain_btn:
-                X_one = X_inf.iloc[[int(row)]]
-                pre = pipe.named_steps["preprocess"]
-                X_one_t = pre.transform(X_one)
-                # Build explainer if missing
-                if st.session_state.get("explainer") is None:
-                    st.session_state.explainer = build_shap_explainer(pipe, X_inf)
-                explainer = st.session_state.explainer
-                shap_vals = explainer.shap_values(X_one_t)
-                base = explainer.expected_value
-                if isinstance(shap_vals, list):
-                    shap_vals = shap_vals[1]
-                try:
-                    names = list(pre.get_feature_names_out())
-                except Exception:
-                    names = [f"f{i}" for i in range(len(shap_vals[0]))]
-                try:
-                    x_dense = X_one_t.toarray()[0]
-                except Exception:
-                    x_dense = np.array(X_one_t)[0]
-                exp = shap.Explanation(
-                    values=shap_vals[0],
-                    base_values=float(base) if np.isscalar(base) else float(np.array(base).reshape(-1)[0]),
-                    data=x_dense,
-                    feature_names=names,
-                )
-                c1, c2 = st.columns(2)
-                with c1:
-                    st.markdown("**Waterfall**")
-                    fig = plt.figure()
-                    shap.plots.waterfall(exp, show=False, max_display=20)
-                    st.pyplot(fig, clear_figure=True)
-                with c2:
-                    st.markdown("**Top features**")
-                    fig2 = plt.figure()
-                    shap.plots.bar(exp, show=False, max_display=20)
-                    st.pyplot(fig2, clear_figure=True)
-                st.stop()

 import shap
 import matplotlib.pyplot as plt
 import os
+from huggingface_hub import hf_hub_download, HfApi
 from sklearn.pipeline import Pipeline
         "latest_meta_path": "latest/meta.json",
     }
+# Hugging Face Hub model repo id; the Predict tab passes a repo id like this
+# to the list/load helpers, which query it with repo_type="model".
+MODEL_REPO_ID = "Synav/LogiSHAP-Studio-LogReg"
+def list_release_versions(model_repo_id: str):
+    """
+    Returns sorted version tags found under releases/<version>/model.joblib in the model repo.
+    """
+    api = HfApi(token=os.environ.get("HF_TOKEN") or None)
+    files = api.list_repo_files(repo_id=model_repo_id, repo_type="model")
+    versions = set()
+    for f in files:
+        # We only care about releases/<version>/model.joblib
+        if f.startswith("releases/") and f.endswith("/model.joblib"):
+            parts = f.split("/")
+            if len(parts) >= 3:
+                versions.add(parts[1])
+    # Most users want newest first (timestamp tags sort lexicographically)
+    return sorted(versions, reverse=True)
+def load_model_by_version(model_repo_id: str, version_tag: str):
+    """
+    Loads a specific version from releases/<version_tag>/model.joblib and meta.json
+    """
+    model_file = hf_hub_download(
+        repo_id=model_repo_id,
+        repo_type="model",
+        filename=f"releases/{version_tag}/model.joblib",
+    )
+    meta_file = hf_hub_download(
+        repo_id=model_repo_id,
+        repo_type="model",
+        filename=f"releases/{version_tag}/meta.json",
+    )
+    pipe = joblib.load(model_file)
+    with open(meta_file, "r", encoding="utf-8") as f:
+        meta = json.load(f)
+    return pipe, meta
+def load_latest_model(model_repo_id: str):
+    """
+    Loads latest/model.joblib and latest/meta.json
+    """
+    model_file = hf_hub_download(
+        repo_id=model_repo_id,
+        repo_type="model",
+        filename="latest/model.joblib",
+    )
+    meta_file = hf_hub_download(
+        repo_id=model_repo_id,
+        repo_type="model",
+        filename="latest/meta.json",
+    )
+    pipe = joblib.load(model_file)
+    with open(meta_file, "r", encoding="utf-8") as f:
+        meta = json.load(f)
+    return pipe, meta
 # ============================================================
 # Streamlit UI
 # ============================================================
 # ---------------- PREDICT ----------------
 with tab_predict:
+    # Predict tab: pick and load a published model from the Hub, score an
+    # uploaded Excel sheet, and optionally explain one row with SHAP.
+    st.subheader("Select a trained model (no retraining required)")
+    # NOTE(review): rebinds the same repo id string already assigned to
+    # MODEL_REPO_ID at module level earlier in this diff.
+    MODEL_REPO_ID = "Synav/LogiSHAP-Studio-LogReg"
+    # Ensure session state keys exist
+    if "pipe" not in st.session_state:
+        st.session_state.pipe = None
+    if "meta" not in st.session_state:
+        st.session_state.meta = None
+    if "explainer" not in st.session_state:
+        st.session_state.explainer = None
+    # List available releases; on failure show the error and fall back to
+    # an empty list so "latest" can still be chosen.
+    try:
+        versions = list_release_versions(MODEL_REPO_ID)
+    except Exception as e:
+        versions = []
+        st.error(f"Could not list model versions: {e}")
+    # "latest" is always the first (default) choice, followed by any tags.
+    choices = ["latest"] + versions if versions else ["latest"]
+    selected = st.selectbox("Choose model version", choices, index=0)
+    if st.button("Load selected model"):
+        try:
+            with st.spinner("Loading model from Hugging Face Hub..."):
+                if selected == "latest":
+                    pipe, meta = load_latest_model(MODEL_REPO_ID)
+                else:
+                    pipe, meta = load_model_by_version(MODEL_REPO_ID, selected)
+            # Session state is only updated after both files loaded without error.
+            st.session_state.pipe = pipe
+            st.session_state.meta = meta
+            st.session_state.explainer = None  # rebuild later with inference data
+            st.success(f"Loaded model: {selected}")
+        except Exception as e:
+            st.error(f"Load failed: {e}")
+    st.divider()
     if st.session_state.pipe is None:
+        st.warning("Load a model version above, then upload an inference Excel.")
+        # Halt this script run here; nothing below runs without a loaded model.
+        st.stop()
+    pipe = st.session_state.pipe
+    infer_file = st.file_uploader("Upload inference Excel (.xlsx)", type=["xlsx"])
+    if infer_file:
+        df_inf = pd.read_excel(infer_file, engine="openpyxl")
+        # Keep only the model's feature columns; a missing column raises here.
+        X_inf = df_inf[FEATURE_COLS].copy()
+        X_inf = X_inf.replace({pd.NA: np.nan})
+        # Categorical columns: object dtype, missing values kept as NaN,
+        # every other value converted to str.
+        for c in CAT_COLS:
+            X_inf[c] = X_inf[c].astype("object")
+            X_inf.loc[X_inf[c].isna(), c] = np.nan
+            X_inf[c] = X_inf[c].map(lambda v: v if pd.isna(v) else str(v))
+        # Numeric columns: values that cannot be parsed become NaN.
+        for c in NUM_COLS:
+            X_inf[c] = pd.to_numeric(X_inf[c], errors="coerce")
+        # NOTE(review): repeats the object cast already done in the loop above.
+        for c in CAT_COLS:
+            X_inf[c] = X_inf[c].astype("object")
+        # NOTE(review): pipe was already read from session state above.
+        pipe = st.session_state.pipe
+        # Column 1 of predict_proba; presumably the positive class. Confirm
+        # against the trained pipeline's classes_.
+        proba = pipe.predict_proba(X_inf)[:, 1]
+        df_out = df_inf.copy()
+        df_out["predicted_probability"] = proba
+        st.dataframe(df_out.head())
+        st.download_button(
+            "Download predictions",
+            df_out.to_csv(index=False).encode(),
+            "predictions.csv",
+            "text/csv"
+        )
+        st.subheader("SHAP explanation")
+        with st.form("shap_form"):
+            # Row index is bounded to the uploaded sheet: 0 .. len(X_inf) - 1.
+            row = st.number_input("Row index", 0, len(X_inf) - 1, 0)
+            explain_btn = st.form_submit_button("Generate SHAP explanation")
+        if explain_btn:
+            # Double brackets keep the single row as a one-row DataFrame.
+            X_one = X_inf.iloc[[int(row)]]
+            pre = pipe.named_steps["preprocess"]
+            X_one_t = pre.transform(X_one)
+            # Build explainer if missing
+            # NOTE(review): the cached explainer is cleared only when a model
+            # is loaded, not when a different file is uploaded.
+            if st.session_state.get("explainer") is None:
+                st.session_state.explainer = build_shap_explainer(pipe, X_inf)
+            explainer = st.session_state.explainer
+            shap_vals = explainer.shap_values(X_one_t)
+            base = explainer.expected_value
+            # When a list is returned, use the entry at index 1.
+            if isinstance(shap_vals, list):
+                shap_vals = shap_vals[1]
+            # Fall back to generic f0, f1, ... names if the preprocessor
+            # cannot report output feature names.
+            try:
+                names = list(pre.get_feature_names_out())
+            except Exception:
+                names = [f"f{i}" for i in range(len(shap_vals[0]))]
+            # Use .toarray() when available, otherwise a plain array conversion.
+            try:
+                x_dense = X_one_t.toarray()[0]
+            except Exception:
+                x_dense = np.array(X_one_t)[0]
+            # base_values: a scalar as-is, otherwise the first element of the
+            # flattened expected_value.
+            exp = shap.Explanation(
+                values=shap_vals[0],
+                base_values=float(base) if np.isscalar(base) else float(np.array(base).reshape(-1)[0]),
+                data=x_dense,
+                feature_names=names,
             )
+            c1, c2 = st.columns(2)
+            with c1:
+                st.markdown("**Waterfall**")
+                # A new figure is created before the SHAP plot call and then
+                # handed to Streamlit.
+                fig = plt.figure()
+                shap.plots.waterfall(exp, show=False, max_display=20)
+                st.pyplot(fig, clear_figure=True)
+            with c2:
+                st.markdown("**Top features**")
+                fig2 = plt.figure()
+                shap.plots.bar(exp, show=False, max_display=20)
+                st.pyplot(fig2, clear_figure=True)
+            # Halt this script run once the explanation has been rendered.
+            st.stop()