Spaces:

Synav
/

Explainable-Acute-Leukemia-Mortality-Predictor

Running

App Files Community

Synav committed on Jan 24

Commit

598d303

verified ·

1 Parent(s): 6bbc45c

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -62

app.py CHANGED Viewed

@@ -145,7 +145,7 @@ def build_pipeline(
     cat_pipe = Pipeline([
         ("imputer", SimpleImputer(strategy="most_frequent")),
-        ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=True, drop="first"))
     ])
     preprocessor = ColumnTransformer(
@@ -813,7 +813,7 @@ st.warning(
 with st.expander("Admin controls", expanded=False):
     st.text_input("Admin key", type="password", key="admin_key")
     st.caption("Training and publishing are enabled only after admin authentication.")
 tab_train, tab_predict = st.tabs(["1️⃣ Train", "2️⃣ Predict + SHAP"])
@@ -823,34 +823,40 @@ if "pipe" not in st.session_state:
 if "explainer" not in st.session_state:
     st.session_state.explainer = None
-st.markdown("### Feature reduction options")
-use_feature_selection = st.checkbox(
-    "Drop columns that do not affect prediction (L1 feature selection)",
-    value=True
-)
-l1_C = st.slider(
-    "L1 selection strength (lower = fewer features)",
-    0.01, 10.0, 1.0, 0.01
-) if use_feature_selection else 1.0
-use_dimred = st.checkbox(
-    "Dimensionality reduction (TruncatedSVD) — reduces interpretability",
-    value=False
-)
-svd_components = st.slider(
-    "SVD components (only used if enabled)",
-    5, 300, 50, 5
-) if use_dimred else 50
 # ---------------- TRAIN ----------------
-with tab_train:
     st.subheader("Train model")
     if not is_admin():
@@ -1133,7 +1139,7 @@ with tab_predict:
     st.divider()
     if st.session_state.pipe is None:
         st.warning("Load a model version above, then upload an inference Excel.")
-        st.stop()
     pipe = st.session_state.pipe
@@ -1258,23 +1264,23 @@ with tab_predict:
     # --- header dates ---
     c1, c2 = st.columns(2)
     with c1:
-        dob = st.date_input(
-            "Date of birth (DOB)",
-            value=None,
-            min_value=MIN_DOB,
-            max_value=date.today(),
-            key="sp_dob",
-        )
     with c2:
-        dx_date = st.date_input(
-            "Date of Diagnosis / 1st Bone Marrow biopsy",
-            value=None,
-            min_value=MIN_DOB,
-            max_value=date.today(),
-            key="sp_dx_date",
-        )
-    derived_age = age_years_at(dob, dx_date)  # float or nan
     def yesno_to_01(v: str):
         if v == "Yes":
@@ -1393,14 +1399,7 @@ with tab_predict:
             v = st.text_input(f, value="", key=f"sp_{i}_other")
             values_by_index[i] = np.nan if v.strip() == "" else v
-        with tab_clin:
-            st.caption("Clinical flags: Yes=1, No=0")
-            for i, f in enumerate(feature_cols):
-                if f in YESNO_FIELDS:
-                    v = st.selectbox(f, options=["", "No", "Yes"], index=0, key=f"sp_{i}_yn")
-                    values_by_index[i] = yesno_to_01(v)
         # Apply FISH/NGS selections to row
         fish_set = set(fish_selected)
         ngs_set = set(ngs_selected)
@@ -1529,11 +1528,8 @@ with tab_predict:
                 cls_ext = compute_classification_metrics(y_ext01, proba, threshold=float(thr_ext))
                 pr_ext = compute_pr_curve(y_ext01, proba)
-                cal_ext = compute_calibration(
-                    y_ext01, proba,
-                    n_bins=int(n_bins) if "n_bins" in locals() else 10,
-                    strategy=str(cal_strategy) if "cal_strategy" in locals() else "uniform"
-                )
                 dca_ext = decision_curve_analysis(y_ext01, proba)
                 # Display headline metrics
@@ -1819,16 +1815,17 @@ with tab_predict:
             # BEESWARM SUMMARY (optional)
-            plt.figure(figsize=FIGSIZE, dpi=plot_dpi_screen)
-            shap.summary_plot(
-                shap_vals_batch,
-                features=X_dense,
-                feature_names=names,
-                max_display=max_display,
-                show=False
-            )
-            fig_swarm = plt.gcf()
-            render_plot_with_download(fig_swarm, title="SHAP beeswarm", filename="shap_beeswarm.png", export_dpi=export_dpi)

     cat_pipe = Pipeline([
         ("imputer", SimpleImputer(strategy="most_frequent")),
+        ("onehot", OneHotEncoder(handle_unknown="ignore", sparse=True, drop="first"))
     ])
     preprocessor = ColumnTransformer(
 with st.expander("Admin controls", expanded=False):
     st.text_input("Admin key", type="password", key="admin_key")
     st.caption("Training and publishing are enabled only after admin authentication.")
 tab_train, tab_predict = st.tabs(["1️⃣ Train", "2️⃣ Predict + SHAP"])
 if "explainer" not in st.session_state:
     st.session_state.explainer = None
+with tab_train:
+    st.subheader("Train model")
+    if not is_admin():
+        st.info("Training and publishing are restricted. Use Predict + SHAP for inference.")
+        st.stop()
+    st.markdown("### Feature reduction options")
+    use_feature_selection = st.checkbox(
+        "Drop columns that do not affect prediction (L1 feature selection)",
+        value=True,
+        key="train_use_feature_selection"
+    )
+    l1_C = st.slider(
+        "L1 selection strength (lower = fewer features)",
+        0.01, 10.0, 1.0, 0.01
+    ) if use_feature_selection else 1.0
+    use_dimred = st.checkbox(
+        "Dimensionality reduction (TruncatedSVD) — reduces interpretability",
+        value=False
+    )
+    svd_components = st.slider(
+        "SVD components (only used if enabled)",
+        5, 300, 50, 5
+    ) if use_dimred else 50
+    st.divider()
 # ---------------- TRAIN ----------------
     st.subheader("Train model")
     if not is_admin():
     st.divider()
     if st.session_state.pipe is None:
         st.warning("Load a model version above, then upload an inference Excel.")
     pipe = st.session_state.pipe
     # --- header dates ---
     c1, c2 = st.columns(2)
     with c1:
+        dob_unknown = st.checkbox("DOB unknown", value=False, key="dob_unknown")
+        dob = None
+        if not dob_unknown:
+            dob = st.date_input("Date of birth (DOB)", min_value=MIN_DOB, max_value=date.today(), key="dob")
     with c2:
+        dx_unknown = st.checkbox("Diagnosis date unknown", value=False, key="dx_unknown")
+        dx_date = None
+        if not dx_unknown:
+            dx_date = st.date_input(
+                "Date of Diagnosis / 1st Bone Marrow biopsy",
+                min_value=MIN_DOB, max_value=date.today(),
+                key="dx_date"
+            )
+    derived_age = age_years_at(dob, dx_date)
     def yesno_to_01(v: str):
         if v == "Yes":
             v = st.text_input(f, value="", key=f"sp_{i}_other")
             values_by_index[i] = np.nan if v.strip() == "" else v
         # Apply FISH/NGS selections to row
         fish_set = set(fish_selected)
         ngs_set = set(ngs_selected)
                 cls_ext = compute_classification_metrics(y_ext01, proba, threshold=float(thr_ext))
                 pr_ext = compute_pr_curve(y_ext01, proba)
+                cal_ext = compute_calibration(y_ext01, proba, n_bins=PRED_N_BINS, strategy=PRED_CAL_STRATEGY)
                 dca_ext = decision_curve_analysis(y_ext01, proba)
                 # Display headline metrics
             # BEESWARM SUMMARY (optional)
+            if show_beeswarm:
+                plt.figure(figsize=FIGSIZE, dpi=plot_dpi_screen)
+                shap.summary_plot(
+                    shap_vals_batch,
+                    features=X_dense,
+                    feature_names=names,
+                    max_display=max_display,
+                    show=False
+                )
+                fig_swarm = plt.gcf()
+                render_plot_with_download(fig_swarm, title="SHAP beeswarm", filename="shap_beeswarm.png", export_dpi=export_dpi)