Spaces:

Synav
/

Explainable-Acute-Leukemia-Mortality-Predictor

Running

App Files Community

Synav committed on Jan 21

Commit

2b2b5ee

verified ·

1 Parent(s): 500b8f5

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -2

app.py CHANGED Viewed

@@ -761,11 +761,133 @@ with tab_predict:
         proba = pipe.predict_proba(X_inf)[:, 1]
         df_out = df_inf.copy()
         df_out["predicted_probability"] = proba
-        st.dataframe(df_out.head())
         st.download_button(
             "Download predictions",
             df_out.to_csv(index=False).encode(),
@@ -773,6 +895,7 @@ with tab_predict:
             "text/csv"
         )
         st.subheader("SHAP explanation")
         with st.form("shap_form"):

         proba = pipe.predict_proba(X_inf)[:, 1]
+        st.divider()
+        st.subheader("External validation (if AA label is present)")
+        if LABEL_COL in df_inf.columns:
+            try:
+                y_ext_raw = df_inf[LABEL_COL].copy()
+                y_ext01, _ = coerce_binary_label(y_ext_raw)
+                # Core metrics
+                roc_auc_ext = float(roc_auc_score(y_ext01, proba))
+                fpr, tpr, roc_thresholds = roc_curve(y_ext01, proba)
+                # Threshold metrics (user-controlled)
+                thr_ext = st.slider("External validation threshold", 0.0, 1.0, 0.5, 0.01, key="thr_ext")
+                cls_ext = compute_classification_metrics(y_ext01, proba, threshold=float(thr_ext))
+                pr_ext = compute_pr_curve(y_ext01, proba)
+                cal_ext = compute_calibration(
+                    y_ext01, proba,
+                    n_bins=int(n_bins) if "n_bins" in locals() else 10,
+                    strategy=str(cal_strategy) if "cal_strategy" in locals() else "uniform"
+                )
+                dca_ext = decision_curve_analysis(y_ext01, proba)
+                # Display headline metrics
+                c1, c2, c3, c4 = st.columns(4)
+                c1.metric("ROC AUC (external)", f"{roc_auc_ext:.3f}")
+                c2.metric("Sensitivity", f"{cls_ext['sensitivity']:.3f}")
+                c3.metric("Specificity", f"{cls_ext['specificity']:.3f}")
+                c4.metric("F1", f"{cls_ext['f1']:.3f}")
+                # Confusion matrix
+                cm_df = pd.DataFrame(
+                    [[cls_ext["tn"], cls_ext["fp"]], [cls_ext["fn"], cls_ext["tp"]]],
+                    index=["Actual 0", "Actual 1"],
+                    columns=["Pred 0", "Pred 1"],
+                )
+                st.markdown("**Confusion Matrix (external)**")
+                st.dataframe(cm_df)
+                # ROC plot
+                fig = plt.figure()
+                plt.plot(fpr, tpr)
+                plt.plot([0, 1], [0, 1])
+                plt.xlabel("False Positive Rate (1 - Specificity)")
+                plt.ylabel("True Positive Rate (Sensitivity)")
+                plt.title(f"External ROC Curve (AUC = {roc_auc_ext:.3f})")
+                st.pyplot(fig, clear_figure=True)
+                # PR plot
+                st.subheader("Precision–Recall (external)")
+                c1, c2 = st.columns(2)
+                c1.metric("Average Precision (AP)", f"{pr_ext['average_precision']:.3f}")
+                fig_pr = plt.figure()
+                plt.plot(pr_ext["recall"], pr_ext["precision"])
+                plt.xlabel("Recall")
+                plt.ylabel("Precision")
+                plt.title(f"External PR Curve (AP = {pr_ext['average_precision']:.3f})")
+                st.pyplot(fig_pr, clear_figure=True)
+                # Calibration plot
+                st.subheader("Calibration (external)")
+                c1, c2 = st.columns(2)
+                c1.metric("Brier score", f"{cal_ext['brier']:.4f}")
+                c2.write(f"Bins: {cal_ext['n_bins']} | Strategy: {cal_ext['strategy']}")
+                fig_cal = plt.figure()
+                plt.plot(cal_ext["prob_pred"], cal_ext["prob_true"])
+                plt.plot([0, 1], [0, 1])
+                plt.xlabel("Mean predicted probability")
+                plt.ylabel("Observed event rate")
+                plt.title("External Calibration curve")
+                st.pyplot(fig_cal, clear_figure=True)
+                # DCA plot
+                st.subheader("Decision Curve Analysis (external)")
+                fig_dca = plt.figure()
+                plt.plot(dca_ext["thresholds"], dca_ext["net_benefit_model"])
+                plt.plot(dca_ext["thresholds"], dca_ext["net_benefit_all"])
+                plt.plot(dca_ext["thresholds"], dca_ext["net_benefit_none"])
+                plt.xlabel("Threshold probability")
+                plt.ylabel("Net benefit")
+                plt.title("External Decision curve analysis")
+                st.pyplot(fig_dca, clear_figure=True)
+            except Exception as e:
+                st.error(f"Could not compute external validation metrics: {e}")
+        else:
+            st.info("No AA column found in the inference Excel, so external validation metrics cannot be computed.")
+        # `proba` already holds the positive-class probabilities from the
+        # predict_proba call above, so it is reused here instead of re-running the model.
         df_out = df_inf.copy()
         df_out["predicted_probability"] = proba
+        # --- classification + risk bands ---
+        st.divider()
+        st.subheader("Risk stratification")
+        thr = st.slider(
+            "Decision threshold for classification",
+            0.0, 1.0, 0.5, 0.01,
+            key="pred_thr"
+        )
+        df_out["predicted_class"] = (df_out["predicted_probability"] >= thr).astype(int)
+        low_cut, high_cut = st.slider(
+            "Risk band cutoffs (low, high)",
+            0.0, 1.0, (0.2, 0.8), 0.01,
+            key="risk_cuts"
+        )
+        def band(p):
+            """Return the risk-band label ("Low", "Intermediate" or "High") for probability p."""
+            # The low cutoff is tested first, then the high cutoff.
+            if p < low_cut:
+                return "Low"
+            return "High" if p >= high_cut else "Intermediate"
+        df_out["risk_band"] = df_out["predicted_probability"].map(band)
+        # --- END ADD ---
+        st.dataframe(df_out.head())
         st.download_button(
             "Download predictions",
             df_out.to_csv(index=False).encode(),
             "text/csv"
         )
         st.subheader("SHAP explanation")
         with st.form("shap_form"):