Spaces:

Synav
/

Explainable-Acute-Leukemia-Mortality-Predictor

Running

App Files Files Community

Synav committed on Jan 23

Commit

aa01912

verified ·

1 Parent(s): cd77cd4

Update app.py

Browse files

Files changed (1) hide show

app.py +156 -0

app.py CHANGED Viewed

@@ -100,6 +100,31 @@ def get_feature_cols_from_df(df: pd.DataFrame):
                          f"Ensure your Excel header row contains a column named '{LABEL_COL}'.")
     return [c for c in df.columns if c != LABEL_COL]
 # ============================================================
 # Model pipeline
@@ -1110,6 +1135,136 @@ with tab_predict:
     pipe = st.session_state.pipe
     infer_file = st.file_uploader("Upload inference Excel (.xlsx)", type=["xlsx"])
     if infer_file:
         df_inf = pd.read_excel(infer_file, engine="openpyxl")
@@ -1117,6 +1272,7 @@ with tab_predict:
         if not meta:
             st.error("Model metadata not loaded. Please load a model version.")
             st.stop()
         feature_cols = meta["schema"]["features"]
         num_cols = meta["schema"]["numeric"]

                          f"Ensure your Excel header row contains a column named '{LABEL_COL}'.")
     return [c for c in df.columns if c != LABEL_COL]
+from datetime import date
def to_date_safe(x):
    """Coerce a date-like value to a ``datetime.date``; return None on failure.

    Accepted inputs:
      * ``datetime`` (including ``pd.Timestamp``) -- the time part is dropped;
      * ``date`` -- returned unchanged;
      * anything ``pd.to_datetime`` can parse, e.g. an ISO date string.

    Missing markers (``None``, float NaN, ``pd.NA``, ``pd.NaT``) and inputs
    that parse to ``NaT`` (such as an empty string) all yield ``None``, so the
    caller only ever receives a real ``date`` or ``None``.
    """
    # Imported locally so the helper does not depend on which names from the
    # ``datetime`` module happen to be bound at file level.
    from datetime import date as _date, datetime as _datetime

    if x is None:
        return None
    # pd.isna covers float NaN, pd.NA and pd.NaT in one check. NaT has to be
    # rejected before the isinstance tests, otherwise it can leak out of this
    # function instead of a real date or None.
    if pd.api.types.is_scalar(x) and pd.isna(x):
        return None
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(x, _datetime):
        return x.date()
    if isinstance(x, _date):
        return x
    # Everything else (typically a string) goes through pandas' parser.
    try:
        parsed = pd.to_datetime(x)
        if pd.isna(parsed):
            return None
        return parsed.date()
    except Exception:
        # Deliberate best-effort: any unparseable or non-scalar input
        # counts as "no date".
        return None
def age_years_at(dob, ref_date) -> float:
    """Return the age in years at ``ref_date`` for someone born on ``dob``.

    Both arguments are passed through ``to_date_safe``, so they may be
    ``date``/``datetime`` objects or parseable strings.

    Returns:
        The number of days between the two dates divided by 365.25, or
        ``np.nan`` when either date is missing/unparseable or when
        ``ref_date`` is earlier than ``dob``.
    """
    dob = to_date_safe(dob)
    ref_date = to_date_safe(ref_date)
    # pd.isna is True for None and also for NaT, so a "not a time" value
    # coming back from the coercion step never reaches the comparison or the
    # subtraction below.
    if pd.isna(dob) or pd.isna(ref_date):
        return np.nan
    # A reference date before birth is treated as invalid input, not as a
    # negative age.
    if ref_date < dob:
        return np.nan
    return (ref_date - dob).days / 365.25
 # ============================================================
 # Model pipeline
     pipe = st.session_state.pipe
+    # =========================
+    # After model is loaded
+    # =========================
+    pipe = st.session_state.pipe
+    meta = st.session_state.meta
+    feature_cols = meta["schema"]["features"]
+    num_cols = meta["schema"]["numeric"]
+    cat_cols = meta["schema"]["categorical"]
+    st.divider()
+    st.subheader("Single patient entry (DOB → Age, Dx date → prediction)")
+    AGE_FEATURE = "Age (years)"
+    DX_DATE_FEATURE = "Date of 1st Bone Marrow biopsy (Date of Diagnosis) "  # note trailing space
+    # safer date inputs (Streamlit versions vary with None defaults)
+    c1, c2, c3 = st.columns(3)
+    with c1:
+        dob_tmp = st.date_input("Date of birth (DOB)", value=date.today(), key="sp_dob")
+        use_dob = st.checkbox("Use DOB", value=False, key="sp_use_dob")
+        dob = dob_tmp if use_dob else None
+    with c2:
+        dx_tmp = st.date_input("Date of Diagnosis / 1st Bone Marrow biopsy", value=date.today(), key="sp_dx_date")
+        use_dx = st.checkbox("Use Dx date", value=False, key="sp_use_dx")
+        dx_date = dx_tmp if use_dx else None
+    with c3:
+        ecog = st.selectbox("ECOG", options=[0, 1, 2, 3, 4], index=0, key="sp_ecog")
+    from typing import Optional
+    def age_years_at(dob: Optional[date], ref_date: Optional[date]) -> float:
+        if dob is None or ref_date is None:
+            return np.nan
+        if ref_date < dob:
+            return np.nan
+        return (ref_date - dob).days / 365.25
+    derived_age = age_years_at(dob, dx_date)
+    with st.expander("Enter remaining model features", expanded=False):
+        with st.form("single_patient_form"):
+            values_by_index = [np.nan] * len(feature_cols)
+            for i, f in enumerate(feature_cols):
+                if f == AGE_FEATURE:
+                    st.number_input(
+                        f"{f} (auto)",
+                        value=None if np.isnan(derived_age) else float(derived_age),
+                        format="%.2f",
+                        key=f"sp_{i}_age_display",
+                    )
+                    values_by_index[i] = derived_age
+                    continue
+                if f.strip() == DX_DATE_FEATURE.strip():
+                    # if model expects numeric, store numeric timestamp
+                    if f in num_cols and dx_date is not None:
+                        values_by_index[i] = pd.Timestamp(dx_date).toordinal()
+                    else:
+                        values_by_index[i] = np.nan if dx_date is None else str(dx_date)
+                    continue
+                if f.strip() == "ECOG":
+                    values_by_index[i] = ecog
+                    continue
+                if f in num_cols:
+                    v = st.number_input(f, value=None, format="%.6f", key=f"sp_{i}_num")
+                    values_by_index[i] = v
+                elif f in cat_cols:
+                    v = st.text_input(f, value="", key=f"sp_{i}_cat")
+                    values_by_index[i] = np.nan if v.strip() == "" else v
+                else:
+                    v = st.text_input(f, value="", key=f"sp_{i}_other")
+                    values_by_index[i] = np.nan if v.strip() == "" else v
+            submitted = st.form_submit_button("Predict single patient")
+        # IMPORTANT: everything below must be inside "if submitted:"
+        if submitted:
+            X_one = pd.DataFrame([values_by_index], columns=feature_cols).replace({pd.NA: np.nan})
+            for c in num_cols:
+                if c in X_one.columns:
+                    X_one[c] = pd.to_numeric(X_one[c], errors="coerce")
+            for c in cat_cols:
+                if c in X_one.columns:
+                    X_one[c] = X_one[c].astype("object")
+                    X_one.loc[X_one[c].isna(), c] = np.nan
+                    X_one[c] = X_one[c].map(lambda v: v if pd.isna(v) else str(v))
+            proba_one = float(pipe.predict_proba(X_one)[:, 1][0])
+            st.success("Prediction generated.")
+            st.metric("Predicted probability", f"{proba_one:.4f}")
+            thr_single = st.slider("Classification threshold", 0.0, 1.0, 0.5, 0.01, key="sp_thr")
+            pred_class = int(proba_one >= thr_single)
+            low_cut_s, high_cut_s = st.slider(
+                "Risk band cutoffs (low, high)", 0.0, 1.0, (0.2, 0.8), 0.01, key="sp_risk_cuts"
+            )
+            def band_one(p):
+                if p < low_cut_s:
+                    return "Low"
+                if p >= high_cut_s:
+                    return "High"
+                return "Intermediate"
+            out = X_one.copy()
+            out["predicted_probability"] = proba_one
+            out["predicted_class"] = pred_class
+            out["risk_band"] = band_one(proba_one)
+            st.dataframe(out, use_container_width=True)
+            st.download_button(
+                "Download single patient result (CSV)",
+                out.to_csv(index=False).encode("utf-8"),
+                file_name="single_patient_prediction.csv",
+                mime="text/csv",
+                key="dl_sp_csv",
+            )
     infer_file = st.file_uploader("Upload inference Excel (.xlsx)", type=["xlsx"])
     if infer_file:
         df_inf = pd.read_excel(infer_file, engine="openpyxl")
         if not meta:
             st.error("Model metadata not loaded. Please load a model version.")
             st.stop()
         feature_cols = meta["schema"]["features"]
         num_cols = meta["schema"]["numeric"]