Spaces:

GVHD-UAE
/

GVHD_Prediction

Sleeping

App Files Files Community

mfarnas committed on Oct 27, 2025

Commit

4da4fcb

1 Parent(s): 80ed7f2

move st_shap to inference_utils

Browse files

Files changed (2) hide show

src/inference_utils.py +50 -0
src/pages/3_Preprocessing_and_Training.py +9 -12

src/inference_utils.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import pandas as pd
 import streamlit as st
 from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, brier_score_loss, log_loss
 def compute_metrics(y_true, y_pred_proba, threshold=0.5):
     y_pred = (y_pred_proba >= threshold).astype(int)
@@ -37,3 +39,51 @@ def add_predictions(df, probs):
     )
     return df_styled

 import pandas as pd
 import streamlit as st
+import shap
 from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, brier_score_loss, log_loss
+import streamlit.components.v1 as components
 def compute_metrics(y_true, y_pred_proba, threshold=0.5):
     y_pred = (y_pred_proba >= threshold).astype(int)
     )
     return df_styled
+def st_shap(plot, height=None):
+    shap_html = f"<head>{shap.getjs()}</head><body>{plot.html()}</body>"
+    components.html(shap_html, height=height)
+def ensemble_shap(models, X, model_weights=None):
+    """
+    Compute ensemble SHAP values for a list of tree-based models.
+    Returns a shap.Explanation with mean SHAP values across models.
+    """
+    import numpy as np
+    import shap
+    all_values = []
+    all_base_values = []
+    for model in models:
+        explainer = shap.TreeExplainer(model)
+        shap_values = explainer(X)
+        # Handle binary classification
+        if shap_values.values.ndim == 3:
+            # safer class selection
+            class_index = getattr(model, "classes_", [0, 1]).index(1)
+            shap_values = shap.Explanation(
+                values=shap_values.values[:, :, class_index],
+                base_values=shap_values.base_values[:, class_index],
+                data=X,
+                feature_names=X.columns
+            )
+        all_values.append(shap_values.values)
+        all_base_values.append(shap_values.base_values)
+    # Handle weights
+    if model_weights is None:
+        model_weights = np.ones(len(models))
+    model_weights = np.array(model_weights) / np.sum(model_weights)
+    mean_values = np.average(all_values, axis=0, weights=model_weights)
+    mean_base = np.average(all_base_values, axis=0, weights=model_weights)
+    return shap.Explanation(
+        values=mean_values,
+        base_values=mean_base,
+        data=X,
+        feature_names=X.columns
+    )

src/pages/3_Preprocessing_and_Training.py CHANGED Viewed

@@ -1,26 +1,23 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 from catboost import CatBoostClassifier, cv, Pool
 from model_utils import get_model, save_model, save_model_ensemble, ensemble_predict
 from preprocess_utils import load_train_features
 from preprocess_utils import preprocess_pipeline as preprocess
-from inference_utils import compute_metrics
 from sidebar import sidebar
-from sklearn.model_selection import StratifiedKFold
-import os
-from pathlib import Path
-import pyarrow.parquet as pq
 import shap
 import lime
 import lime.lime_tabular
-# Add this helper function at the top of the file
-import streamlit.components.v1 as components
-def st_shap(plot, height=None):
-    shap_html = f"<head>{shap.getjs()}</head><body>{plot.html()}</body>"
-    components.html(shap_html, height=height)
 LOCAL = False
@@ -255,7 +252,7 @@ if "trained_model" in st.session_state or "trained_models" in st.session_state:
                     shap_values_selected.values[sample_idx, :],
                     X_force.iloc[sample_idx, :]
                 ),
-                height=250
             )
             # ---- Display feature + SHAP values for selected single-sample ----

+import os
+from pathlib import Path
+import pyarrow.parquet as pq
 import streamlit as st
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 from catboost import CatBoostClassifier, cv, Pool
+from sklearn.model_selection import StratifiedKFold
 from model_utils import get_model, save_model, save_model_ensemble, ensemble_predict
 from preprocess_utils import load_train_features
 from preprocess_utils import preprocess_pipeline as preprocess
+from inference_utils import compute_metrics, st_shap
 from sidebar import sidebar
 import shap
 import lime
 import lime.lime_tabular
 LOCAL = False
                     shap_values_selected.values[sample_idx, :],
                     X_force.iloc[sample_idx, :]
                 ),
+                height=200
             )
             # ---- Display feature + SHAP values for selected single-sample ----