Spaces:
Running
Running
| import pandas as pd | |
| import streamlit as st | |
| import shap | |
| from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, brier_score_loss, log_loss | |
| import streamlit.components.v1 as components | |
def compute_metrics(y_true, y_pred_proba, threshold=0.5):
    """Compute classification metrics from predicted probabilities.

    Args:
        y_true: Ground-truth labels, passed unchanged to the scikit-learn
            metric functions.
        y_pred_proba: Predicted probability for each sample. Any array-like
            is accepted (list, tuple, NumPy array, pandas Series).
        threshold: Probabilities greater than or equal to this value are
            turned into the hard prediction ``1``; everything else is ``0``.

    Returns:
        A dict with the keys ``"AUC"``, ``"F1"``, ``"Accuracy"``,
        ``"Precision"``, ``"Recall"``, ``"BrierScore"`` and ``"Logloss"``.
        AUC, Brier score and log loss are computed from the probabilities;
        the remaining metrics from the thresholded predictions.
    """
    # Local import: no module-level numpy import is visible in this file.
    import numpy as np

    # Coerce first so plain Python sequences support the vectorised
    # comparison and ``.astype`` below.
    y_pred_proba = np.asarray(y_pred_proba)
    y_pred = (y_pred_proba >= threshold).astype(int)
    return {
        "AUC": roc_auc_score(y_true, y_pred_proba),
        "F1": f1_score(y_true, y_pred),
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "BrierScore": brier_score_loss(y_true, y_pred_proba),
        "Logloss": log_loss(y_true, y_pred_proba),
    }
def add_predictions(df, probs):
    """Attach predictions to ``df`` and return a styled results table.

    Two columns are written to ``df`` in place: ``'Predicted Probability'``
    (the raw ``probs``) and ``'<target_col> Prediction'`` holding
    ``'POSITIVE'`` when the probability is strictly greater than 0.5 and
    ``'NEGATIVE'`` otherwise. Those two columns are then joined with
    ``st.session_state.targets_df`` and styled for display.

    Args:
        df: DataFrame to annotate. NOTE: it is mutated (two columns added).
        probs: Iterable of predicted probabilities, one per row of ``df``.

    Returns:
        A pandas ``Styler`` over the joined table, with the prediction
        column colour-coded and every cell centre-aligned.
    """
    df['Predicted Probability'] = probs
    pred_col = f"{st.session_state.target_col} Prediction"
    df[pred_col] = ['POSITIVE' if p > 0.5 else 'NEGATIVE' for p in probs]
    df_with_gt = df[['Predicted Probability', pred_col]].join(st.session_state.targets_df)

    def highlight_prediction(val):
        """Return the CSS for one cell of the prediction column."""
        if val == "POSITIVE":
            return "background-color: #d4edda; color: #155724; text-align: center;"
        if val == "NEGATIVE":
            return "background-color: #f8d7da; color: #721c24; text-align: center;"
        return "text-align: center;"

    styler = df_with_gt.style
    # ``Styler.applymap`` was renamed to ``Styler.map`` in pandas 2.1 and the
    # old name is deprecated; prefer the new name and fall back for older
    # pandas releases.
    apply_cellwise = styler.map if hasattr(styler, "map") else styler.applymap
    df_styled = (
        apply_cellwise(highlight_prediction, subset=[pred_col])
        .set_properties(**{'text-align': 'center'})  # centre-align all cells
    )
    return df_styled
def st_shap(plot, height=None):
    """Embed a SHAP plot in the Streamlit page as an HTML component.

    Args:
        plot: Object exposing an ``html()`` method that returns the plot
            markup.
        height: Forwarded to ``components.html`` as the component height.
    """
    # The output of ``shap.getjs()`` goes into <head>; the plot markup is
    # the body of the document.
    js_head = shap.getjs()
    plot_body = plot.html()
    document = f"<head>{js_head}</head><body>{plot_body}</body>"
    components.html(document, height=height)
def _positive_class_index(model):
    """Return the output index to explain for a multi-output classifier.

    Uses the position of label ``1`` in ``model.classes_`` when available.
    Falls back to index 1 when the model has no ``classes_`` attribute, and
    to the last class when label ``1`` is absent (e.g. string labels).
    """
    classes = getattr(model, "classes_", None)
    if classes is None:
        return 1
    # ``classes_`` is typically a NumPy array, which has no ``.index``
    # method; convert to a list before searching.
    labels = list(classes)
    if 1 in labels:
        return labels.index(1)
    return len(labels) - 1


def ensemble_shap(models, X, model_weights=None):
    """Compute ensemble SHAP values for a list of tree-based models.

    Each model is explained with ``shap.TreeExplainer``; the per-model SHAP
    values and base values are then combined by a weighted average.

    Args:
        models: Non-empty sequence of fitted models accepted by
            ``shap.TreeExplainer``.
        X: Feature table to explain. Must expose ``.columns`` (it is used
            for the feature names of the returned explanation).
        model_weights: Optional sequence with one weight per model. Defaults
            to equal weights. Weights are normalised to sum to one.

    Returns:
        A ``shap.Explanation`` holding the weighted-mean SHAP values and
        base values across models.

    Raises:
        ValueError: If ``models`` is empty, if ``model_weights`` does not
            contain exactly one weight per model, or if the weights sum to
            zero.
    """
    # Local import: no module-level numpy import is visible in this file.
    import numpy as np

    models = list(models)
    if not models:
        raise ValueError("ensemble_shap requires at least one model")

    # Validate the weights up front so a bad argument fails before the
    # (potentially expensive) SHAP computation.
    if model_weights is None:
        weights = np.ones(len(models))
    else:
        weights = np.asarray(model_weights, dtype=float)
        if weights.shape != (len(models),):
            raise ValueError(
                "model_weights must contain exactly one weight per model"
            )
    weight_total = weights.sum()
    if weight_total == 0:
        raise ValueError("model_weights must not sum to zero")
    weights = weights / weight_total

    all_values = []
    all_base_values = []
    for model in models:
        explanation = shap.TreeExplainer(model)(X)
        values = explanation.values
        base_values = explanation.base_values
        # A 3-D result carries one slice per output class on the last axis;
        # keep only the slice for the class being explained.
        if values.ndim == 3:
            class_index = _positive_class_index(model)
            values = values[:, :, class_index]
            base_values = np.asarray(base_values)[..., class_index]
        all_values.append(values)
        all_base_values.append(base_values)

    mean_values = np.average(all_values, axis=0, weights=weights)
    mean_base = np.average(all_base_values, axis=0, weights=weights)
    return shap.Explanation(
        values=mean_values,
        base_values=mean_base,
        data=X,
        feature_names=X.columns,
    )