# Credit Risk Explainable-AI Dashboard — Gradio app (Hugging Face Space entry point)
import os

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Optional DiCE: counterfactual explanations are only enabled when dice-ml
# is installed; DICE_OK gates every use so the app degrades gracefully.
try:
    import dice_ml
    from dice_ml import Dice
    DICE_OK = True
except Exception:
    # Any import failure (package missing, broken transitive deps) disables DiCE.
    dice_ml = None
    Dice = None
    DICE_OK = False
# -----------------------------
# Paths (overridable via environment variables for deployment flexibility)
# -----------------------------
MODEL_PATH = os.getenv("MODEL_PATH", "xgb_credit_model.pkl")  # trained model artifact
DATA_PATH = os.getenv("DATA_PATH", "german_credit_data.csv")  # optional reference dataset
# -----------------------------
# Label column candidates, checked in order when inferring the dataset's target
# -----------------------------
LABEL_CANDIDATES = ["label", "target", "class", "risk", "credit_risk", "kredit"]
# -----------------------------
# Value -> Label maps (numeric codes -> readable labels).
# The model always consumes the numeric codes; these maps are presentation only
# (dropdown choices and human-readable table cells). Keys follow the German
# Credit dataset's coded categorical scheme.
# -----------------------------
VALUE_LABEL_MAPS = {
    "laufkont": {1: "no checking account", 2: "less than 0 DM", 3: "0 to 200 DM", 4: "200 DM or more"},
    "moral": {
        0: "delay in paying off in the past",
        1: "critical account/other credits elsewhere",
        2: "no credits taken/all credits paid back duly",
        3: "existing credits paid back duly till now",
        4: "all credits at this bank paid back duly till now",
    },
    "verw": {
        0: "others",
        1: "car (new)",
        2: "car (used)",
        3: "furniture/equipment",
        4: "radio/television",
        5: "domestic appliances",
        6: "repairs",
        7: "education",
        8: "vacation",
        9: "retraining",
        10: "business",
    },
    "sparkont": {
        1: "unknown/no savings account",
        2: "less than 100 DM",
        3: "100 to 500 DM",
        4: "500 to 1000 DM",
        5: "1000 DM or more",
    },
    "beszeit": {1: "unemployed", 2: "less than 1 year", 3: "1 to 4 yrs", 4: "4 to 7 yrs", 5: "7 yrs or more"},
    "rate": {1: "35 or more", 2: "25 to 35", 3: "20 to 25", 4: "less than 20"},
    "famges": {1: "male : divorced/separated", 2: "female : non-single or male : single", 3: "male : married/widowed", 4: "female : single"},
    "buerge": {1: "none", 2: "co-applicant", 3: "guarantor"},
    "wohnzeit": {1: "less than 1 year", 2: "1 to 4 yrs", 3: "4 to 7 yrs", 4: "7 yrs or more"},
    "verm": {1: "unknown/no property", 2: "car or other", 3: "building soc. savings agr./life insurance", 4: "real estate"},
    "weitkred": {1: "bank", 2: "stores", 3: "none"},
    "wohn": {1: "for free", 2: "rent", 3: "own"},
    "beruf": {
        1: "unemployed/unskilled - non-resident",
        2: "unskilled-resident",
        3: "skilled employee/official",
        4: "manager/self-employed/highly qualified employee",
    },
    "bishkred": {1: "1", 2: "2-3", 3: "4-5", 4: "6 or more"},
    "pers": {1: "3 or more", 2: "0 to 2"},
    "telef": {1: "no", 2: "yes (under customer name)"},
    "gastarb": {1: "yes", 2: "no"},
}
# -----------------------------
# Feature names (German dataset column codes -> English display names).
# Used for component labels and explanation tables.
# -----------------------------
FEATURE_NAME_MAP = {
    "laufkont": "Checking account status",
    "laufzeit": "Loan duration (months)",
    "moral": "Credit history",
    "verw": "Loan purpose",
    "hoehe": "Credit amount",
    "sparkont": "Savings account status",
    "beszeit": "Employment duration",
    "rate": "Installment rate",
    "famges": "Personal status / sex",
    "buerge": "Other debtors / guarantors",
    "wohnzeit": "Present residence (years)",
    "verm": "Property",
    "alter": "Age",
    "weitkred": "Other installment plans",
    "wohn": "Housing",
    "bishkred": "Number of existing credits",
    "beruf": "Job",
    "pers": "Number of people liable",
    "telef": "Telephone",
    "gastarb": "Foreign worker",
}
def pretty_name(col: str) -> str:
    """Return the human-readable English name for a raw feature column.

    Unknown columns are returned unchanged.
    """
    try:
        return FEATURE_NAME_MAP[col]
    except KeyError:
        return col
def decode_value(col: str, v):
    """Map a numeric code to its readable label for display in tables.

    The model always consumes the numeric codes; this is presentation only.
    Columns without a map, and values that cannot be parsed as int or are
    absent from the map, fall back to str(v).
    """
    mapping = VALUE_LABEL_MAPS.get(col)
    if mapping is None:
        return str(v)
    try:
        code = int(v)
    except Exception:
        return str(v)
    return mapping.get(code, str(v))
# -----------------------------
# Load model/data
# -----------------------------
def load_model():
    """Deserialize the trained model from MODEL_PATH, failing loudly if absent."""
    if os.path.exists(MODEL_PATH):
        return joblib.load(MODEL_PATH)
    raise FileNotFoundError(
        f"Model file not found: {MODEL_PATH}. "
        "Upload xgb_credit_model.pkl into the Space root folder."
    )
def load_data():
    """Load the reference dataset from DATA_PATH; return None when unavailable."""
    if not os.path.exists(DATA_PATH):
        return None
    return pd.read_csv(DATA_PATH)
# Load artifacts at import time so the UI below can be built from the schema.
model = load_model()
df = load_data()

if df is not None:
    # Infer the label column from known candidates; everything else is a feature.
    label_col = next((c for c in LABEL_CANDIDATES if c in df.columns), None)
    feature_cols = [c for c in df.columns if c != label_col]
else:
    # No dataset available: fall back to feature names recorded on the fitted model.
    feature_cols = list(getattr(model, "feature_names_in_", []))
    label_col = None

if not feature_cols:
    raise ValueError("Could not infer feature columns. Upload german_credit_data.csv with correct columns.")
# -----------------------------
# Numeric slider ranges: column -> (min, max, median), used to configure the
# input sliders. Coded categorical columns (those in VALUE_LABEL_MAPS) are
# skipped — they get dropdowns instead.
# -----------------------------
col_minmax = {}
if df is not None:
    for c in feature_cols:
        if c in VALUE_LABEL_MAPS:
            continue
        if pd.api.types.is_numeric_dtype(df[c]):
            col_minmax[c] = (float(df[c].min()), float(df[c].max()), float(df[c].median()))
# -----------------------------
# Prediction helpers
# -----------------------------
def predict_proba(input_row: pd.DataFrame) -> float:
    """Return the predicted probability of the positive ("risky") class.

    Expects a single-row DataFrame. Falls back to the raw prediction when the
    model does not expose predict_proba.
    """
    if not hasattr(model, "predict_proba"):
        return float(model.predict(input_row)[0])
    return float(model.predict_proba(input_row)[0, 1])
def decision_from_threshold(prob: float, threshold: float) -> str:
    """Apply the policy threshold: probabilities at or above it are declined."""
    if prob >= threshold:
        return "Risky ❌"
    return "Good ✅"
# -----------------------------
# FAST explanation: TRUE XGBoost contributions (TreeSHAP via pred_contribs)
# -----------------------------
def xgb_fast_contribs(input_row: pd.DataFrame, top_k: int = 8):
    """Compute per-feature TreeSHAP contributions for one applicant row.

    Returns (matplotlib figure, DataFrame of the top_k features by absolute
    contribution), or (None, one-row "message" DataFrame) when the model is
    not XGBoost-based or anything in the pipeline fails.
    """
    try:
        import xgboost as xgb
        booster = model.get_booster() if hasattr(model, "get_booster") else None
        if booster is None:
            return None, pd.DataFrame({"message": ["This model is not an XGBoost booster-based model."]})
        dmat = xgb.DMatrix(input_row, feature_names=list(input_row.columns))
        contrib = booster.predict(dmat, pred_contribs=True)  # (1, n_features + 1 bias)
        contrib = np.array(contrib)[0]
        values = contrib[:-1]  # last element is the bias term; drop it
        temp = pd.DataFrame({
            "Feature": [pretty_name(c) for c in input_row.columns],
            "Contribution": values,
            "Input value": [decode_value(c, input_row.iloc[0][c]) for c in input_row.columns],
        })
        # Rank by absolute impact but keep signed values for display.
        temp["abs"] = temp["Contribution"].abs()
        top = temp.sort_values("abs", ascending=False).drop(columns=["abs"]).head(top_k).reset_index(drop=True)
        fig = plt.figure(figsize=(7.2, 4.2))
        # Reverse so the largest driver is drawn at the top of the barh chart.
        y = top["Feature"].tolist()[::-1]
        x = top["Contribution"].tolist()[::-1]
        plt.barh(y, x)
        plt.title("Top local drivers (FAST explanation)")
        plt.xlabel("Contribution (positive => Risky, negative => Good)")
        plt.tight_layout()
        return fig, top
    except Exception as e:
        return None, pd.DataFrame({"message": [f"FAST explanation failed: {e}"]})
# -----------------------------
# Strict Recourse (Policy-aligned)
# ONLY: amount + duration + installment
# -----------------------------
ACTIONABLE = ["hoehe", "laufzeit", "rate"]


def strict_recourse_suggestions(input_row: pd.DataFrame, threshold: float):
    """Enumerate policy-allowed what-if changes that could flip a decline.

    Only the three ACTIONABLE features are varied: credit amount (hoehe),
    loan duration (laufzeit), and installment rate (rate). Returns a
    DataFrame of up to 8 candidate changes sorted by resulting risk
    probability (best first), or a one-row "message" DataFrame when recourse
    is unnecessary or the required columns are missing.
    """
    missing = [c for c in ACTIONABLE if c not in input_row.columns]
    if missing:
        return pd.DataFrame({"message": [f"Strict recourse needs columns {ACTIONABLE}, missing: {missing}"]})
    base_prob = predict_proba(input_row)
    if base_prob < threshold:
        return pd.DataFrame({"message": ["Applicant already approved. No strict recourse needed."]})

    suggestions = []

    # 1) Credit amount reductions (floor of 250 keeps the loan meaningful)
    original_amt = float(input_row["hoehe"].iloc[0])
    for pct in [0.9, 0.8, 0.7, 0.6]:
        new_row = input_row.copy()
        new_row["hoehe"] = max(250.0, original_amt * pct)
        new_prob = predict_proba(new_row)
        suggestions.append({
            "Suggestion": f"Reduce credit amount to {int(pct*100)}%",
            "Changes": f"Credit amount: {original_amt:.1f} -> {new_row['hoehe'].iloc[0]:.1f}",
            "New risk probability": round(new_prob, 4),
            "Decision @ threshold": decision_from_threshold(new_prob, threshold),
        })

    # 2) Loan duration reductions.
    # BUGFIX: with short loans the 6-month floor made several offsets collapse
    # to the same candidate (e.g. duration 12 -> 6, 6, 6 produced three
    # identical suggestions), and a candidate equal to the current duration is
    # a no-op. Deduplicate and drop the original duration.
    original_dur = float(input_row["laufzeit"].iloc[0])
    dur_candidates = sorted({max(6, original_dur - delta) for delta in (6, 12, 18)} - {original_dur})
    for new_dur in dur_candidates:
        new_row = input_row.copy()
        new_row["laufzeit"] = float(new_dur)
        new_prob = predict_proba(new_row)
        suggestions.append({
            "Suggestion": "Shorten loan duration",
            "Changes": f"Loan duration: {original_dur:.0f} -> {new_dur:.0f} months",
            "New risk probability": round(new_prob, 4),
            "Decision @ threshold": decision_from_threshold(new_prob, threshold),
        })

    # 3) Installment rate options: try every other coded bracket (1..4)
    original_rate = int(input_row["rate"].iloc[0])
    for new_rate in [1, 2, 3, 4]:
        if new_rate == original_rate:
            continue
        new_row = input_row.copy()
        new_row["rate"] = int(new_rate)
        new_prob = predict_proba(new_row)
        suggestions.append({
            "Suggestion": f"Change installment rate to {decode_value('rate', new_rate)}",
            "Changes": f"Installment rate: {decode_value('rate', original_rate)} -> {decode_value('rate', new_rate)}",
            "New risk probability": round(new_prob, 4),
            "Decision @ threshold": decision_from_threshold(new_prob, threshold),
        })

    out = pd.DataFrame(suggestions)
    out = out.sort_values("New risk probability", ascending=True).reset_index(drop=True)
    return out.head(8)
# -----------------------------
# DiCE Counterfactuals (STRICT)
# -----------------------------
def dice_counterfactuals_strict(input_row: pd.DataFrame, threshold: float, total_CFs: int = 3):
    """Generate DiCE counterfactuals that vary only the ACTIONABLE features.

    Requires dice-ml (DICE_OK) and the reference dataset with a recognizable
    label column. Returns a DataFrame of counterfactual loan terms that pass
    the approval threshold, or a one-row "message" DataFrame explaining why
    none are available.
    """
    if not DICE_OK:
        return pd.DataFrame({"message": ["dice-ml is not installed. Add dice-ml to requirements.txt."]})
    if df is None:
        return pd.DataFrame({"message": ["Dataset missing. Upload german_credit_data.csv to enable DiCE."]})
    ycol = next((c for c in LABEL_CANDIDATES if c in df.columns), None)
    if ycol is None:
        return pd.DataFrame({"message": ["No label column found in dataset (expected kredit/label/target)."]})
    base_prob = predict_proba(input_row)
    if base_prob < threshold:
        return pd.DataFrame({"message": ["Applicant already approved. No DiCE recourse needed."]})
    # DiCE needs a fully numeric frame: coerce every column and median-impute.
    df_num = df.copy()
    df_num[ycol] = pd.to_numeric(df_num[ycol], errors="coerce")
    for c in feature_cols:
        df_num[c] = pd.to_numeric(df_num[c], errors="coerce")
        if df_num[c].isna().any():
            df_num[c] = df_num[c].fillna(df_num[c].median())
    # Same numeric cleanup for the query instance.
    query = input_row.copy()
    for c in feature_cols:
        query[c] = pd.to_numeric(query[c], errors="coerce")
    query = query.fillna(query.median(numeric_only=True))
    try:
        # All features are declared continuous: simplest setup for method="random".
        continuous_features = feature_cols
        data_dice = dice_ml.Data(
            dataframe=df_num,
            continuous_features=continuous_features,
            outcome_name=ycol,
        )
        model_dice = dice_ml.Model(model=model, backend="sklearn")
        dice = Dice(data_dice, model_dice, method="random")
        cf = dice.generate_counterfactuals(
            query_instances=query,
            # desired_class=0: presumably 0 encodes the "good" outcome in the
            # label column — NOTE(review): confirm against the dataset coding.
            total_CFs=total_CFs,
            desired_class=0,
            features_to_vary=ACTIONABLE,
        )
        cf_df = cf.cf_examples_list[0].final_cfs_df.copy()
        for c in feature_cols:
            cf_df[c] = pd.to_numeric(cf_df[c], errors="coerce")
        cf_df = cf_df.fillna(cf_df.median(numeric_only=True))
        # Re-score each counterfactual with the actual deployed model.
        probs = model.predict_proba(cf_df[feature_cols])[:, 1] if hasattr(model, "predict_proba") else model.predict(cf_df[feature_cols])
        out_rows = []
        for i in range(len(cf_df)):
            row = cf_df.iloc[i]
            new_prob = float(probs[i])
            out_rows.append({
                "Credit amount": int(round(float(row["hoehe"]))),
                "Loan duration (months)": int(round(float(row["laufzeit"]))),
                "Installment rate": decode_value("rate", int(round(row["rate"]))),
                "New risk probability": round(new_prob, 4),
                "Decision": "Good ✅" if new_prob < threshold else "Risky ❌",
            })
        out = pd.DataFrame(out_rows).sort_values("New risk probability").reset_index(drop=True)
        # Keep only counterfactuals that actually pass the approval threshold.
        out = out[out["New risk probability"] < threshold].reset_index(drop=True)
        if out.empty:
            return pd.DataFrame({"message": ["DiCE produced CFs but none passed the threshold. Increase total_CFs."]})
        return out
    except Exception as e:
        return pd.DataFrame({"message": [f"DiCE failed: {e}"]})
# -----------------------------
# Fairness Snapshot
# -----------------------------
def fairness_snapshot(threshold: float):
    """Approval rates across age bands and job codes at the given threshold.

    Scores the whole reference dataset with the model; "approved" means
    predicted risk below the threshold. Returns a one-row "message"
    DataFrame when the dataset or the required columns are missing.
    """
    if df is None:
        return pd.DataFrame({"message": ["Upload german_credit_data.csv to enable fairness snapshot."]})
    if "alter" not in df.columns or "beruf" not in df.columns:
        return pd.DataFrame({"message": ["No fairness columns found (alter/beruf missing)."]})
    X = df[feature_cols].copy()
    probs = model.predict_proba(X)[:, 1] if hasattr(model, "predict_proba") else model.predict(X)
    approved = (probs < threshold).astype(int)
    out_rows = []
    # Age groups: fixed right-closed bins chosen for readability.
    age = df["alter"].astype(float)
    bins = pd.cut(age, bins=[0, 25, 35, 45, 55, 100], right=True)
    grp_age = pd.DataFrame({"Group": bins.astype(str), "Approved": approved})
    rate_age = grp_age.groupby("Group")["Approved"].mean().reset_index()
    rate_age["Metric"] = "Approval rate by Age group"
    out_rows.append(rate_age)
    # Job groups: raw coded categories (see VALUE_LABEL_MAPS["beruf"]).
    job = df["beruf"].astype(int)
    grp_job = pd.DataFrame({"Group": job.astype(str), "Approved": approved})
    rate_job = grp_job.groupby("Group")["Approved"].mean().reset_index()
    rate_job["Metric"] = "Approval rate by Job"
    out_rows.append(rate_job)
    out = pd.concat(out_rows, ignore_index=True)
    out["Approval rate"] = (out["Approved"] * 100).round(1)
    out = out.drop(columns=["Approved"])
    out["Approval rate"] = out["Approval rate"].astype(str) + "%"
    return out[["Metric", "Group", "Approval rate"]]
# -----------------------------
# Build inputs
# -----------------------------
def build_inputs():
    """Create one Gradio component per feature column, in feature_cols order.

    Coded categoricals become Dropdowns (label displayed, numeric code sent
    to the model); numeric columns with known ranges become Sliders; anything
    else falls back to a free Number input.
    """
    comps = []
    for c in feature_cols:
        label = pretty_name(c)
        if c in VALUE_LABEL_MAPS:
            choices = [(v, k) for k, v in VALUE_LABEL_MAPS[c].items()]  # (label, code)
            choices = sorted(choices, key=lambda x: x[1])  # order by numeric code
            default_code = choices[0][1]
            comps.append(
                gr.Dropdown(
                    choices=choices,
                    value=default_code,
                    label=label,
                    type="value",
                    interactive=True,
                )
            )
        elif c in col_minmax:
            mn, mx, med = col_minmax[c]
            # Integer-valued ranges get unit steps; otherwise use a fine step.
            step = 1 if float(mn).is_integer() and float(mx).is_integer() else 0.1
            comps.append(
                gr.Slider(
                    minimum=mn,
                    maximum=mx,
                    value=med,
                    step=step,
                    label=label,
                    interactive=True,
                )
            )
        else:
            comps.append(gr.Number(value=0, label=label, interactive=True))
    return comps


# Instantiated once; the components are rendered inside the layout below.
input_components = build_inputs()
# ============================================================
# SAFE TABLE TEXT (NO tabulate)
# ============================================================
def df_to_text(df_: pd.DataFrame, max_rows=6) -> str:
    """Render up to max_rows of a DataFrame as plain text.

    Avoids the optional `tabulate` dependency by using to_string. Returns a
    fixed message for None / non-DataFrame / empty input, and a fallback
    message if rendering itself fails.
    """
    invalid = df_ is None or not isinstance(df_, pd.DataFrame) or df_.empty
    if invalid:
        return "No data available."
    try:
        rendered = df_.head(max_rows).to_string(index=False)
    except Exception:
        return "Could not display the table."
    return rendered
# ============================================================
# Stakeholder Q&A (READY QUESTIONS ONLY)
# ============================================================
# Question types exposed in the UI; must match the keys of READY_QUESTIONS
# and the branches in answer_by_type.
ALLOWED_TYPES = ["Why", "Data", "Performance", "How to be that"]

# Canned questions per type; the second dropdown offers exactly these.
READY_QUESTIONS = {
    "Why": [
        "Why is this applicant predicted as Risky?",
        "What are the top risk factors for this applicant?",
        "Which features pushed the decision toward Risky the most?",
    ],
    "Data": [
        "What dataset was the model trained on?",
        "What is the label column and how many rows are in the dataset?",
        "Are there any limitations or bias risks in the data?",
    ],
    "Performance": [
        "How accurate is the model on the available dataset?",
        "What is the approval rate at the current threshold?",
        "Can the model make mistakes? When?",
    ],
    "How to be that": [
        "How can this applicant become Good (approved)?",
        "What are the best strict recourse suggestions?",
        "Show DiCE counterfactuals (strict) if available.",
    ],
}
def build_context(prob, decision, threshold, drivers_table, strict_table, dice_table, fair):
    """Bundle a completed dashboard run into a plain dict for the Q&A tab.

    Table arguments that are not genuine DataFrames (e.g. message frames
    replaced by None upstream, or error strings) are normalized to None so
    the answer code can skip isinstance checks.
    """
    def _frame_or_none(obj):
        # Keep only real DataFrames; anything else becomes None.
        return obj if isinstance(obj, pd.DataFrame) else None

    return {
        "prob": float(prob),
        "decision": str(decision),
        "threshold": float(threshold),
        "drivers_table": _frame_or_none(drivers_table),
        "strict_table": _frame_or_none(strict_table),
        "dice_table": _frame_or_none(dice_table),
        "fair": _frame_or_none(fair),
        "has_run": True,
    }
def answer_by_type(qtype: str, question: str, ctx: dict) -> str:
    """Answer one of the ready-made stakeholder questions from run context.

    Requires a prior dashboard run (ctx["has_run"] set by build_context);
    dispatches on the question type and composes a plain-text answer from the
    cached tables. Unknown types yield a generic "Unknown type." answer.
    """
    if not ctx or not ctx.get("has_run"):
        return "⚠️ Please click **Run Dashboard** first, then ask a ready question."

    prob = ctx["prob"]
    thr = ctx["threshold"]
    decision = ctx["decision"]
    header = f"Type: {qtype}\nQuestion: {question}\n\n"

    # WHY: local drivers of the current decision.
    if qtype == "Why":
        drivers = ctx.get("drivers_table")
        top_text = "No driver table available."
        if drivers is not None and "Contribution" in drivers.columns:
            ranked = drivers.sort_values("Contribution", ascending=False).head(5)
            cols = [c for c in ["Feature", "Input value", "Contribution"] if c in ranked.columns]
            top_text = df_to_text(ranked[cols], max_rows=5)
        return (
            header +
            "Answer:\n"
            f"- Decision: {decision}\n"
            f"- Risk probability: {prob*100:.1f}% (threshold={thr:.2f})\n\n"
            f"Top risk-increasing factors:\n{top_text}"
        )

    # HOW TO BE THAT: recourse tables from the last run.
    if qtype == "How to be that":
        strict = ctx.get("strict_table")
        dice = ctx.get("dice_table")
        strict_text = "No strict recourse available." if strict is None else df_to_text(strict, max_rows=6)
        dice_text = "No DiCE recourse available." if dice is None else df_to_text(dice, max_rows=6)
        return (
            header +
            "Answer:\n"
            f"- Current: {decision} | risk={prob*100:.1f}% (threshold={thr:.2f})\n\n"
            f"Strict recourse (allowed changes only):\n{strict_text}\n\n"
            f"DiCE counterfactuals (if available):\n{dice_text}\n\n"
            "Policy note: Only these can change: credit amount / duration / installment rate."
        )

    # DATA: basic facts about the reference dataset.
    if qtype == "Data":
        n = 0 if df is None else len(df)
        feats = len(feature_cols) if feature_cols else 0
        lbl = label_col or "Not found"
        return (
            header +
            "Answer:\n"
            f"- Dataset rows: {n}\n"
            f"- Number of features: {feats}\n"
            f"- Label column: {lbl}\n"
            "- Data quality / imbalance can affect bias and reliability."
        )

    # PERFORMANCE: accuracy/approval rate on the available dataset.
    if qtype == "Performance":
        if df is None or label_col is None:
            return (
                header +
                "Answer:\n"
                "- Performance metrics are not available because the dataset label column is missing.\n"
                "- Add a label column (e.g. `label` or `kredit`) to compute accuracy/AUC."
            )
        try:
            y = pd.to_numeric(df[label_col], errors="coerce").fillna(0).astype(int).values
            X = df[feature_cols]
            if hasattr(model, "predict_proba"):
                probs = model.predict_proba(X)[:, 1]
            else:
                probs = model.predict(X)
            pred = (probs >= thr).astype(int)
            acc = float((pred == y).mean())
            approve_rate = float((probs < thr).mean()) * 100
            return (
                header +
                "Answer:\n"
                f"- Accuracy @ threshold={thr:.2f}: {acc*100:.1f}%\n"
                f"- Approval rate @ threshold={thr:.2f}: {approve_rate:.1f}%\n"
                "Note: computed on the available dataset (not an external test set)."
            )
        except Exception as e:
            return header + f"Answer:\n⚠️ Failed to compute performance: {e}"

    return header + "Answer:\nUnknown type."
def update_ready_questions(qtype: str):
    """Refresh the ready-question dropdown when the question type changes.

    Returns a Gradio update object replacing the choices with the questions
    for the selected type and selecting the first one.
    """
    questions = READY_QUESTIONS.get(qtype, [])
    first = questions[0] if questions else ""
    # gr.update exists on modern Gradio; older releases need Dropdown.update.
    try:
        return gr.update(choices=questions, value=first)
    except Exception:
        return gr.Dropdown.update(choices=questions, value=first)
def ask_ready_question(qtype: str, ready_q: str, ctx: dict):
    """Gradio handler: validate the selection, then delegate to answer_by_type.

    Any exception while answering is reported as text rather than crashing
    the UI callback.
    """
    if ready_q:
        try:
            return answer_by_type(qtype, ready_q, ctx)
        except Exception as exc:
            return f"⚠️ Internal error: {exc}"
    return "⚠️ Please select a ready question."
# -----------------------------
# Main dashboard function
# -----------------------------
def run_dashboard(threshold: float, *vals):
    """Single entry point wired to the Run button.

    `vals` arrives in the same order as feature_cols (the order the input
    components were built in). Produces the decision markdown, the adverse
    action notice, the FAST explanation (figure + table), both recourse
    tables, the fairness snapshot, and the Q&A context state.
    """
    values = {feature_cols[i]: vals[i] for i in range(len(feature_cols))}
    input_row = pd.DataFrame([values], columns=feature_cols)
    prob = predict_proba(input_row)
    decision = decision_from_threshold(prob, threshold)
    fig, drivers_table = xgb_fast_contribs(input_row, top_k=8)
    # Adverse action notice: list the top 3 risk-increasing drivers if the
    # explanation succeeded (a failure yields a "message" frame instead).
    reasons = "Not available"
    if isinstance(drivers_table, pd.DataFrame) and "Feature" in drivers_table.columns and "Contribution" in drivers_table.columns:
        top_pos = drivers_table.sort_values("Contribution", ascending=False).head(3)["Feature"].tolist()
        reasons = ", ".join(top_pos)
    if "Risky" in decision:
        notice = (
            "Adverse Action Notice (auto-summary):\n"
            f"- Decision: DECLINED under policy threshold = {threshold:.2f}\n"
            f"- Estimated risk probability: {prob*100:.1f}%\n\n"
            f"Top factors that increased risk score:\n- {reasons}\n"
        )
    else:
        notice = (
            "Adverse Action Notice (auto-summary):\n"
            f"- Decision: APPROVED under policy threshold = {threshold:.2f}\n"
            f"- Estimated risk probability: {prob*100:.1f}%\n"
        )
    strict_table = strict_recourse_suggestions(input_row, threshold)
    dice_table = dice_counterfactuals_strict(input_row, threshold, total_CFs=3)
    fair = fairness_snapshot(threshold)
    decision_md = f"## Decision: **{decision}**"
    prob_md = f"### Risk probability: **{prob*100:.1f}%** (threshold = {threshold:.2f})"
    # Cache everything for the Stakeholder Q&A tab.
    ctx = build_context(prob, decision, threshold, drivers_table, strict_table, dice_table, fair)
    return decision_md, prob_md, notice, fig, drivers_table, strict_table, dice_table, fair, ctx
# -----------------------------
# UI layout (English)
# -----------------------------
CSS = """
<style>
.title {font-size: 32px; font-weight: 800;}
.sub {opacity: 0.85; font-size: 14px;}
</style>
"""

with gr.Blocks(theme=gr.themes.Soft(), title="Credit Risk XAI Dashboard") as demo:
    gr.HTML(CSS)
    # Per-session state: context from the latest dashboard run, read by Q&A.
    latest_ctx = gr.State({})
    gr.Markdown("## Credit Risk Explainable AI Dashboard")
    gr.Markdown(
        "Prediction + FAST explanation + Strict Recourse + DiCE (strict) + Fairness Snapshot + Stakeholder Q&A (Ready Questions Only)."
    )
    gr.Markdown(f"**Model loaded from:** `{MODEL_PATH}`")
    with gr.Tab("Applicant Input"):
        gr.Markdown("### Enter applicant features")
        with gr.Row():
            with gr.Column(scale=3):
                # Split the pre-built feature components into two columns.
                half = len(input_components) // 2
                left = input_components[:half]
                right = input_components[half:]
                with gr.Row():
                    with gr.Column():
                        for comp in left:
                            comp.render()
                    with gr.Column():
                        for comp in right:
                            comp.render()
            with gr.Column(scale=2):
                gr.Markdown("### Decision Policy")
                threshold = gr.Slider(
                    0.1, 0.9,
                    value=0.28,
                    step=0.01,
                    label="Decision threshold"
                )
                run_btn = gr.Button("Run Dashboard", variant="primary")
    with gr.Tab("Prediction"):
        decision_out = gr.Markdown("")
        prob_out = gr.Markdown("")
        notice_out = gr.Textbox(label="Adverse Action Notice (auto-summary)", lines=8)
    with gr.Tab("Explain (FAST)"):
        gr.Markdown("### Local Explanation")
        fast_plot = gr.Plot(label="Local drivers plot")
        drivers_df = gr.Dataframe(label="Top drivers table", interactive=False)
    with gr.Tab("Recourse"):
        gr.Markdown("### Recourse (Strict + DiCE)")
        strict_df = gr.Dataframe(label="Strict recourse suggestions", interactive=False)
        dice_df = gr.Dataframe(label="DiCE counterfactuals (strict)", interactive=False)
    with gr.Tab("Fairness Snapshot"):
        fair_df = gr.Dataframe(label="Approval rate across groups", interactive=False)
    # ✅ READY QUESTIONS ONLY Q&A
    with gr.Tab("Stakeholder Q&A"):
        gr.Markdown("## Stakeholder Q&A (Ready Questions Only)")
        gr.Markdown("1) Run Dashboard → 2) Choose Type → 3) Pick a Ready Question → 4) Ask")
        q_type = gr.Dropdown(
            choices=ALLOWED_TYPES,
            value="Why",
            label="Question Type (required)",
            interactive=True
        )
        ready_q = gr.Dropdown(
            choices=READY_QUESTIONS["Why"],
            value=READY_QUESTIONS["Why"][0],
            label="Ready Question",
            interactive=True
        )
        answer_out = gr.Textbox(
            label="Answer",
            lines=14,
            interactive=False
        )
        ask_btn = gr.Button("Ask", variant="primary")
        # Changing the type repopulates the ready-question dropdown.
        q_type.change(fn=update_ready_questions, inputs=q_type, outputs=ready_q)
        ask_btn.click(fn=ask_ready_question, inputs=[q_type, ready_q, latest_ctx], outputs=answer_out)
    # Wire dashboard run
    run_btn.click(
        fn=run_dashboard,
        inputs=[threshold, *input_components],
        outputs=[decision_out, prob_out, notice_out, fast_plot, drivers_df, strict_df, dice_df, fair_df, latest_ctx],
    )

demo.launch(server_name="0.0.0.0", server_port=7860)