Upload 9 files
Browse files- .gitattributes +1 -0
- Dockerfile +18 -0
- README.md +29 -5
- app.py +281 -0
- assets/Bank Churn.png +3 -0
- data/bankChurn.csv +0 -0
- data/batch_template.csv +2 -0
- requirements.txt +7 -0
- scripts/pipeline.py +181 -0
- style.css +34 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/Bank[[:space:]]Churn.png filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

# Hugging Face Docker Spaces run the container as UID 1000. The app writes
# models/, outputs/ and data/batch_template.csv under /app at runtime, so the
# application directory must be owned by that user instead of root.
RUN useradd --create-home --uid 1000 user && \
    mkdir -p /app && \
    chown user:user /app
ENV HOME=/home/user

WORKDIR /app

# Install dependencies first so this layer stays cached when only code changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /app/requirements.txt

COPY --chown=user:user . /app

USER user

EXPOSE 7860

CMD ["python", "-u", "app.py"]
|
README.md
CHANGED
|
@@ -1,10 +1,34 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Bank Churn Pro Demo
|
| 3 |
+
emoji: 🏦
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Bank Churn Pro Demo
|
| 12 |
+
|
| 13 |
+
A Hugging Face Docker Space for bank customer churn analysis with:
|
| 14 |
+
|
| 15 |
+
- Full-screen Bank Churn background UI
|
| 16 |
+
- Pipeline Step 1/2/3 execution log
|
| 17 |
+
- Feature importance chart
|
| 18 |
+
- Churn probability gauge
|
| 19 |
+
- CSV batch prediction
|
| 20 |
+
- SHAP explainability
|
| 21 |
+
|
| 22 |
+
## Included files
|
| 23 |
+
|
| 24 |
+
- `app.py` - Gradio app
|
| 25 |
+
- `scripts/pipeline.py` - training / artifact generation pipeline
|
| 26 |
+
- `data/bankChurn.csv` - sample dataset
|
| 27 |
+
- `assets/Bank Churn.png` - background image
|
| 28 |
+
|
| 29 |
+
## Expected workflow
|
| 30 |
+
|
| 31 |
+
1. Open the Space
|
| 32 |
+
2. Go to **Pipeline** and click **Run Pipeline**
|
| 33 |
+
3. Wait for the 3-step pipeline to finish
|
| 34 |
+
4. Use the **Single Prediction**, **CSV Batch**, and **Explainability** tabs
|
app.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import subprocess
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Generator
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
import joblib
|
| 11 |
+
import matplotlib.pyplot as plt
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import shap
|
| 14 |
+
|
| 15 |
+
# Directory containing this file; every other path below is derived from it.
APP_DIR = Path(__file__).parent.resolve()
STYLE_FILE = APP_DIR / "style.css"
ASSETS_DIR = APP_DIR / "assets"
DATA_DIR = APP_DIR / "data"
MODELS_DIR = APP_DIR / "models"
OUT_DIR = APP_DIR / "outputs"
FIG_DIR = OUT_DIR / "figures"
TAB_DIR = OUT_DIR / "tables"

# Artifacts read by this app (model, metadata, SHAP background rows) and the
# CSV template offered for download in the batch tab.
MODEL_FILE = MODELS_DIR / "pipeline.joblib"
META_FILE = MODELS_DIR / "model_meta.json"
BG_FILE = MODELS_DIR / "background_sample.csv"
TEMPLATE_CSV = DATA_DIR / "batch_template.csv"

# Default value for each input feature. These seed the UI widgets and the
# template CSV; the key order also defines FEATURES below.
DEFAULTS = {
    "AGE": 42,
    "OPEN_ACC_DUR": 120,
    "GENDER_CD": "1",
    "HASNT_HOME_ADDRESS_INF": "N",
    "HASNT_MOBILE_TEL_NUM_INF": "N",
    "LOCAL_CUR_MON_AVG_BAL": 25000.0,
    "LOCAL_FIX_MON_AVG_BAL": 18000.0,
    "LOCAL_SAV_CUR_ALL_BAL": 28000.0,
    "POS_CONSUME_TX_AMT": 5000.0,
    "ATM_ALL_TX_NUM": 6,
    "COUNTER_ALL_TX_NUM": 2,
}
# Feature column names, in the order the DataFrames are built.
FEATURES = list(DEFAULTS.keys())

# Module-level model state; both are (re)assigned by refresh_model_state().
PIPE = None
META = None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def ensure_template_csv() -> None:
    """Create the batch-prediction template CSV if it does not exist yet.

    The template holds a single row built from ``DEFAULTS``. An existing file
    is left untouched.
    """
    if TEMPLATE_CSV.exists():
        return
    # The data directory may be absent in a fresh checkout; without it
    # ``to_csv`` would raise instead of creating the template.
    TEMPLATE_CSV.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame([DEFAULTS]).to_csv(TEMPLATE_CSV, index=False)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def load_assets() -> tuple[object | None, dict | None]:
    """Load the trained pipeline and its metadata from disk.

    Returns:
        A ``(pipe, meta)`` tuple. Each element is ``None`` when its file
        (``MODEL_FILE`` / ``META_FILE``) does not exist.
    """
    pipe = None
    if MODEL_FILE.exists():
        pipe = joblib.load(MODEL_FILE)

    meta = None
    if META_FILE.exists():
        raw_meta = META_FILE.read_text(encoding="utf-8")
        meta = json.loads(raw_meta)

    return pipe, meta
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def refresh_model_state() -> str:
    """Reload the model globals from disk and describe the result.

    Rebinds the module-level ``PIPE`` and ``META`` to whatever
    ``load_assets()`` returns.

    Returns:
        A Markdown status string: a "ready" message when a model was loaded,
        otherwise a hint to run the pipeline first.
    """
    global PIPE, META
    PIPE, META = load_assets()
    model_ready = PIPE is not None
    if model_ready:
        return "✅ 模型已加载,可以进行单条预测、批量预测和 SHAP 解释。"
    return "⚠️ 当前为演示状态:请先在 Pipeline 标签页点击 **Run Pipeline** 生成模型。"
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def gauge_html(prob: float) -> str:
    """Build the HTML snippet for the churn-probability gauge.

    Args:
        prob: Churn probability as a fraction (0.5 means 50%).

    Returns:
        An HTML string with a horizontal bar and a percentage label. The bar
        width is clamped to 0-100%; the colour is green below 0.35, amber
        below 0.65 and red otherwise.
    """
    # Clamp only the displayed percentage; the colour band uses the raw value.
    pct = prob * 100.0
    pct = min(100.0, pct)
    pct = max(0.0, pct)

    if prob < 0.35:
        color = "#16a34a"
    elif prob < 0.65:
        color = "#f59e0b"
    else:
        color = "#dc2626"

    return f"""
    <div style='background:rgba(255,255,255,0.88);padding:16px;border-radius:18px'>
    <div style='font-size:18px;font-weight:700;margin-bottom:8px'>Churn Probability Gauge</div>
    <div style='width:100%;height:20px;background:#e5e7eb;border-radius:999px;overflow:hidden'>
    <div style='width:{pct:.1f}%;height:20px;background:{color};border-radius:999px'></div>
    </div>
    <div style='margin-top:10px;font-size:28px;font-weight:800;color:{color}'>{pct:.1f}%</div>
    </div>
    """
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def input_df(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
             local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
             pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num) -> pd.DataFrame:
    """Pack one customer's raw form values into a single-row DataFrame.

    Each value is cast to the type its column uses: ``int`` for ages, durations
    and transaction counts, ``str`` for the categorical codes and ``float`` for
    balances and amounts.

    Returns:
        A one-row DataFrame with one column per feature, in feature order.
    """
    # (column name, caster, raw value) — evaluated in column order.
    spec = (
        ("AGE", int, age),
        ("OPEN_ACC_DUR", int, open_acc_dur),
        ("GENDER_CD", str, gender_cd),
        ("HASNT_HOME_ADDRESS_INF", str, hasnt_home_address_inf),
        ("HASNT_MOBILE_TEL_NUM_INF", str, hasnt_mobile_tel_num_inf),
        ("LOCAL_CUR_MON_AVG_BAL", float, local_cur_mon_avg_bal),
        ("LOCAL_FIX_MON_AVG_BAL", float, local_fix_mon_avg_bal),
        ("LOCAL_SAV_CUR_ALL_BAL", float, local_sav_cur_all_bal),
        ("POS_CONSUME_TX_AMT", float, pos_consume_tx_amt),
        ("ATM_ALL_TX_NUM", int, atm_all_tx_num),
        ("COUNTER_ALL_TX_NUM", int, counter_all_tx_num),
    )
    record = {column: cast(raw) for column, cast, raw in spec}
    return pd.DataFrame([record])
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def predict_single(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
                   local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
                   pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num):
    """Score a single customer described by the form inputs.

    Returns:
        A 4-tuple ``(payload, summary, gauge, image_update)``:

        * ``payload`` - dict with ``churn_probability``, ``predicted_label``
          and ``risk_level`` (or an ``error`` key when no model is loaded);
        * ``summary`` - Markdown text describing the prediction;
        * ``gauge`` - HTML produced by ``gauge_html``;
        * ``image_update`` - a no-op Gradio update. The caller wires this slot
          to the feature-importance image; returning ``None`` here would blank
          that image on every prediction.
    """
    if PIPE is None:
        return {"error": "Run Pipeline first."}, "请先运行 Pipeline。", gauge_html(0.0), gr.update()

    df = input_df(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
                  local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
                  pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num)
    # Column 1 of predict_proba is taken as the churn probability.
    prob = float(PIPE.predict_proba(df)[0, 1])
    pred = int(prob >= 0.5)
    # Risk bands mirror the colour bands used by gauge_html (0.35 / 0.65).
    risk = "低风险" if prob < 0.35 else ("中风险" if prob < 0.65 else "高风险")
    payload = {
        "churn_probability": round(prob, 6),
        "predicted_label": pred,
        "risk_level": risk,
    }
    summary = f"**预测结果**:{'流失' if pred == 1 else '留存'} \n\n**概率**:{prob:.2%} \n**风险等级**:{risk}"
    return payload, summary, gauge_html(prob), gr.update()
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def predict_batch(file_obj):
    """Score every row of an uploaded CSV and write the results to disk.

    Args:
        file_obj: The uploaded file as delivered by the Gradio ``File``
            component: either an object exposing the path as ``.name`` or a
            plain path string. ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(preview, result_path, message)``. ``preview`` is the first
        50 scored rows and ``result_path`` the path of the full result CSV;
        both are ``None`` when the request cannot be served (no model, no file,
        missing columns), in which case ``message`` explains why.
    """
    if PIPE is None:
        return None, None, "请先运行 Pipeline。"
    if file_obj is None:
        return None, None, "请先上传 CSV。"

    csv_path = getattr(file_obj, "name", file_obj)
    df = pd.read_csv(csv_path)
    missing = [c for c in FEATURES if c not in df.columns]
    if missing:
        return None, None, f"CSV 缺少列:{missing}"

    # read_csv infers dtypes, so a code column such as GENDER_CD arrives as an
    # integer. The other prediction paths feed these columns as strings, and an
    # encoder fitted on strings would treat the integers as unknown categories,
    # so coerce the columns the same way before scoring.
    default_categorical = ["GENDER_CD", "HASNT_HOME_ADDRESS_INF", "HASNT_MOBILE_TEL_NUM_INF"]
    categorical = set((META or {}).get("categorical_features") or default_categorical)
    x = df[FEATURES].copy()
    for col in FEATURES:
        if col in categorical:
            x[col] = x[col].astype(str)
        else:
            x[col] = pd.to_numeric(x[col], errors="coerce")

    proba = PIPE.predict_proba(x)[:, 1]
    pred = (proba >= 0.5).astype(int)
    out = df.copy()
    out["churn_proba"] = proba
    out["churn_pred"] = pred

    # The outputs directory is only created by the training script; make sure
    # it exists even when the model artifacts were provided some other way.
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    out_path = OUT_DIR / "batch_predictions.csv"
    out.to_csv(out_path, index=False)
    return out.head(50), str(out_path), "批量预测完成。"
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def make_feature_importance_plot():
    """Render the feature-importance table as a horizontal bar chart.

    Reads ``feature_importance.csv`` from ``TAB_DIR`` and saves the chart as
    ``feature_importance_runtime.png`` in ``FIG_DIR``.

    Returns:
        The path of the saved image as a string, or ``None`` when the CSV does
        not exist.
    """
    table_path = TAB_DIR / "feature_importance.csv"
    if not table_path.exists():
        return None

    importance = pd.read_csv(table_path)
    fig, ax = plt.subplots(figsize=(8, 4.5))
    # Reverse the row order before plotting, as barh draws from the bottom up.
    ax.barh(importance["feature"][::-1], importance["importance"][::-1])
    ax.set_title("Feature Importance")
    ax.set_xlabel("Importance")
    fig.tight_layout()

    image_path = FIG_DIR / "feature_importance_runtime.png"
    fig.savefig(image_path, dpi=160)
    plt.close(fig)
    return str(image_path)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def explain_single(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
                   local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
                   pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num):
    """Generate a SHAP waterfall plot for the customer described by the form inputs.

    Returns:
        A ``(image_path, message)`` tuple. ``image_path`` is the saved PNG path
        as a string, or ``None`` when no model is loaded or the background
        sample file is missing.
    """
    if PIPE is None or not BG_FILE.exists():
        return None, "请先运行 Pipeline。"

    row = input_df(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
                   local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
                   pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num)
    background = pd.read_csv(BG_FILE)
    # Only the first 40 background rows are handed to the explainer.
    background = background[FEATURES].head(40)

    def f(x):
        # Prediction function given to SHAP. It rebuilds a DataFrame with the
        # expected column names and re-applies the column types (strings for the
        # three categorical codes, numbers for everything else) before scoring.
        # NOTE(review): presumably x is a 2-D array with one column per feature — confirm.
        x_df = pd.DataFrame(x, columns=FEATURES)
        for c in ["GENDER_CD", "HASNT_HOME_ADDRESS_INF", "HASNT_MOBILE_TEL_NUM_INF"]:
            x_df[c] = x_df[c].astype(str)
        for c in [col for col in FEATURES if col not in ["GENDER_CD", "HASNT_HOME_ADDRESS_INF", "HASNT_MOBILE_TEL_NUM_INF"]]:
            x_df[c] = pd.to_numeric(x_df[c], errors="coerce")
        return PIPE.predict_proba(x_df)[:, 1]

    explainer = shap.Explainer(f, background, feature_names=FEATURES)
    sv = explainer(row)

    # show=False keeps the figure open so it can be saved below.
    plt.figure(figsize=(9, 4.8))
    shap.plots.waterfall(sv[0], max_display=10, show=False)
    plt.tight_layout()
    out_path = FIG_DIR / "shap_waterfall.png"
    plt.savefig(out_path, dpi=160, bbox_inches="tight")
    plt.close()
    # Score the same row directly so the message can report its probability.
    prob = float(PIPE.predict_proba(row)[0, 1])
    txt = f"SHAP 解释已生成。该客户流失概率约为 **{prob:.2%}**。"
    return str(out_path), txt
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def run_pipeline_stream() -> Generator[tuple[str, str, str], None, None]:
    """Run ``scripts/pipeline.py`` in a subprocess and stream its output.

    Yields:
        ``(log_text, pipeline_status, model_status)`` tuples: one before any
        output arrives, one per output line (the log keeps only the last 400
        lines), and a final one carrying the exit status and the freshly
        reloaded model status.

    Raises:
        RuntimeError: If the subprocess was started without a stdout pipe.
    """
    import sys
    from collections import deque

    running = "⏳ Pipeline 正在运行..."
    log_lines: deque[str] = deque(maxlen=400)
    # Use the interpreter running this app so the script sees the same packages.
    cmd = [sys.executable, "-u", str(APP_DIR / "scripts" / "pipeline.py")]
    proc = subprocess.Popen(cmd, cwd=str(APP_DIR), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
    if proc.stdout is None:
        raise RuntimeError("Pipeline subprocess was started without a stdout pipe.")

    # Load the model state once up front. Reloading on every log line is costly
    # and could read a model file the subprocess is still writing.
    model_status = refresh_model_state()
    try:
        yield "", running, model_status
        for line in proc.stdout:
            log_lines.append(line.rstrip("\n"))
            yield "\n".join(log_lines), running, model_status
        rc = proc.wait()
    finally:
        # Reached early when the consumer abandons the stream; do not leave
        # the training process running in the background.
        if proc.poll() is None:
            proc.kill()
            proc.wait()
        proc.stdout.close()

    status = "✅ Pipeline 运行完成。" if rc == 0 else f"❌ Pipeline 失败,退出码 {rc}。"
    model_status = refresh_model_state()
    yield "\n".join(log_lines), status, model_status
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def build_ui():
    """Build the Gradio Blocks app and wire up all event handlers.

    Ensures the template CSV exists, registers the assets directory as a static
    path, reads the stylesheet (if present) and refreshes the model state
    before constructing the layout.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    ensure_template_csv()
    gr.set_static_paths(paths=[str(ASSETS_DIR)])
    css = STYLE_FILE.read_text(encoding="utf-8") if STYLE_FILE.exists() else ""
    model_status = refresh_model_state()
    with gr.Blocks() as demo:
        # The stylesheet is injected inline as a <style> element.
        gr.HTML(f"<style>{css}</style>")
        with gr.Column(elem_id="main_panel"):
            gr.Markdown("# 🏦 Bank Churn Pro Demo\n全屏背景 + Pipeline 日志 + 特征重要性 + 概率仪表盘 + CSV 批量预测 + SHAP 解释")
            model_state_md = gr.Markdown(model_status)
            pipeline_status_md = gr.Markdown("尚未运行 Pipeline。")

            with gr.Tabs():
                with gr.Tab("Pipeline"):
                    gr.Markdown("点击按钮执行 3 步流水线:数据准备 → 模型训练与特征重要性 → 验证与 SHAP 背景缓存")
                    run_btn = gr.Button("▶ Run Pipeline", variant="primary")
                    log_box = gr.Textbox(label="Pipeline Step 1/2/3 日志", lines=22, interactive=False)
                    fi_image = gr.Image(label="Feature Importance 图", type="filepath")
                    # Stream the pipeline log first, then render the feature-importance chart.
                    run_btn.click(fn=run_pipeline_stream, inputs=[], outputs=[log_box, pipeline_status_md, model_state_md]).then(fn=make_feature_importance_plot, inputs=[], outputs=fi_image)

                with gr.Tab("Single Prediction"):
                    with gr.Row():
                        with gr.Column():
                            age = gr.Slider(18, 100, value=DEFAULTS["AGE"], step=1, label="AGE")
                            open_acc_dur = gr.Slider(0, 400, value=DEFAULTS["OPEN_ACC_DUR"], step=1, label="OPEN_ACC_DUR")
                            gender_cd = gr.Dropdown(choices=["0", "1"], value=DEFAULTS["GENDER_CD"], label="GENDER_CD")
                            hasnt_home = gr.Dropdown(choices=["N", "Y"], value=DEFAULTS["HASNT_HOME_ADDRESS_INF"], label="HASNT_HOME_ADDRESS_INF")
                            hasnt_mobile = gr.Dropdown(choices=["N", "Y"], value=DEFAULTS["HASNT_MOBILE_TEL_NUM_INF"], label="HASNT_MOBILE_TEL_NUM_INF")
                            local_cur = gr.Number(value=DEFAULTS["LOCAL_CUR_MON_AVG_BAL"], label="LOCAL_CUR_MON_AVG_BAL")
                            local_fix = gr.Number(value=DEFAULTS["LOCAL_FIX_MON_AVG_BAL"], label="LOCAL_FIX_MON_AVG_BAL")
                            local_sav = gr.Number(value=DEFAULTS["LOCAL_SAV_CUR_ALL_BAL"], label="LOCAL_SAV_CUR_ALL_BAL")
                            pos_amt = gr.Number(value=DEFAULTS["POS_CONSUME_TX_AMT"], label="POS_CONSUME_TX_AMT")
                            atm_num = gr.Slider(0, 100, value=DEFAULTS["ATM_ALL_TX_NUM"], step=1, label="ATM_ALL_TX_NUM")
                            counter_num = gr.Slider(0, 100, value=DEFAULTS["COUNTER_ALL_TX_NUM"], step=1, label="COUNTER_ALL_TX_NUM")
                            pred_btn = gr.Button("Predict", variant="primary")
                        with gr.Column():
                            pred_json = gr.JSON(label="Prediction JSON")
                            pred_md = gr.Markdown()
                            gauge = gr.HTML(label="Gauge")
                    # predict_single returns four values; the fourth is routed to
                    # the feature-importance image defined in the Pipeline tab.
                    pred_btn.click(
                        fn=predict_single,
                        inputs=[age, open_acc_dur, gender_cd, hasnt_home, hasnt_mobile, local_cur, local_fix, local_sav, pos_amt, atm_num, counter_num],
                        outputs=[pred_json, pred_md, gauge, fi_image],
                    )

                with gr.Tab("CSV Batch"):
                    gr.Markdown("上传包含以下列的 CSV:" + ", ".join(FEATURES))
                    with gr.Row():
                        batch_file = gr.File(label="Upload CSV", file_types=[".csv"])
                        template_file = gr.File(value=str(TEMPLATE_CSV), label="Template CSV")
                    batch_btn = gr.Button("Run Batch Prediction")
                    batch_df = gr.Dataframe(label="Preview (Top 50)")
                    batch_out_file = gr.File(label="Download Result CSV")
                    batch_msg = gr.Markdown()
                    batch_btn.click(fn=predict_batch, inputs=[batch_file], outputs=[batch_df, batch_out_file, batch_msg])

                with gr.Tab("Explainability"):
                    gr.Markdown("使用当前表单中的同一组输入生成 SHAP waterfall 图。")
                    explain_btn = gr.Button("Generate SHAP Explainability")
                    shap_image = gr.Image(label="SHAP Explainability", type="filepath")
                    shap_md = gr.Markdown()
                    # Reuses the input widgets from the Single Prediction tab.
                    explain_btn.click(
                        fn=explain_single,
                        inputs=[age, open_acc_dur, gender_cd, hasnt_home, hasnt_mobile, local_cur, local_fix, local_sav, pos_amt, atm_num, counter_num],
                        outputs=[shap_image, shap_md],
                    )

            gr.Markdown("<div class='footer-note'>提示:首次进入请先运行 Pipeline,再使用预测、批量预测和解释功能。</div>")
    return demo
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
if __name__ == "__main__":
    demo = build_ui()
    demo.queue()
    # PORT keeps precedence as before. The explicit launch() arguments would
    # otherwise ignore the GRADIO_SERVER_* variables set by the container image,
    # so fall back to them before using the built-in defaults.
    port = int(os.environ.get("PORT") or os.environ.get("GRADIO_SERVER_PORT") or "7860")
    host = os.environ.get("GRADIO_SERVER_NAME") or "0.0.0.0"
    demo.launch(server_name=host, server_port=port)
|
assets/Bank Churn.png
ADDED
|
Git LFS Details
|
data/bankChurn.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/batch_template.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
AGE,OPEN_ACC_DUR,GENDER_CD,HASNT_HOME_ADDRESS_INF,HASNT_MOBILE_TEL_NUM_INF,LOCAL_CUR_MON_AVG_BAL,LOCAL_FIX_MON_AVG_BAL,LOCAL_SAV_CUR_ALL_BAL,POS_CONSUME_TX_AMT,ATM_ALL_TX_NUM,COUNTER_ALL_TX_NUM
|
| 2 |
+
42,120,1,N,N,25000.0,18000.0,28000.0,5000.0,6,2
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44.1
|
| 2 |
+
pandas>=2.0.0
|
| 3 |
+
numpy>=1.24.0
|
| 4 |
+
scikit-learn>=1.3.0
|
| 5 |
+
joblib>=1.3.0
|
| 6 |
+
matplotlib>=3.8.0
|
| 7 |
+
shap>=0.45.0
|
scripts/pipeline.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import joblib
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from sklearn.compose import ColumnTransformer
|
| 11 |
+
from sklearn.impute import SimpleImputer
|
| 12 |
+
from sklearn.inspection import permutation_importance
|
| 13 |
+
from sklearn.linear_model import LogisticRegression
|
| 14 |
+
from sklearn.metrics import roc_auc_score
|
| 15 |
+
from sklearn.model_selection import train_test_split
|
| 16 |
+
from sklearn.pipeline import Pipeline
|
| 17 |
+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
| 18 |
+
|
| 19 |
+
# This script lives in <app>/scripts/, so parents[1] is the app root.
APP_DIR = Path(__file__).resolve().parents[1]
DATA_PATH = APP_DIR / "data" / "bankChurn.csv"
MODELS_DIR = APP_DIR / "models"
OUT_DIR = APP_DIR / "outputs"
FIG_DIR = OUT_DIR / "figures"
TAB_DIR = OUT_DIR / "tables"

# Name of the label column and the feature columns kept from the raw dataset.
TARGET = "CHURN_CUST_IND"
FEATURES = [
    "AGE",
    "OPEN_ACC_DUR",
    "GENDER_CD",
    "HASNT_HOME_ADDRESS_INF",
    "HASNT_MOBILE_TEL_NUM_INF",
    "LOCAL_CUR_MON_AVG_BAL",
    "LOCAL_FIX_MON_AVG_BAL",
    "LOCAL_SAV_CUR_ALL_BAL",
    "POS_CONSUME_TX_AMT",
    "ATM_ALL_TX_NUM",
    "COUNTER_ALL_TX_NUM",
]
# Columns treated as categorical (cast to str, one-hot encoded); every other
# feature is treated as numeric.
CAT_COLS = ["GENDER_CD", "HASNT_HOME_ADDRESS_INF", "HASNT_MOBILE_TEL_NUM_INF"]
NUM_COLS = [c for c in FEATURES if c not in CAT_COLS]
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def ensure_dirs() -> None:
    """Create the model, figure and table output directories if missing."""
    for directory in (MODELS_DIR, FIG_DIR, TAB_DIR):
        directory.mkdir(parents=True, exist_ok=True)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def step1_prepare() -> pd.DataFrame:
    """Step 1: load the raw dataset, keep the modelled columns and fix dtypes.

    Categorical columns are cast to ``str``; numeric columns and the target are
    coerced with ``pd.to_numeric`` (unparseable values become NaN). Rows whose
    target is missing after coercion are dropped, because the training step
    casts the target to ``int`` and that cast fails on NaN. The cleaned frame
    is also written to ``processed_bank_churn.csv`` in ``OUT_DIR``.

    Returns:
        The prepared DataFrame containing ``FEATURES`` plus ``TARGET``.

    Raises:
        ValueError: If any expected column is absent from the raw CSV.
    """
    print("=" * 58)
    print("STEP 1/3: Data Preparation")
    print("=" * 58)
    df = pd.read_csv(DATA_PATH)
    keep = FEATURES + [TARGET]
    missing = [c for c in keep if c not in df.columns]
    if missing:
        raise ValueError(f"Missing expected columns: {missing}")

    df = df[keep].copy()
    for c in CAT_COLS:
        df[c] = df[c].astype(str)
    for c in NUM_COLS + [TARGET]:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # A row without a usable label cannot be used for training or evaluation.
    unlabeled = int(df[TARGET].isna().sum())
    if unlabeled:
        df = df.dropna(subset=[TARGET])
        print(f"Dropped {unlabeled:,} rows with missing or invalid {TARGET}")

    # Do not rely on the caller having created the output directory already.
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    processed_path = OUT_DIR / "processed_bank_churn.csv"
    df.to_csv(processed_path, index=False)
    print(f"Rows: {len(df):,} | Cols: {df.shape[1]}")
    print(f"Saved: {processed_path.relative_to(APP_DIR)}")
    return df
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def build_pipeline() -> Pipeline:
    """Assemble the (unfitted) preprocessing + logistic-regression pipeline.

    Numeric columns are median-imputed and standardised; categorical columns
    are imputed with the most frequent value and one-hot encoded, ignoring
    categories not seen during fitting.

    Returns:
        A ``Pipeline`` with a ``preprocess`` step followed by a ``model`` step.
    """
    numeric_steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
    categorical_steps = [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
    column_transformers = [
        ("num", Pipeline(steps=numeric_steps), NUM_COLS),
        ("cat", Pipeline(steps=categorical_steps), CAT_COLS),
    ]
    preprocess = ColumnTransformer(transformers=column_transformers)
    classifier = LogisticRegression(max_iter=1500, class_weight="balanced")
    return Pipeline(steps=[("preprocess", preprocess), ("model", classifier)])
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def step2_train(df: pd.DataFrame) -> tuple[Pipeline, pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
    """Step 2: train the model and write its evaluation artifacts.

    Splits the data 80/20 (stratified, fixed seed), fits the pipeline, then
    saves the fitted model, the test-set predictions, the permutation feature
    importances (CSV) and a bar chart of them (PNG).

    Args:
        df: Prepared frame containing ``FEATURES`` and ``TARGET``.

    Returns:
        ``(pipe, X_train, y_train, X_test, y_test)``.
    """
    print("\n" + "=" * 58)
    print("STEP 2/3: Train Model + Artifacts")
    print("=" * 58)
    features = df[FEATURES].copy()
    labels = df[TARGET].astype(int)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42, stratify=labels
    )

    pipe = build_pipeline()
    pipe.fit(X_train, y_train)

    # Churn probability is taken from column 1; 0.5 is the decision threshold.
    test_proba = pipe.predict_proba(X_test)[:, 1]
    test_pred = (test_proba >= 0.5).astype(int)
    auc = float(roc_auc_score(y_test, test_proba))

    model_path = MODELS_DIR / "pipeline.joblib"
    joblib.dump(pipe, model_path)
    print(f"Saved model: {model_path.relative_to(APP_DIR)}")
    print(f"ROC-AUC: {auc:.4f}")

    scored = X_test.assign(
        actual=y_test.to_numpy(),
        churn_proba=test_proba,
        churn_pred=test_pred,
    )
    test_pred_path = TAB_DIR / "test_predictions.csv"
    scored.to_csv(test_pred_path, index=False)
    print(f"Saved: {test_pred_path.relative_to(APP_DIR)}")

    perm = permutation_importance(pipe, X_test, y_test, n_repeats=5, random_state=42, scoring="roc_auc")
    fi = pd.DataFrame({"feature": FEATURES, "importance": perm.importances_mean})
    fi = fi.sort_values("importance", ascending=False)
    fi_path = TAB_DIR / "feature_importance.csv"
    fi.to_csv(fi_path, index=False)

    fig, ax = plt.subplots(figsize=(8, 4.5))
    # Reverse the sorted rows before plotting, as barh draws from the bottom up.
    ax.barh(fi["feature"][::-1], fi["importance"][::-1])
    ax.set_title("Feature Importance (Permutation)")
    ax.set_xlabel("Importance")
    fig.tight_layout()
    fi_fig = FIG_DIR / "feature_importance.png"
    fig.savefig(fi_fig, dpi=160)
    plt.close(fig)
    print(f"Saved: {fi_path.relative_to(APP_DIR)}")
    print(f"Saved: {fi_fig.relative_to(APP_DIR)}")

    return pipe, X_train, y_train, X_test, y_test
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def step3_finalize(pipe: Pipeline, X_train: pd.DataFrame, y_train: pd.Series, X_test: pd.DataFrame, y_test: pd.Series) -> None:
    """Step 3: cache a SHAP background sample and write the model metadata.

    Saves up to 80 randomly sampled training rows (fixed seed) as
    ``background_sample.csv`` and a ``model_meta.json`` describing the feature
    lists, target, threshold and two test-set summary statistics.

    Args:
        pipe: Fitted pipeline used to score the test set.
        X_train: Training features the background sample is drawn from.
        y_train: Training labels (not used here).
        X_test: Test features.
        y_test: Test labels.
    """
    print("\n" + "=" * 58)
    print("STEP 3/3: Validation + SHAP Background Cache")
    print("=" * 58)

    sample_size = min(80, len(X_train))
    bg_path = MODELS_DIR / "background_sample.csv"
    X_train.sample(sample_size, random_state=42).to_csv(bg_path, index=False)

    test_proba = pipe.predict_proba(X_test)[:, 1]
    meta = dict(
        features=FEATURES,
        categorical_features=CAT_COLS,
        numeric_features=NUM_COLS,
        target=TARGET,
        threshold=0.5,
        positive_rate_test=float(np.mean(y_test)),
        mean_predicted_proba_test=float(np.mean(test_proba)),
    )
    meta_path = MODELS_DIR / "model_meta.json"
    meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")

    for saved in (bg_path, meta_path):
        print(f"Saved: {saved.relative_to(APP_DIR)}")
    print("Pipeline completed successfully.")
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def main() -> int:
    """Run the three pipeline steps in order.

    Returns:
        ``0`` once every step has completed.
    """
    ensure_dirs()
    prepared = step1_prepare()
    # step2_train returns exactly the arguments step3_finalize expects.
    step3_finalize(*step2_train(prepared))
    print("DONE")
    return 0
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
if __name__ == "__main__":
    # Use main()'s return value as the process exit code.
    raise SystemExit(main())
|
style.css
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Page-wide dark fallback colour shown behind or instead of the background image. */
html, body, .gradio-container {
  min-height: 100vh !important;
  margin: 0 !important;
  background: #09111f !important;
}

/* Full-screen background image.
   The project pins gradio==4.44.1, which serves local files at "/file=<path>".
   The "/gradio_api/file=" prefix only exists from Gradio 5 onwards. */
body {
  background-image: url('/file=assets/Bank%20Churn.png') !important;
  background-size: cover !important;
  background-repeat: no-repeat !important;
  background-position: center center !important;
}

/* Let the body background show through the Gradio root container. */
.gradio-container {
  background: transparent !important;
}

/* Translucent card that holds the whole UI (elem_id="main_panel"). */
#main_panel {
  background: rgba(255,255,255,0.90);
  border-radius: 22px;
  padding: 18px;
  box-shadow: 0 16px 40px rgba(0,0,0,0.30);
}

.soft-card {
  background: rgba(255,255,255,0.86);
  border-radius: 18px;
  padding: 12px;
}

/* Hint text at the bottom of the page. */
.footer-note {
  opacity: 0.8;
  font-size: 0.92rem;
}
|