Spaces:

XRachel
/

bc5

Runtime error

App Files Files Community

XRachel committed on Mar 16

Commit

fde2bc0

verified ·

1 Parent(s): 194fb23

Upload 9 files

Browse files

Files changed (10) hide show

.gitattributes +1 -0
Dockerfile +18 -0
README.md +29 -5
app.py +281 -0
assets/Bank Churn.png +3 -0
data/bankChurn.csv +0 -0
data/batch_template.csv +2 -0
requirements.txt +7 -0
scripts/pipeline.py +181 -0
style.css +34 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/Bank[[:space:]]Churn.png filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,18 @@

+FROM python:3.11-slim
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+ENV GRADIO_SERVER_NAME=0.0.0.0
+ENV GRADIO_SERVER_PORT=7860
+# Docker Spaces run the container as UID 1000. Create that user and hand it
+# /app up front; otherwise the pipeline cannot create models/ and outputs/.
+RUN useradd --create-home --uid 1000 user && \
+    mkdir -p /app && \
+    chown user:user /app
+# Give libraries that cache under $HOME (e.g. Matplotlib) a writable location.
+ENV HOME=/home/user
+WORKDIR /app
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /app/requirements.txt
+# Copy the application owned by the runtime user so data/ and assets/ stay writable.
+COPY --chown=user:user . /app
+USER user
+EXPOSE 7860
+CMD ["python", "-u", "app.py"]

README.md CHANGED Viewed

@@ -1,10 +1,34 @@
 ---
-title: Bc5
-emoji: 📉
-colorFrom: purple
-colorTo: yellow
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Bank Churn Pro Demo
+emoji: 🏦
+colorFrom: blue
+colorTo: indigo
 sdk: docker
+app_port: 7860
 pinned: false
 ---
+# Bank Churn Pro Demo
+A Hugging Face Docker Space for bank customer churn analysis with:
+- Full-screen Bank Churn background UI
+- Pipeline Step 1/2/3 execution log
+- Feature importance chart
+- Churn probability gauge
+- CSV batch prediction
+- SHAP explainability
+## Included files
+- `app.py` - Gradio app
+- `scripts/pipeline.py` - training / artifact generation pipeline
+- `data/bankChurn.csv` - sample dataset
+- `assets/Bank Churn.png` - background image
+## Expected workflow
+1. Open the Space
+2. Go to **Pipeline** and click **Run Pipeline**
+3. Wait for the 3-step pipeline to finish
+4. Use **Single Prediction**, **Batch CSV**, and **Explainability** tabs

app.py ADDED Viewed

	@@ -0,0 +1,281 @@

+from __future__ import annotations
+import json
+import os
+import subprocess
+from pathlib import Path
+from typing import Generator
+import gradio as gr
+import joblib
+import matplotlib.pyplot as plt
+import pandas as pd
+import shap
+# Absolute directory that contains this file; every other path is derived from it.
+APP_DIR = Path(__file__).parent.resolve()
+STYLE_FILE = APP_DIR / "style.css"
+ASSETS_DIR = APP_DIR / "assets"
+DATA_DIR = APP_DIR / "data"
+MODELS_DIR = APP_DIR / "models"
+OUT_DIR = APP_DIR / "outputs"
+FIG_DIR = OUT_DIR / "figures"
+TAB_DIR = OUT_DIR / "tables"
+# Artifacts read by load_assets() and explain_single().
+MODEL_FILE = MODELS_DIR / "pipeline.joblib"
+META_FILE = MODELS_DIR / "model_meta.json"
+BG_FILE = MODELS_DIR / "background_sample.csv"
+TEMPLATE_CSV = DATA_DIR / "batch_template.csv"
+# Default value per input feature; used as the initial form values and as the
+# single row written by ensure_template_csv().
+DEFAULTS = {
+    "AGE": 42,
+    "OPEN_ACC_DUR": 120,
+    "GENDER_CD": "1",
+    "HASNT_HOME_ADDRESS_INF": "N",
+    "HASNT_MOBILE_TEL_NUM_INF": "N",
+    "LOCAL_CUR_MON_AVG_BAL": 25000.0,
+    "LOCAL_FIX_MON_AVG_BAL": 18000.0,
+    "LOCAL_SAV_CUR_ALL_BAL": 28000.0,
+    "POS_CONSUME_TX_AMT": 5000.0,
+    "ATM_ALL_TX_NUM": 6,
+    "COUNTER_ALL_TX_NUM": 2,
+}
+# Feature column names, in the key order of DEFAULTS.
+FEATURES = list(DEFAULTS.keys())
+# Module-level model state; both stay None until refresh_model_state() finds
+# the corresponding files on disk.
+PIPE = None
+META = None
+def ensure_template_csv() -> None:
+    if not TEMPLATE_CSV.exists():
+        pd.DataFrame([DEFAULTS]).to_csv(TEMPLATE_CSV, index=False)
+def load_assets() -> tuple[object | None, dict | None]:
+    pipe = joblib.load(MODEL_FILE) if MODEL_FILE.exists() else None
+    meta = json.loads(META_FILE.read_text(encoding="utf-8")) if META_FILE.exists() else None
+    return pipe, meta
+def refresh_model_state() -> str:
+    global PIPE, META
+    PIPE, META = load_assets()
+    if PIPE is None:
+        return "⚠️ 当前为演示状态：请先在 Pipeline 标签页点击 **Run Pipeline** 生成模型。"
+    return "✅ 模型已加载，可以进行单条预测、批量预测和 SHAP 解释。"
+def gauge_html(prob: float) -> str:
+    pct = max(0.0, min(100.0, prob * 100.0))
+    color = "#16a34a" if prob < 0.35 else ("#f59e0b" if prob < 0.65 else "#dc2626")
+    return f"""
+    <div style='background:rgba(255,255,255,0.88);padding:16px;border-radius:18px'>
+      <div style='font-size:18px;font-weight:700;margin-bottom:8px'>Churn Probability Gauge</div>
+      <div style='width:100%;height:20px;background:#e5e7eb;border-radius:999px;overflow:hidden'>
+        <div style='width:{pct:.1f}%;height:20px;background:{color};border-radius:999px'></div>
+      </div>
+      <div style='margin-top:10px;font-size:28px;font-weight:800;color:{color}'>{pct:.1f}%</div>
+    </div>
+    """
+def input_df(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
+             local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
+             pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num) -> pd.DataFrame:
+    return pd.DataFrame([{
+        "AGE": int(age),
+        "OPEN_ACC_DUR": int(open_acc_dur),
+        "GENDER_CD": str(gender_cd),
+        "HASNT_HOME_ADDRESS_INF": str(hasnt_home_address_inf),
+        "HASNT_MOBILE_TEL_NUM_INF": str(hasnt_mobile_tel_num_inf),
+        "LOCAL_CUR_MON_AVG_BAL": float(local_cur_mon_avg_bal),
+        "LOCAL_FIX_MON_AVG_BAL": float(local_fix_mon_avg_bal),
+        "LOCAL_SAV_CUR_ALL_BAL": float(local_sav_cur_all_bal),
+        "POS_CONSUME_TX_AMT": float(pos_consume_tx_amt),
+        "ATM_ALL_TX_NUM": int(atm_all_tx_num),
+        "COUNTER_ALL_TX_NUM": int(counter_all_tx_num),
+    }])
+def predict_single(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
+                   local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
+                   pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num):
+    if PIPE is None:
+        return {"error": "Run Pipeline first."}, "请先运行 Pipeline。", gauge_html(0.0), None
+    df = input_df(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
+                  local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
+                  pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num)
+    prob = float(PIPE.predict_proba(df)[0, 1])
+    pred = int(prob >= 0.5)
+    risk = "低风险" if prob < 0.35 else ("中风险" if prob < 0.65 else "高风险")
+    payload = {
+        "churn_probability": round(prob, 6),
+        "predicted_label": pred,
+        "risk_level": risk,
+    }
+    summary = f"**预测结果**：{'流失' if pred == 1 else '留存'}  \n\n**概率**：{prob:.2%}  \n**风险等级**：{risk}"
+    return payload, summary, gauge_html(prob), None
+def predict_batch(file_obj):
+    if PIPE is None:
+        return None, None, "请先运行 Pipeline。"
+    if file_obj is None:
+        return None, None, "请先上传 CSV。"
+    df = pd.read_csv(file_obj.name)
+    missing = [c for c in FEATURES if c not in df.columns]
+    if missing:
+        return None, None, f"CSV 缺少列：{missing}"
+    x = df[FEATURES].copy()
+    proba = PIPE.predict_proba(x)[:, 1]
+    pred = (proba >= 0.5).astype(int)
+    out = df.copy()
+    out["churn_proba"] = proba
+    out["churn_pred"] = pred
+    out_path = OUT_DIR / "batch_predictions.csv"
+    out.to_csv(out_path, index=False)
+    return out.head(50), str(out_path), "批量预测完成。"
+def make_feature_importance_plot():
+    fp = TAB_DIR / "feature_importance.csv"
+    if not fp.exists():
+        return None
+    fi = pd.read_csv(fp)
+    plt.figure(figsize=(8, 4.5))
+    plt.barh(fi["feature"][::-1], fi["importance"][::-1])
+    plt.title("Feature Importance")
+    plt.xlabel("Importance")
+    plt.tight_layout()
+    fig_path = FIG_DIR / "feature_importance_runtime.png"
+    plt.savefig(fig_path, dpi=160)
+    plt.close()
+    return str(fig_path)
+def explain_single(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
+                   local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
+                   pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num):
+    """Generate a SHAP waterfall image for one customer.
+
+    Returns:
+        ``(image_path, markdown_text)``; ``(None, message)`` when no model is
+        loaded or the background sample file is missing.
+    """
+    if PIPE is None or not BG_FILE.exists():
+        return None, "请先运行 Pipeline。"
+    row = input_df(age, open_acc_dur, gender_cd, hasnt_home_address_inf, hasnt_mobile_tel_num_inf,
+                   local_cur_mon_avg_bal, local_fix_mon_avg_bal, local_sav_cur_all_bal,
+                   pos_consume_tx_amt, atm_all_tx_num, counter_all_tx_num)
+    # Use at most the first 40 background rows, restricted to the model features.
+    background = pd.read_csv(BG_FILE)
+    background = background[FEATURES].head(40)
+    def f(x):
+        """Prediction function handed to SHAP: churn probability per row of ``x``."""
+        # Rebuild a labelled frame and restore the dtypes the pipeline was fed:
+        # text for the three categorical columns, numeric for everything else.
+        # NOTE(review): presumably SHAP passes a plain array here — confirm.
+        x_df = pd.DataFrame(x, columns=FEATURES)
+        for c in ["GENDER_CD", "HASNT_HOME_ADDRESS_INF", "HASNT_MOBILE_TEL_NUM_INF"]:
+            x_df[c] = x_df[c].astype(str)
+        for c in [col for col in FEATURES if col not in ["GENDER_CD", "HASNT_HOME_ADDRESS_INF", "HASNT_MOBILE_TEL_NUM_INF"]]:
+            x_df[c] = pd.to_numeric(x_df[c], errors="coerce")
+        return PIPE.predict_proba(x_df)[:, 1]
+    explainer = shap.Explainer(f, background, feature_names=FEATURES)
+    sv = explainer(row)
+    # The figure is created first and the waterfall is drawn with show=False so
+    # that it can be saved to disk below instead of being displayed.
+    plt.figure(figsize=(9, 4.8))
+    shap.plots.waterfall(sv[0], max_display=10, show=False)
+    plt.tight_layout()
+    out_path = FIG_DIR / "shap_waterfall.png"
+    plt.savefig(out_path, dpi=160, bbox_inches="tight")
+    plt.close()
+    # Score the row directly as well so the text can quote the probability.
+    prob = float(PIPE.predict_proba(row)[0, 1])
+    txt = f"SHAP 解释已生成。该客户流失概率约为 **{prob:.2%}**。"
+    return str(out_path), txt
+def run_pipeline_stream() -> Generator[tuple[str, str, str], None, None]:
+    log_lines = []
+    cmd = ["python", "-u", str(APP_DIR / "scripts" / "pipeline.py")]
+    proc = subprocess.Popen(cmd, cwd=str(APP_DIR), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
+    assert proc.stdout is not None
+    yield "", "⏳ Pipeline 正在运行...", refresh_model_state()
+    for line in proc.stdout:
+        log_lines.append(line.rstrip("\n"))
+        if len(log_lines) > 400:
+            log_lines = log_lines[-400:]
+        yield "\n".join(log_lines), "⏳ Pipeline 正在运行...", refresh_model_state()
+    rc = proc.wait()
+    status = "✅ Pipeline 运行完成。" if rc == 0 else f"❌ Pipeline 失败，退出码 {rc}。"
+    model_status = refresh_model_state()
+    yield "\n".join(log_lines), status, model_status
+def build_ui():
+    """Assemble the Gradio Blocks app (four tabs) and return it without launching."""
+    ensure_template_csv()
+    # Register the assets directory as a static path with Gradio.
+    gr.set_static_paths(paths=[str(ASSETS_DIR)])
+    css = STYLE_FILE.read_text(encoding="utf-8") if STYLE_FILE.exists() else ""
+    # Also loads PIPE/META from disk, so a previously trained model is usable at once.
+    model_status = refresh_model_state()
+    with gr.Blocks() as demo:
+        # The stylesheet is injected as an inline <style> element.
+        gr.HTML(f"<style>{css}</style>")
+        with gr.Column(elem_id="main_panel"):
+            gr.Markdown("# 🏦 Bank Churn Pro Demo\n全屏背景 + Pipeline 日志 + 特征重要性 + 概率仪表盘 + CSV 批量预测 + SHAP 解释")
+            model_state_md = gr.Markdown(model_status)
+            pipeline_status_md = gr.Markdown("尚未运行 Pipeline。")
+            with gr.Tabs():
+                with gr.Tab("Pipeline"):
+                    gr.Markdown("点击按钮执行 3 步流水线：数据准备 → 模型训练与特征重要性 → 验证与 SHAP 背景缓存")
+                    run_btn = gr.Button("▶ Run Pipeline", variant="primary")
+                    log_box = gr.Textbox(label="Pipeline Step 1/2/3 日志", lines=22, interactive=False)
+                    fi_image = gr.Image(label="Feature Importance 图", type="filepath")
+                    # Stream the pipeline log first, then render the importance chart.
+                    run_btn.click(fn=run_pipeline_stream, inputs=[], outputs=[log_box, pipeline_status_md, model_state_md]).then(fn=make_feature_importance_plot, inputs=[], outputs=fi_image)
+                with gr.Tab("Single Prediction"):
+                    with gr.Row():
+                        with gr.Column():
+                            # Input widgets, initialised from DEFAULTS.
+                            age = gr.Slider(18, 100, value=DEFAULTS["AGE"], step=1, label="AGE")
+                            open_acc_dur = gr.Slider(0, 400, value=DEFAULTS["OPEN_ACC_DUR"], step=1, label="OPEN_ACC_DUR")
+                            gender_cd = gr.Dropdown(choices=["0", "1"], value=DEFAULTS["GENDER_CD"], label="GENDER_CD")
+                            hasnt_home = gr.Dropdown(choices=["N", "Y"], value=DEFAULTS["HASNT_HOME_ADDRESS_INF"], label="HASNT_HOME_ADDRESS_INF")
+                            hasnt_mobile = gr.Dropdown(choices=["N", "Y"], value=DEFAULTS["HASNT_MOBILE_TEL_NUM_INF"], label="HASNT_MOBILE_TEL_NUM_INF")
+                            local_cur = gr.Number(value=DEFAULTS["LOCAL_CUR_MON_AVG_BAL"], label="LOCAL_CUR_MON_AVG_BAL")
+                            local_fix = gr.Number(value=DEFAULTS["LOCAL_FIX_MON_AVG_BAL"], label="LOCAL_FIX_MON_AVG_BAL")
+                            local_sav = gr.Number(value=DEFAULTS["LOCAL_SAV_CUR_ALL_BAL"], label="LOCAL_SAV_CUR_ALL_BAL")
+                            pos_amt = gr.Number(value=DEFAULTS["POS_CONSUME_TX_AMT"], label="POS_CONSUME_TX_AMT")
+                            atm_num = gr.Slider(0, 100, value=DEFAULTS["ATM_ALL_TX_NUM"], step=1, label="ATM_ALL_TX_NUM")
+                            counter_num = gr.Slider(0, 100, value=DEFAULTS["COUNTER_ALL_TX_NUM"], step=1, label="COUNTER_ALL_TX_NUM")
+                            pred_btn = gr.Button("Predict", variant="primary")
+                        with gr.Column():
+                            pred_json = gr.JSON(label="Prediction JSON")
+                            pred_md = gr.Markdown()
+                            gauge = gr.HTML(label="Gauge")
+                    # The inputs are passed positionally in predict_single's parameter
+                    # order; its fourth return value is routed to fi_image on the
+                    # Pipeline tab.
+                    pred_btn.click(
+                        fn=predict_single,
+                        inputs=[age, open_acc_dur, gender_cd, hasnt_home, hasnt_mobile, local_cur, local_fix, local_sav, pos_amt, atm_num, counter_num],
+                        outputs=[pred_json, pred_md, gauge, fi_image],
+                    )
+                with gr.Tab("CSV Batch"):
+                    gr.Markdown("上传包含以下列的 CSV：" + ", ".join(FEATURES))
+                    with gr.Row():
+                        batch_file = gr.File(label="Upload CSV", file_types=[".csv"])
+                        # Pre-filled with the template CSV so users can download it.
+                        template_file = gr.File(value=str(TEMPLATE_CSV), label="Template CSV")
+                    batch_btn = gr.Button("Run Batch Prediction")
+                    batch_df = gr.Dataframe(label="Preview (Top 50)")
+                    batch_out_file = gr.File(label="Download Result CSV")
+                    batch_msg = gr.Markdown()
+                    batch_btn.click(fn=predict_batch, inputs=[batch_file], outputs=[batch_df, batch_out_file, batch_msg])
+                with gr.Tab("Explainability"):
+                    gr.Markdown("使用当前表单中的同一组输入生成 SHAP waterfall 图。")
+                    explain_btn = gr.Button("Generate SHAP Explainability")
+                    shap_image = gr.Image(label="SHAP Explainability", type="filepath")
+                    shap_md = gr.Markdown()
+                    # Reuses the input widgets defined on the Single Prediction tab.
+                    explain_btn.click(
+                        fn=explain_single,
+                        inputs=[age, open_acc_dur, gender_cd, hasnt_home, hasnt_mobile, local_cur, local_fix, local_sav, pos_amt, atm_num, counter_num],
+                        outputs=[shap_image, shap_md],
+                    )
+            gr.Markdown("<div class='footer-note'>提示：首次进入请先运行 Pipeline，再使用预测、批量预测和解释功能。</div>")
+    return demo
+if __name__ == "__main__":
+    demo = build_ui()
+    demo.queue()
+    port = int(os.environ.get("PORT", "7860"))
+    demo.launch(server_name="0.0.0.0", server_port=port)

assets/Bank Churn.png ADDED Viewed

Git LFS Details

SHA256: 1ce095ba6507023114cb93cb1cca0c5ef4a2153a17b14545d40f1bbd99ef16eb
Pointer size: 132 Bytes
Size of remote file: 4.54 MB

data/bankChurn.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/batch_template.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ AGE,OPEN_ACC_DUR,GENDER_CD,HASNT_HOME_ADDRESS_INF,HASNT_MOBILE_TEL_NUM_INF,LOCAL_CUR_MON_AVG_BAL,LOCAL_FIX_MON_AVG_BAL,LOCAL_SAV_CUR_ALL_BAL,POS_CONSUME_TX_AMT,ATM_ALL_TX_NUM,COUNTER_ALL_TX_NUM
2	+ 42,120,1,N,N,25000.0,18000.0,28000.0,5000.0,6,2

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio==4.44.1
+# gradio 4.44.x imports names that huggingface_hub 1.0 removed; stay on 0.x.
+huggingface_hub<1.0
+# NOTE(review): pydantic 2.11+ is reported to break gradio 4.44.x API schema
+# generation — confirm, and drop this pin when gradio is upgraded.
+pydantic<2.11
+pandas>=2.0.0
+numpy>=1.24.0
+scikit-learn>=1.3.0
+joblib>=1.3.0
+matplotlib>=3.8.0
+shap>=0.45.0

scripts/pipeline.py ADDED Viewed

	@@ -0,0 +1,181 @@

+from __future__ import annotations
+import json
+from pathlib import Path
+import joblib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from sklearn.compose import ColumnTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.inspection import permutation_importance
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+# This script lives in scripts/, so the application root is one level up.
+APP_DIR = Path(__file__).resolve().parents[1]
+DATA_PATH = APP_DIR / "data" / "bankChurn.csv"
+MODELS_DIR = APP_DIR / "models"
+OUT_DIR = APP_DIR / "outputs"
+FIG_DIR = OUT_DIR / "figures"
+TAB_DIR = OUT_DIR / "tables"
+# Name of the label column in the dataset.
+TARGET = "CHURN_CUST_IND"
+# Input columns kept from the raw dataset, in model input order.
+FEATURES = [
+    "AGE",
+    "OPEN_ACC_DUR",
+    "GENDER_CD",
+    "HASNT_HOME_ADDRESS_INF",
+    "HASNT_MOBILE_TEL_NUM_INF",
+    "LOCAL_CUR_MON_AVG_BAL",
+    "LOCAL_FIX_MON_AVG_BAL",
+    "LOCAL_SAV_CUR_ALL_BAL",
+    "POS_CONSUME_TX_AMT",
+    "ATM_ALL_TX_NUM",
+    "COUNTER_ALL_TX_NUM",
+]
+# Columns treated as categorical (cast to str, one-hot encoded); every other
+# feature is treated as numeric.
+CAT_COLS = ["GENDER_CD", "HASNT_HOME_ADDRESS_INF", "HASNT_MOBILE_TEL_NUM_INF"]
+NUM_COLS = [c for c in FEATURES if c not in CAT_COLS]
+def ensure_dirs() -> None:
+    MODELS_DIR.mkdir(parents=True, exist_ok=True)
+    FIG_DIR.mkdir(parents=True, exist_ok=True)
+    TAB_DIR.mkdir(parents=True, exist_ok=True)
+def step1_prepare() -> pd.DataFrame:
+    print("=" * 58)
+    print("STEP 1/3: Data Preparation")
+    print("=" * 58)
+    df = pd.read_csv(DATA_PATH)
+    keep = FEATURES + [TARGET]
+    missing = [c for c in keep if c not in df.columns]
+    if missing:
+        raise ValueError(f"Missing expected columns: {missing}")
+    df = df[keep].copy()
+    for c in CAT_COLS:
+        df[c] = df[c].astype(str)
+    for c in NUM_COLS + [TARGET]:
+        df[c] = pd.to_numeric(df[c], errors="coerce")
+    processed_path = OUT_DIR / "processed_bank_churn.csv"
+    df.to_csv(processed_path, index=False)
+    print(f"Rows: {len(df):,} | Cols: {df.shape[1]}")
+    print(f"Saved: {processed_path.relative_to(APP_DIR)}")
+    return df
+def build_pipeline() -> Pipeline:
+    numeric_pipe = Pipeline(
+        steps=[
+            ("imputer", SimpleImputer(strategy="median")),
+            ("scaler", StandardScaler()),
+        ]
+    )
+    categorical_pipe = Pipeline(
+        steps=[
+            ("imputer", SimpleImputer(strategy="most_frequent")),
+            ("onehot", OneHotEncoder(handle_unknown="ignore")),
+        ]
+    )
+    preprocess = ColumnTransformer(
+        transformers=[
+            ("num", numeric_pipe, NUM_COLS),
+            ("cat", categorical_pipe, CAT_COLS),
+        ]
+    )
+    model = LogisticRegression(max_iter=1500, class_weight="balanced")
+    return Pipeline(steps=[("preprocess", preprocess), ("model", model)])
+def step2_train(df: pd.DataFrame) -> tuple[Pipeline, pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
+    """Train the model and write the model file, test predictions and importances.
+
+    Args:
+        df: Prepared frame containing the FEATURES columns and TARGET.
+
+    Returns:
+        ``(pipe, X_train, y_train, X_test, y_test)`` for the finalisation step.
+    """
+    print("\n" + "=" * 58)
+    print("STEP 2/3: Train Model + Artifacts")
+    print("=" * 58)
+    X = df[FEATURES].copy()
+    y = df[TARGET].astype(int)
+    # 80/20 split, stratified on the label, with a fixed seed.
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42, stratify=y
+    )
+    pipe = build_pipeline()
+    pipe.fit(X_train, y_train)
+    # Column 1 of predict_proba is the positive-class probability.
+    proba = pipe.predict_proba(X_test)[:, 1]
+    pred = (proba >= 0.5).astype(int)
+    auc = float(roc_auc_score(y_test, proba))
+    model_path = MODELS_DIR / "pipeline.joblib"
+    joblib.dump(pipe, model_path)
+    print(f"Saved model: {model_path.relative_to(APP_DIR)}")
+    print(f"ROC-AUC: {auc:.4f}")
+    # Test-set rows together with the true label, probability and thresholded label.
+    pred_df = X_test.copy()
+    pred_df["actual"] = y_test.to_numpy()
+    pred_df["churn_proba"] = proba
+    pred_df["churn_pred"] = pred
+    test_pred_path = TAB_DIR / "test_predictions.csv"
+    pred_df.to_csv(test_pred_path, index=False)
+    print(f"Saved: {test_pred_path.relative_to(APP_DIR)}")
+    # Permutation importance is computed on the whole pipeline over the raw
+    # test columns, so there is one importance value per entry of FEATURES.
+    r = permutation_importance(pipe, X_test, y_test, n_repeats=5, random_state=42, scoring="roc_auc")
+    fi = pd.DataFrame({"feature": FEATURES, "importance": r.importances_mean}).sort_values("importance", ascending=False)
+    fi_path = TAB_DIR / "feature_importance.csv"
+    fi.to_csv(fi_path, index=False)
+    plt.figure(figsize=(8, 4.5))
+    # Reverse the sorted rows so the most important feature is drawn at the top.
+    plt.barh(fi["feature"][::-1], fi["importance"][::-1])
+    plt.title("Feature Importance (Permutation)")
+    plt.xlabel("Importance")
+    plt.tight_layout()
+    fi_fig = FIG_DIR / "feature_importance.png"
+    plt.savefig(fi_fig, dpi=160)
+    plt.close()
+    print(f"Saved: {fi_path.relative_to(APP_DIR)}")
+    print(f"Saved: {fi_fig.relative_to(APP_DIR)}")
+    return pipe, X_train, y_train, X_test, y_test
+def step3_finalize(pipe: Pipeline, X_train: pd.DataFrame, y_train: pd.Series, X_test: pd.DataFrame, y_test: pd.Series) -> None:
+    """Write the SHAP background sample and the model metadata JSON.
+
+    ``y_train`` is accepted but not used in this function.
+    """
+    print("\n" + "=" * 58)
+    print("STEP 3/3: Validation + SHAP Background Cache")
+    print("=" * 58)
+    # Up to 80 training rows, sampled with a fixed seed, saved as the background set.
+    bg = X_train.sample(min(80, len(X_train)), random_state=42)
+    bg_path = MODELS_DIR / "background_sample.csv"
+    bg.to_csv(bg_path, index=False)
+    proba = pipe.predict_proba(X_test)[:, 1]
+    # Summary of the model inputs plus two test-set statistics.
+    meta = {
+        "features": FEATURES,
+        "categorical_features": CAT_COLS,
+        "numeric_features": NUM_COLS,
+        "target": TARGET,
+        "threshold": 0.5,
+        "positive_rate_test": float(np.mean(y_test)),
+        "mean_predicted_proba_test": float(np.mean(proba)),
+    }
+    meta_path = MODELS_DIR / "model_meta.json"
+    meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
+    print(f"Saved: {bg_path.relative_to(APP_DIR)}")
+    print(f"Saved: {meta_path.relative_to(APP_DIR)}")
+    print("Pipeline completed successfully.")
+def main() -> int:
+    ensure_dirs()
+    df = step1_prepare()
+    pipe, X_train, y_train, X_test, y_test = step2_train(df)
+    step3_finalize(pipe, X_train, y_train, X_test, y_test)
+    print("DONE")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

style.css ADDED Viewed

	@@ -0,0 +1,34 @@

+/* Dark fallback colour shown behind (or instead of) the background image. */
+html, body, .gradio-container {
+  min-height: 100vh !important;
+  margin: 0 !important;
+  background: #09111f !important;
+}
+/* Full-page background image. The pinned gradio 4.x serves files under
+   /file=<path>; the /gradio_api/ prefix only exists from Gradio 5 onwards. */
+body {
+  background-image: url('/file=assets/Bank%20Churn.png') !important;
+  background-size: cover !important;
+  background-repeat: no-repeat !important;
+  background-position: center center !important;
+}
+/* Let the body background show through the app container. */
+.gradio-container {
+  background: transparent !important;
+}
+/* Translucent card that wraps the whole UI (elem_id="main_panel"). */
+#main_panel {
+  background: rgba(255,255,255,0.90);
+  border-radius: 22px;
+  padding: 18px;
+  box-shadow: 0 16px 40px rgba(0,0,0,0.30);
+}
+.soft-card {
+  background: rgba(255,255,255,0.86);
+  border-radius: 18px;
+  padding: 12px;
+}
+.footer-note {
+  opacity: 0.8;
+  font-size: 0.92rem;
+}