File size: 3,010 Bytes
ae7c16d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from __future__ import annotations

from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

from scripts.train_xgb import TARGET, _read_joined, _select_columns, _freq_encode


def _group_metrics(y: np.ndarray, p: np.ndarray, group: pd.Series) -> pd.DataFrame:
    rows = []
    group = group.fillna("missing").astype(str)
    group = group.reset_index(drop=True)
    for g in group.unique().tolist():
        mask = (group == g).to_numpy()
        n = int(mask.sum())
        if n < 500:
            continue
        yy = y[mask]
        pp = p[mask]
        auc = float("nan")
        if len(np.unique(yy)) == 2:
            auc = float(roc_auc_score(yy, pp))
        rows.append(
            {
                "group": str(g),
                "n": n,
                "fraud_rate": float(np.mean(yy)),
                "mean_score": float(np.mean(pp)),
                "roc_auc": auc,
            }
        )
    return pd.DataFrame(rows).sort_values("n", ascending=False)


def main() -> None:
    """Run a limited bias audit and write reports/bias_audit.md.

    Loads the trained model artifacts, re-creates the same train/test split
    used at training time (fixed seed + stratify), scores the held-out set,
    and reports per-slice metrics for ProductCD and DeviceType.
    """
    repo_root = Path(__file__).resolve().parents[1]
    raw_dir = repo_root / "data" / "raw"
    artifacts_path = repo_root / "artifacts" / "model.joblib"
    out_dir = repo_root / "reports"
    out_dir.mkdir(parents=True, exist_ok=True)

    df = _read_joined(raw_dir, max_rows=200000)
    df, numeric, categorical = _select_columns(df)
    df = df.dropna(subset=[TARGET]).copy()

    y = df[TARGET].astype(int).to_numpy()
    X = df.drop(columns=[TARGET])

    # Must mirror the training script's split exactly (same seed/stratify)
    # so the test fold here is truly held out from the saved model.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42, stratify=y
    )
    X_test = X_test.reset_index(drop=True)

    payload = joblib.load(artifacts_path)
    model = payload["model"]
    imputer = payload["imputer"]
    cat_cols = list(payload.get("categorical_cols") or [])

    # X_train is still needed to fit the frequency encoding; the encoded
    # train matrix itself is not used here.
    _, X_test_enc, _ = _freq_encode(X_train, X_test, cat_cols)
    Xte = imputer.transform(X_test_enc)
    p = model.predict_proba(Xte)[:, 1]

    # Fall back to an all-None column when the slice column is absent so
    # _group_metrics degrades gracefully (everything buckets to "missing").
    product = X_test.get("ProductCD", pd.Series([None] * len(X_test)))
    device = X_test.get("DeviceType", pd.Series([None] * len(X_test)))

    product_table = _group_metrics(y_test, p, product)
    device_table = _group_metrics(y_test, p, device)

    md = []
    md.append("## Bias audit (limited slices)\n")
    md.append(
        "This audit checks basic performance skews across a couple of available categorical slices. It is not a substitute for a full fairness review.\n"
    )
    md.append("### Slice: ProductCD\n")
    md.append(product_table.to_markdown(index=False) if not product_table.empty else "_Not available._")
    md.append("\n\n### Slice: DeviceType\n")
    md.append(device_table.to_markdown(index=False) if not device_table.empty else "_Not available._")
    md.append("\n")

    out_path = out_dir / "bias_audit.md"
    # Explicit encoding: Path.write_text defaults to the locale encoding,
    # which breaks on non-UTF-8 Windows locales.
    out_path.write_text("\n".join(md), encoding="utf-8")
    print(f"Wrote {out_path}")


# Script entry point: run the audit only when executed directly, not on import.
if __name__ == "__main__":
    main()