| """ |
| UPI-Sim Benchmark Runner |
| ========================= |
| Node-level temporal fraud risk prediction benchmark. |
| |
| Runs: 3 difficulties × 5 seeds × (TGN + GNN + baselines + ablations) |
| Reports: mean ± std for ROC-AUC, PR-AUC, Brier Score |
| """ |
|
|
| import os |
| import sys |
| import pickle |
| import time |
| import torch |
| import numpy as np |
| import pandas as pd |
|
|
| from sklearn.metrics import roc_auc_score, average_precision_score, brier_score_loss |
| from sklearn.linear_model import LogisticRegression |
| from sklearn.ensemble import GradientBoostingClassifier |
| from sklearn.neural_network import MLPClassifier |
| from sklearn.preprocessing import StandardScaler |
|
|
| from src.core.config_loader import load_config |
| from src.generators.user_generator import generate_users |
| from src.generators.transaction_generator import generate_transactions |
| from src.fraud.fraud_engine import FraudEngine |
| from src.risk.risk_engine import apply_risk_engine |
| from src.graph.dataset_builder import build_graph_dataset |
| from src.tgn.train import train_tgn |
| from src.tgn.memory import Memory |
| from src.tgn.time_encoding import TimeEncoding |
| from src.gnn.train import train_gnn |
|
|
|
|
| |
| |
| |
|
|
| def temporal_split(df, train_ratio=0.7): |
| df = df.sort_values("timestamp") |
| split_time = df["timestamp"].quantile(train_ratio) |
| past = df[df["timestamp"] <= split_time] |
| return past, split_time |
|
|
|
|
| def build_node_features(df_past, all_nodes): |
| |
| |
| return np.zeros((len(all_nodes), 2), dtype=np.float32) |
|
|
|
|
| def build_node_labels(df, split_time, all_nodes, horizon=0.05): |
| t_end = df["timestamp"].max() |
| window_end = split_time + horizon * (t_end - split_time) |
| future = df[(df["timestamp"] > split_time) & (df["timestamp"] <= window_end)] |
| fraud = future.groupby("sender_id")["is_fraud"].max() |
| return np.array([fraud.get(u, 0) for u in all_nodes], dtype=np.float32) |
|
|
|
|
| def compute_ece(y_true, y_prob, n_bins=10): |
| """Expected Calibration Error.""" |
| bins = np.linspace(0, 1, n_bins + 1) |
| ece = 0.0 |
| for lo, hi in zip(bins[:-1], bins[1:]): |
| mask = (y_prob >= lo) & (y_prob < hi) |
| if mask.sum() == 0: |
| continue |
| frac = mask.sum() / len(y_true) |
| avg_conf = y_prob[mask].mean() |
| avg_acc = y_true[mask].mean() |
| ece += frac * abs(avg_conf - avg_acc) |
| return ece |
|
|
|
|
| def evaluate_metrics(y_true, y_prob): |
| """Compute ROC-AUC, PR-AUC, Brier, ECE, Expected Cost.""" |
| cost_fn = lambda y, p: ( |
| (y == 1) * (1 - p) * 5 |
| + (y == 0) * p * 1 |
| ) |
| expected_cost = cost_fn(y_true, y_prob).mean() |
| |
| return { |
| "roc": roc_auc_score(y_true, y_prob), |
| "pr": average_precision_score(y_true, y_prob), |
| "brier": brier_score_loss(y_true, y_prob), |
| "ece": compute_ece(y_true, y_prob), |
| "cost": expected_cost, |
| } |
|
|
|
|
| |
| |
| |
|
|
| def train_node_classifier(model, memory, x_node, y_node, num_epochs=100): |
| device = torch.device("cpu") |
| x = torch.tensor(x_node, dtype=torch.float32).to(device) |
| x = (x - x.mean(dim=0)) / (x.std(dim=0) + 1e-6) |
| y = torch.tensor(y_node, dtype=torch.float32).to(device) |
|
|
| for param in model.parameters(): |
| param.requires_grad = False |
| for param in model.node_classifier.parameters(): |
| param.requires_grad = True |
|
|
| optimizer = torch.optim.Adam(model.node_classifier.parameters(), lr=1e-3) |
| pw = torch.clamp((y == 0).sum().float() / (y == 1).sum().float(), max=10.0) |
| loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=pw) |
|
|
| model.train() |
| for epoch in range(num_epochs): |
| node_emb = memory.memory.detach() |
| combined = torch.cat([node_emb, x], dim=1) |
| logits = model.node_classifier(combined).squeeze(-1) |
| loss = loss_fn(logits, y) |
| optimizer.zero_grad() |
| loss.backward() |
| optimizer.step() |
|
|
| for param in model.parameters(): |
| param.requires_grad = True |
|
|
|
|
| def evaluate_tgn_node(model, memory, x_node, y_node, ablation=None): |
| device = torch.device("cpu") |
| x = torch.tensor(x_node, dtype=torch.float32).to(device) |
| x = (x - x.mean(dim=0)) / (x.std(dim=0) + 1e-6) |
| y_true = y_node.copy() |
|
|
| model.eval() |
| with torch.no_grad(): |
| node_emb = memory.memory.clone() |
|
|
| |
| if ablation == "no_memory": |
| node_emb = torch.zeros_like(node_emb) |
| if ablation == "no_features": |
| x = torch.zeros_like(x) |
|
|
| combined = torch.cat([node_emb, x], dim=1) |
| logits = model.node_classifier(combined).squeeze(-1) |
| probs = torch.sigmoid(logits).cpu().numpy() |
|
|
| return evaluate_metrics(y_true, probs) |
|
|
|
|
| def evaluate_gnn_node(model, graph_data, x_node, y_node): |
| device = torch.device("cpu") |
| edge_index = torch.tensor(graph_data["edge_index"], dtype=torch.long).to(device) |
| edge_attr = torch.tensor(graph_data["edge_attr"], dtype=torch.float32).to(device) |
| edge_attr = (edge_attr - edge_attr.mean(dim=0)) / (edge_attr.std(dim=0) + 1e-6) |
|
|
| x = torch.tensor(x_node, dtype=torch.float32).to(device) |
| x = (x - x.mean(dim=0)) / (x.std(dim=0) + 1e-6) |
| y_true = y_node.copy() |
|
|
| model.eval() |
| with torch.no_grad(): |
| edge_logits = model(x, edge_index, edge_attr, edge_index[0], edge_index[1]) |
| edge_probs = torch.sigmoid(edge_logits) |
|
|
| node_scores = torch.zeros(x.shape[0], device=device) |
| node_scores.index_add_(0, edge_index[0], edge_probs) |
| deg = torch.bincount(edge_index[0], minlength=x.shape[0]).float() + 1e-6 |
| node_scores = node_scores / deg |
|
|
| return evaluate_metrics(y_true, node_scores.cpu().numpy()) |
|
|
|
|
| |
| |
| |
|
|
| def run_baselines(x_node, y_node): |
| scaler = StandardScaler() |
| X = scaler.fit_transform(x_node) |
| y = y_node |
|
|
| results = {} |
|
|
| |
| lr = LogisticRegression(max_iter=500, class_weight="balanced") |
| lr.fit(X, y) |
| probs_lr = lr.predict_proba(X)[:, 1] |
| results["LogReg"] = evaluate_metrics(y, probs_lr) |
|
|
| |
| xgb = GradientBoostingClassifier(n_estimators=100, max_depth=4, random_state=42) |
| xgb.fit(X, y) |
| probs_xgb = xgb.predict_proba(X)[:, 1] |
| results["XGBoost"] = evaluate_metrics(y, probs_xgb) |
|
|
| |
| mlp = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=300, random_state=42) |
| mlp.fit(X, y) |
| probs_mlp = mlp.predict_proba(X)[:, 1] |
| results["MLP"] = evaluate_metrics(y, probs_mlp) |
|
|
| return results |
|
|
|
|
| |
| |
| |
|
|
| def run_single(difficulty, config, users, seed=42): |
| """Run one seed for one difficulty. Returns dict of all metrics.""" |
| torch.manual_seed(seed) |
| np.random.seed(seed) |
|
|
| df = generate_transactions(users, config) |
| df = apply_risk_engine(df, users, config) |
| engine = FraudEngine(seed=seed, difficulty=difficulty) |
| df = engine.apply(df) |
| df = df.sort_values("timestamp").reset_index(drop=True) |
|
|
| graph_data = build_graph_dataset(df, users) |
|
|
| past, split_time = temporal_split(df) |
| all_nodes = sorted(df["sender_id"].unique()) |
| x_node = build_node_features(past, all_nodes) |
| y_node = build_node_labels(df, split_time, all_nodes, horizon=0.05) |
| node_fraud = y_node.mean() |
|
|
| results = {"node_fraud": node_fraud} |
|
|
| |
| tgn_model, memory, _, _ = train_tgn(graph_data, num_epochs=3) |
| train_node_classifier(tgn_model, memory, x_node, y_node, num_epochs=100) |
| results["TGN"] = evaluate_tgn_node(tgn_model, memory, x_node, y_node) |
|
|
| |
| results["TGN-no-mem"] = evaluate_tgn_node(tgn_model, memory, x_node, y_node, ablation="no_memory") |
| results["TGN-no-feat"] = evaluate_tgn_node(tgn_model, memory, x_node, y_node, ablation="no_features") |
|
|
| |
| gnn_model = train_gnn(graph_data) |
| results["GNN"] = evaluate_gnn_node(gnn_model, graph_data, x_node, y_node) |
|
|
| |
| baseline_results = run_baselines(x_node, y_node) |
| results.update(baseline_results) |
|
|
| return results |
|
|
|
|
| |
| |
| |
|
|
| SEEDS = [42, 43, 44, 45, 46] |
| DIFFICULTIES = ["easy", "medium", "hard"] |
| MODELS = ["TGN", "TGN-no-mem", "TGN-no-feat", "GNN", "LogReg", "XGBoost", "MLP"] |
| METRICS = ["roc", "pr", "brier", "ece", "cost"] |
|
|
|
|
| def main(): |
| config = load_config("config/default.yaml") |
| users = generate_users(config) |
|
|
| |
| all_results = {} |
|
|
| for diff in DIFFICULTIES: |
| all_results[diff] = {m: {k: [] for k in METRICS} for m in MODELS} |
| fraud_rates = [] |
|
|
| for seed in SEEDS: |
| print(f"\n{'='*50}") |
| print(f" {diff.upper()} | seed={seed}") |
| print(f"{'='*50}") |
|
|
| r = run_single(diff, config, users, seed=seed) |
| fraud_rates.append(r["node_fraud"]) |
|
|
| for model in MODELS: |
| for metric in METRICS: |
| all_results[diff][model][metric].append(r[model][metric]) |
|
|
| avg_fraud = np.mean(fraud_rates) |
| print(f"\n {diff} avg node fraud: {avg_fraud:.1%}") |
|
|
| |
| |
| |
| print("\n") |
| print("=" * 100) |
| print(" UPI-Sim BENCHMARK: Node-Level Fraud Risk Prediction") |
| print(" Task: predict user fraud in future window | 5 seeds | mean ± std") |
| print("=" * 100) |
|
|
| for diff in DIFFICULTIES: |
| fraud_avg = np.mean([all_results[diff][MODELS[0]]["roc"]]) |
| print(f"\n--- {diff.upper()} ---") |
| print(f"{'Model':<14} {'ROC-AUC':>14} {'PR-AUC':>14} {'Brier':>14} {'ECE':>14} {'Cost':>14}") |
| print("-" * 88) |
|
|
| for model in MODELS: |
| row = [] |
| for metric in METRICS: |
| vals = all_results[diff][model][metric] |
| m, s = np.mean(vals), np.std(vals) |
| row.append(f"{m:.4f}±{s:.4f}") |
|
|
| print(f"{model:<14} {row[0]:>14} {row[1]:>14} {row[2]:>14} {row[3]:>14} {row[4]:>14}") |
|
|
| |
| |
| |
| print(f"\n{'='*65}") |
| print(f" DIFFICULTY SCALING LAW: TGN Advantage (Δ ROC-AUC)") |
| print(f"{'='*65}") |
| print(f"{'Difficulty':<14} | {'Δ(TGN - GNN)':>15} | {'Δ(TGN - XGBoost)':>15}") |
| print("-" * 52) |
|
|
| for diff in DIFFICULTIES: |
| tgn_rocs = all_results[diff]["TGN"]["roc"] |
| gnn_rocs = all_results[diff]["GNN"]["roc"] |
| xgb_rocs = all_results[diff]["XGBoost"]["roc"] |
| |
| gaps_gnn = [t - g for t, g in zip(tgn_rocs, gnn_rocs)] |
| gaps_xgb = [t - x for t, x in zip(tgn_rocs, xgb_rocs)] |
| |
| gnn_str = f"{np.mean(gaps_gnn):+.4f} ± {np.std(gaps_gnn):.4f}" |
| xgb_str = f"{np.mean(gaps_xgb):+.4f} ± {np.std(gaps_xgb):.4f}" |
| |
| print(f"{diff:<14} | {gnn_str:>15} | {xgb_str:>15}") |
|
|
|
|
| if __name__ == "__main__": |
| main() |