| """Run one DeepWalk/Node2Vec ablation config on top of the fixed stacker.""" |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| from pathlib import Path |
|
|
| import numpy as np |
| import pandas as pd |
|
|
| import randomwalk_systematic_ablation as rw |
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for a single ablation run."""
    parser = argparse.ArgumentParser(
        description="Run one DeepWalk/Node2Vec ablation config on top of the fixed stacker.",
    )
    parser.add_argument(
        "--package-root",
        type=Path,
        # parents[1] is the parent of the directory that contains this script.
        default=Path(__file__).resolve().parents[1],
        help="package root; outputs go under <package-root>/validation_runs/",
    )
    parser.add_argument(
        "--split-seed",
        type=int,
        default=202,
        help="split seed; selects the dynamic_seed<N> output directory and is passed to the rw helpers",
    )
    parser.add_argument(
        "--main-val-score-file",
        type=Path,
        required=True,
        help="score file handed to rw.build_base_features",
    )
    parser.add_argument(
        "--version-name",
        required=True,
        help="version_name of the config to run (from rw.small_configs/graph_configs/extra_configs)",
    )
    parser.add_argument("--workers", type=int, default=8, help="worker count passed to rw.train_model")
    parser.add_argument("--seed", type=int, default=202, help="seed passed to rw.fit_lgb_oof")
    parser.add_argument("--n-splits", type=int, default=5, help="number of splits passed to rw.fit_lgb_oof")
    return parser


def main(argv: list[str] | None = None) -> None:
    """Run the ablation config named by ``--version-name`` and save its results.

    The selected config is looked up by ``version_name`` among
    ``rw.small_configs()``, ``rw.graph_configs()`` and ``rw.extra_configs()``.
    A graph is built and a model trained on it via the ``rw`` helpers, the
    resulting pair-feature block is appended to the base features, and the
    predictions returned by ``rw.fit_lgb_oof`` are scored with ``rw.best_f1``.

    Outputs, all under
    ``<package-root>/validation_runs/dynamic_seed<split-seed>/randomwalk_systematic``:

    * ``<version_name>_oof.npy`` -- the array returned by ``rw.fit_lgb_oof``
    * ``one_<version_name>_ablation.csv`` -- a one-row summary of the config
      hyper-parameters and the metrics
    * ``models/`` -- directory handed to ``rw.train_model``

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: If argument parsing fails or ``--version-name`` does not
            match any known config.
    """
    args = _build_parser().parse_args(argv)

    # Resolve the config before touching the filesystem so that a mistyped
    # version name does not leave empty output directories behind.
    cfgs = {c.version_name: c for c in rw.small_configs() + rw.graph_configs() + rw.extra_configs()}
    if args.version_name not in cfgs:
        raise SystemExit(f"unknown version_name={args.version_name}; known={sorted(cfgs)}")
    cfg = cfgs[args.version_name]

    root = args.package_root
    out_dir = root / "validation_runs" / f"dynamic_seed{args.split_seed}" / "randomwalk_systematic"
    model_dir = out_dir / "models"
    # parents=True also creates out_dir, so a separate mkdir for it is unnecessary.
    model_dir.mkdir(parents=True, exist_ok=True)

    train_refs, pairs, y, X_base = rw.build_base_features(root, args.split_seed, args.main_val_score_file)
    print(f"=== {cfg.version_name} ===")
    G = rw.build_graph(root, train_refs, cfg.graph_type)
    print(f"graph_type={cfg.graph_type} nodes={G.number_of_nodes()} edges={G.number_of_edges()}")
    model = rw.train_model(G, cfg, model_dir, args.workers)

    # The second return value of pair_feature_block is not needed here.
    block, _ = rw.pair_feature_block(model, pairs, cfg, root, args.split_seed, train_refs)
    X = np.column_stack([X_base, block]).astype(np.float32)
    oof = rw.fit_lgb_oof(X, y, args.seed, args.n_splits)
    f1, th, auc, p, r = rw.best_f1(y, oof)
    np.save(out_dir / f"{cfg.version_name}_oof.npy", oof)

    row = {
        "version_name": cfg.version_name,
        "graph_type": cfg.graph_type,
        "method": cfg.method,
        "dim": cfg.dim,
        "walk_length": cfg.walk_length,
        "num_walks": cfg.num_walks,
        "window": cfg.window,
        "p": cfg.p,
        "q": cfg.q,
        "validation_F1": f1,
        "threshold": th,
        "auc": auc,
        "precision": p,
        "recall": r,
    }
    path = out_dir / f"one_{cfg.version_name}_ablation.csv"
    # Build the one-row frame once and reuse it for both the CSV and the console.
    summary = pd.DataFrame([row])
    summary.to_csv(path, index=False)
    print(summary.to_string(index=False))
    print(path)
|
|
|
|
# Run the ablation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|