cs3319-project2 / code /randomwalk_one_ablation.py
NLP-beginner's picture
CS3319 Project 2 final deliverable (public F1 = 0.96626)
f28d994
Raw
History Blame Contribute Delete
2.64 kB
"""Run one DeepWalk/Node2Vec ablation config on top of the fixed stacker."""
from __future__ import annotations
import argparse
from pathlib import Path
import numpy as np
import pandas as pd
import randomwalk_systematic_ablation as rw
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--package-root", type=Path, default=Path(__file__).resolve().parents[1])
parser.add_argument("--split-seed", type=int, default=202)
parser.add_argument("--main-val-score-file", type=Path, required=True)
parser.add_argument("--version-name", required=True)
parser.add_argument("--workers", type=int, default=8)
parser.add_argument("--seed", type=int, default=202)
parser.add_argument("--n-splits", type=int, default=5)
args = parser.parse_args()
root = args.package_root
out_dir = root / "validation_runs" / f"dynamic_seed{args.split_seed}" / "randomwalk_systematic"
model_dir = out_dir / "models"
out_dir.mkdir(parents=True, exist_ok=True)
model_dir.mkdir(parents=True, exist_ok=True)
cfgs = {c.version_name: c for c in rw.small_configs() + rw.graph_configs() + rw.extra_configs()}
if args.version_name not in cfgs:
raise SystemExit(f"unknown version_name={args.version_name}; known={sorted(cfgs)}")
cfg = cfgs[args.version_name]
train_refs, pairs, y, X_base = rw.build_base_features(root, args.split_seed, args.main_val_score_file)
print(f"=== {cfg.version_name} ===")
G = rw.build_graph(root, train_refs, cfg.graph_type)
print(f"graph_type={cfg.graph_type} nodes={G.number_of_nodes()} edges={G.number_of_edges()}")
model = rw.train_model(G, cfg, model_dir, args.workers)
block, _ = rw.pair_feature_block(model, pairs, cfg, root, args.split_seed, train_refs)
X = np.column_stack([X_base, block]).astype(np.float32)
oof = rw.fit_lgb_oof(X, y, args.seed, args.n_splits)
f1, th, auc, p, r = rw.best_f1(y, oof)
np.save(out_dir / f"{cfg.version_name}_oof.npy", oof)
row = {
"version_name": cfg.version_name,
"graph_type": cfg.graph_type,
"method": cfg.method,
"dim": cfg.dim,
"walk_length": cfg.walk_length,
"num_walks": cfg.num_walks,
"window": cfg.window,
"p": cfg.p,
"q": cfg.q,
"validation_F1": f1,
"threshold": th,
"auc": auc,
"precision": p,
"recall": r,
}
path = out_dir / f"one_{cfg.version_name}_ablation.csv"
pd.DataFrame([row]).to_csv(path, index=False)
print(pd.DataFrame([row]).to_string(index=False))
print(path)
# Run the ablation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()