"""Validation OOF for a selected random-walk ensemble.""" from __future__ import annotations import argparse from pathlib import Path import numpy as np import pandas as pd from gensim.models import Word2Vec import randomwalk_systematic_ablation as rw from generate_randomwalk_ensemble_submission import aggregate def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--package-root", type=Path, default=Path(__file__).resolve().parents[1]) parser.add_argument("--split-seed", type=int, default=202) parser.add_argument("--main-val-score-file", type=Path, required=True) parser.add_argument("--versions", nargs="+", required=True) parser.add_argument("--seed", type=int, default=202) parser.add_argument("--n-splits", type=int, default=5) args = parser.parse_args() root = args.package_root sys_dir = root / "validation_runs" / f"dynamic_seed{args.split_seed}" / "randomwalk_systematic" cfgs = {c.version_name: c for c in rw.small_configs() + rw.graph_configs() + rw.extra_configs()} train_refs, pairs, y, X_base = rw.build_base_features(root, args.split_seed, args.main_val_score_file) blocks = [] for version in args.versions: cfg = cfgs[version] model = Word2Vec.load(str(sys_dir / "models" / f"{version}.model")) block, _ = rw.pair_feature_block(model, pairs, cfg, root, args.split_seed, train_refs) blocks.append(block) X = np.column_stack([X_base, *blocks, aggregate(blocks)]).astype(np.float32) print("fit_oof", X.shape) oof = rw.fit_lgb_oof(X, y, args.seed, args.n_splits) f1, th, auc, p, r = rw.best_f1(y, oof) version_name = "rwens_" + "_".join(args.versions) np.save(sys_dir / f"{version_name}_oof.npy", oof) row = { "version_name": version_name, "versions": ",".join(args.versions), "validation_F1": f1, "threshold": th, "auc": auc, "precision": p, "recall": r, "n_features": X.shape[1], } path = sys_dir / f"ensemble_{len(args.versions)}_ablation.csv" pd.DataFrame([row]).to_csv(path, index=False) print(pd.DataFrame([row]).to_string(index=False)) print(path) if __name__ == "__main__": main()