"""Conservative blends anchored to the public-validated random-walk score.""" from __future__ import annotations from pathlib import Path import numpy as np import pandas as pd ROOT = Path(__file__).resolve().parents[1] OUT = ROOT / "validation_runs" / "dynamic_seed202" / "randomwalk_conservative_blends" def rank01(x: np.ndarray) -> np.ndarray: order = np.argsort(x, kind="mergesort") out = np.empty(len(x), dtype=np.float32) out[order] = np.linspace(0, 1, len(x), dtype=np.float32) return out def write_ratio_submission(name: str, score: np.ndarray, ratio: float, reference: np.ndarray | None) -> dict: known = np.load(ROOT / "cached_scores" / "test_known_mask.npy").astype(bool) pred = np.zeros(len(score), dtype=np.int8) pred[np.argsort(score, kind="mergesort")[-int(round(len(score) * ratio)):]] = 1 pred[known] = 1 path = OUT / f"{name}_r{ratio:.3f}.csv" pd.DataFrame({"Index": np.arange(len(pred), dtype=np.int64), "Predicted": pred}).to_csv(path, index=False) changed = int((pred != reference).sum()) if reference is not None else -1 return {"path": str(path), "ratio": ratio, "positive_ratio": float(pred.mean()), "changed_vs_public_anchor": changed} def main() -> None: OUT.mkdir(parents=True, exist_ok=True) score_paths = { "anchor": ROOT / "validation_runs/dynamic_seed202/node2vec_deepwalk_submission/test_content_mf_deepwalk_node2vec_lgb_pred.npy", "highdim": ROOT / "validation_runs/dynamic_seed202/randomwalk_ensemble_submission/test_dw_highdim_d256_l40_w10_win10_pred.npy", "d256l80": ROOT / "validation_runs/dynamic_seed202/randomwalk_ensemble_submission/test_dw_d256_l80_w10_win10_pred.npy", "ens5": ROOT / "validation_runs/dynamic_seed202/randomwalk_ensemble_submission/test_rwens_dw_basel40_dw_longl80_dw_highdim_d256_l40_dw_graph_ap_pp_n2v_p2_q1l40_pred.npy", "ens7": ROOT / "validation_runs/dynamic_seed202/randomwalk_ensemble_submission/test_rwens_dw_basel40_dw_longl80_dw_highdim_d256_l40_dw_d256_l80_dw_seed3407l40_dw_graph_ap_pp_n2v_p2_q1l40_pred.npy", } scores = {k: np.load(v).astype(np.float32) for k, v in score_paths.items()} anchor_sub = ROOT / "validation_runs/dynamic_seed202/node2vec_deepwalk_submission/submission_content_mf_deepwalk_node2vec_lgb_th0.480000.csv" anchor_pred = pd.read_csv(anchor_sub)["Predicted"].to_numpy(np.int8) if anchor_sub.exists() else None rank = {k: rank01(v) for k, v in scores.items()} blends = { "anchor_rank_only": rank["anchor"], "blend_anchor90_highdim10": 0.90 * rank["anchor"] + 0.10 * rank["highdim"], "blend_anchor85_highdim15": 0.85 * rank["anchor"] + 0.15 * rank["highdim"], "blend_anchor80_highdim20": 0.80 * rank["anchor"] + 0.20 * rank["highdim"], "blend_anchor90_d256l80_10": 0.90 * rank["anchor"] + 0.10 * rank["d256l80"], "blend_anchor85_d256l80_15": 0.85 * rank["anchor"] + 0.15 * rank["d256l80"], "blend_anchor90_ens5_10": 0.90 * rank["anchor"] + 0.10 * rank["ens5"], "blend_anchor85_ens5_15": 0.85 * rank["anchor"] + 0.15 * rank["ens5"], "blend_anchor90_ens7_10": 0.90 * rank["anchor"] + 0.10 * rank["ens7"], "blend_anchor85_ens7_15": 0.85 * rank["anchor"] + 0.15 * rank["ens7"], "blend_anchor80_highdim10_d256l80_10": 0.80 * rank["anchor"] + 0.10 * rank["highdim"] + 0.10 * rank["d256l80"], "blend_anchor75_highdim10_d256l80_10_ens5_05": 0.75 * rank["anchor"] + 0.10 * rank["highdim"] + 0.10 * rank["d256l80"] + 0.05 * rank["ens5"], } rows = [] for name, score in blends.items(): for ratio in [0.499, 0.500, 0.501]: rows.append(write_ratio_submission(name, score.astype(np.float32), ratio, anchor_pred)) pd.DataFrame(rows).to_csv(OUT / "conservative_blend_summary.csv", index=False) print(pd.DataFrame(rows).to_string(index=False)) if __name__ == "__main__": main()