| """Conservative blends anchored to the public-validated random-walk score.""" |
|
|
| from __future__ import annotations |
|
|
| from pathlib import Path |
|
|
| import numpy as np |
| import pandas as pd |
|
|
|
|
# Directory one level above the folder that contains this file; every input
# and output path in this module is resolved against it.
ROOT = Path(__file__).resolve().parents[1]

# Folder that receives the submission CSVs and the summary written below.
OUT = ROOT.joinpath("validation_runs", "dynamic_seed202", "randomwalk_conservative_blends")
|
|
|
|
def rank01(x: np.ndarray) -> np.ndarray:
    """Map the values of ``x`` to evenly spaced ranks in ``[0, 1]``.

    The smallest value receives 0 and the largest receives 1. The sort is
    stable, so tied values get distinct ranks in order of their position.

    Args:
        x: Sequence of sortable values (indexed along its first axis).

    Returns:
        A float32 array of the same length holding the rank of each element.
    """
    n = len(x)
    evenly_spaced = np.linspace(0, 1, n, dtype=np.float32)
    ranks = np.empty(n, dtype=np.float32)
    # Scatter the grid back to the original positions: the i-th smallest
    # element receives the i-th grid value.
    ranks[np.argsort(x, kind="mergesort")] = evenly_spaced
    return ranks
|
|
|
|
def write_ratio_submission(name: str, score: np.ndarray, ratio: float, reference: np.ndarray | None) -> dict:
    """Write a 0/1 submission CSV that marks the top ``ratio`` share of ``score``.

    The highest-scoring rows (selected with a stable sort) are predicted 1,
    then every row flagged in ``cached_scores/test_known_mask.npy`` is forced
    to 1 as well. The result is written to ``OUT`` as
    ``{name}_r{ratio:.3f}.csv`` with the columns ``Index`` and ``Predicted``.

    Args:
        name: Prefix of the output file name.
        score: One score per row; larger scores are selected first.
        ratio: Fraction of rows to mark positive before the known mask is
            applied. The resulting row count is clamped to ``[0, len(score)]``.
        reference: Optional prediction vector to compare against, or ``None``.

    Returns:
        A dict with the keys ``path``, ``ratio``, ``positive_ratio`` and
        ``changed_vs_public_anchor`` (``-1`` when ``reference`` is ``None``).

    Raises:
        ValueError: If ``reference`` does not have the same shape as the
            prediction vector.
    """
    known = np.load(ROOT / "cached_scores" / "test_known_mask.npy").astype(bool)

    n_rows = len(score)
    # Clamp the count so out-of-range ratios degrade to "none" / "all".
    n_positive = min(max(int(round(n_rows * ratio)), 0), n_rows)

    pred = np.zeros(n_rows, dtype=np.int8)
    order = np.argsort(score, kind="mergesort")
    # Slice with an explicit start index: ``order[-0:]`` would be the whole
    # array, so a count of zero must not be expressed as a negative offset.
    pred[order[n_rows - n_positive:]] = 1
    pred[known] = 1

    # Validate before touching the disk so a bad reference leaves no file behind.
    if reference is not None and np.shape(reference) != pred.shape:
        raise ValueError(
            f"reference has shape {np.shape(reference)}, expected {pred.shape}"
        )

    path = OUT / f"{name}_r{ratio:.3f}.csv"
    path.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame({"Index": np.arange(len(pred), dtype=np.int64), "Predicted": pred}).to_csv(path, index=False)

    changed = int((pred != reference).sum()) if reference is not None else -1
    return {"path": str(path), "ratio": ratio, "positive_ratio": float(pred.mean()), "changed_vs_public_anchor": changed}
|
|
|
|
def main() -> None:
    """Blend rank-normalised scores around the anchor and write one CSV per blend and ratio.

    Loads five cached prediction arrays, converts each to ranks with
    ``rank01``, forms weighted rank blends dominated by the anchor score, and
    calls ``write_ratio_submission`` for every blend at the ratios 0.499,
    0.500 and 0.501. The per-file summaries are saved to
    ``conservative_blend_summary.csv`` in ``OUT`` and printed.
    """
    OUT.mkdir(parents=True, exist_ok=True)

    run_dir = ROOT / "validation_runs/dynamic_seed202"
    anchor_dir = run_dir / "node2vec_deepwalk_submission"
    ensemble_dir = run_dir / "randomwalk_ensemble_submission"

    score_paths = {
        "anchor": anchor_dir / "test_content_mf_deepwalk_node2vec_lgb_pred.npy",
        "highdim": ensemble_dir / "test_dw_highdim_d256_l40_w10_win10_pred.npy",
        "d256l80": ensemble_dir / "test_dw_d256_l80_w10_win10_pred.npy",
        "ens5": ensemble_dir / "test_rwens_dw_basel40_dw_longl80_dw_highdim_d256_l40_dw_graph_ap_pp_n2v_p2_q1l40_pred.npy",
        "ens7": ensemble_dir / "test_rwens_dw_basel40_dw_longl80_dw_highdim_d256_l40_dw_d256_l80_dw_seed3407l40_dw_graph_ap_pp_n2v_p2_q1l40_pred.npy",
    }
    scores = {}
    for key, score_file in score_paths.items():
        scores[key] = np.load(score_file).astype(np.float32)

    # The anchor submission is optional; without it the "changed" column of
    # the summary cannot be computed and is reported as -1.
    anchor_sub = anchor_dir / "submission_content_mf_deepwalk_node2vec_lgb_th0.480000.csv"
    if anchor_sub.exists():
        anchor_pred = pd.read_csv(anchor_sub)["Predicted"].to_numpy(np.int8)
    else:
        anchor_pred = None

    ranks = {key: rank01(values) for key, values in scores.items()}

    def weighted(parts: tuple[tuple[float, str], ...]) -> np.ndarray:
        # Accumulate strictly left to right so the arithmetic matches a
        # hand-written ``w0 * r0 + w1 * r1 + ...`` expression.
        (first_weight, first_key), *rest = parts
        total = first_weight * ranks[first_key]
        for weight, key in rest:
            total = total + weight * ranks[key]
        return total

    recipes = {
        "blend_anchor90_highdim10": ((0.90, "anchor"), (0.10, "highdim")),
        "blend_anchor85_highdim15": ((0.85, "anchor"), (0.15, "highdim")),
        "blend_anchor80_highdim20": ((0.80, "anchor"), (0.20, "highdim")),
        "blend_anchor90_d256l80_10": ((0.90, "anchor"), (0.10, "d256l80")),
        "blend_anchor85_d256l80_15": ((0.85, "anchor"), (0.15, "d256l80")),
        "blend_anchor90_ens5_10": ((0.90, "anchor"), (0.10, "ens5")),
        "blend_anchor85_ens5_15": ((0.85, "anchor"), (0.15, "ens5")),
        "blend_anchor90_ens7_10": ((0.90, "anchor"), (0.10, "ens7")),
        "blend_anchor85_ens7_15": ((0.85, "anchor"), (0.15, "ens7")),
        "blend_anchor80_highdim10_d256l80_10": ((0.80, "anchor"), (0.10, "highdim"), (0.10, "d256l80")),
        "blend_anchor75_highdim10_d256l80_10_ens5_05": (
            (0.75, "anchor"),
            (0.10, "highdim"),
            (0.10, "d256l80"),
            (0.05, "ens5"),
        ),
    }
    # The pure anchor ranking goes first so the output order is unchanged.
    blends = {"anchor_rank_only": ranks["anchor"]}
    for blend_name, parts in recipes.items():
        blends[blend_name] = weighted(parts)

    ratios = (0.499, 0.500, 0.501)
    rows = []
    for blend_name, blended in blends.items():
        for ratio in ratios:
            summary_row = write_ratio_submission(blend_name, blended.astype(np.float32), ratio, anchor_pred)
            rows.append(summary_row)

    summary = pd.DataFrame(rows)
    summary.to_csv(OUT / "conservative_blend_summary.csv", index=False)
    print(summary.to_string(index=False))
|
|
|
|
# Run the blend pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|