"""Generate test submissions for the post-0.95 stacked ensemble."""

from __future__ import annotations

import argparse
import importlib.util
import pickle as pkl
import re
from pathlib import Path

import lightgbm as lgb
import numpy as np
import pandas as pd
import torch


def load_module(name: str, path: Path):
    """Import the Python source file at *path* as a module called *name*.

    The module is executed and returned; it is not registered in
    ``sys.modules``.

    Raises:
        ImportError: if no import spec/loader can be built for *path*
            (for example, the file suffix is not a recognised source suffix).
    """
    spec = importlib.util.spec_from_file_location(name, path)
    # Explicit check instead of ``assert``: asserts vanish under ``python -O``
    # and a ``None`` spec would otherwise fail later with an opaque
    # AttributeError inside ``module_from_spec``.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load module {name!r} from {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


def read_txt(path: Path) -> list[list[int]]:
    """Parse a whitespace-separated integer table, one row per line.

    Blank lines yield empty rows. The file is closed before returning.
    """
    with path.open() as handle:
        # ``str.split()`` with no argument already ignores leading/trailing
        # whitespace, including the trailing newline.
        return [list(map(int, line.split())) for line in handle]


def infer_layers(path: Path, state: dict) -> int:
    """Work out how many propagation layers a checkpoint was trained with.

    Preference order:
      1. ``state["layer_weight"]``: its first dimension minus one.
      2. A ``_l<N>d`` tag in ``<run dir name>_<checkpoint file name>``.
      3. An ``L<N>`` tag in the same text.
      4. The fallback value 4.
    """
    layer_weight = state.get("layer_weight") if "layer_weight" in state else None
    if layer_weight is not None:
        return int(layer_weight.shape[0] - 1)

    # The run directory is two levels above the checkpoint file.
    label = f"{path.parent.parent.name}_{path.name}"
    for pattern in (r"_l(\d+)d", r"L(\d+)"):
        found = re.search(pattern, label)
        if found:
            return int(found.group(1))
    return 4


def infer_mode(score_path: Path) -> str:
    """Return the scoring mode encoded in a score file's name.

    ``_dot_`` is checked before ``_neg_l2_``; names containing neither tag
    map to ``"cos"``.
    """
    filename = score_path.name
    for tag, mode in (("_dot_", "dot"), ("_neg_l2_", "neg_l2")):
        if tag in filename:
            return mode
    return "cos"


def score_cache_path(root: Path, split_seed: int, val_score_path: Path) -> Path:
    """Map a validation score file to the location of its cached test scores.

    The validation file must live under
    ``<root>/validation_runs/dynamic_seed<split_seed>``. Its relative location
    is mirrored under ``.../post95_test_scores`` and the first ``val_`` in the
    file name is replaced with ``test_``.

    Raises:
        ValueError: if *val_score_path* is not inside the run directory.
    """
    run_dir = root / "validation_runs" / f"dynamic_seed{split_seed}"
    # Resolve BOTH sides before comparing: resolving only the score path makes
    # ``relative_to`` fail whenever ``root`` is relative or goes through a
    # symlink. The returned path is still built from ``root`` as given.
    rel = val_score_path.resolve().relative_to(run_dir.resolve())
    name = rel.name.replace("val_", "test_", 1)
    return run_dir / "post95_test_scores" / rel.parent / name


def checkpoint_for_score(score_path: Path) -> Path:
    """Locate the checkpoint file that produced a single-model score file.

    The score file stem (minus its first ``val_``) is split on ``_``. The
    first token is the variant; the first ``s<digits>`` and ``d<digits>``
    tokens are the seed and embedding-dim tags. The checkpoint is
    ``<run dir>/checkpoints/<variant>_val_<seed>_<dim>.pt`` where the run
    directory is the parent of the score file's directory.

    Raises:
        ValueError: for ensemble-mean score files, or when the file name has
            no seed or dim token.
    """
    score_path = score_path.resolve()
    stem = score_path.stem.replace("val_", "", 1)
    if stem.endswith("_ensemble_mean"):
        raise ValueError("ensemble scores do not map to a single checkpoint")
    parts = stem.split("_")
    variant = parts[0]

    def tagged_token(prefix: str, what: str) -> str:
        # ``next`` gets a default so a malformed name produces a clear
        # ValueError instead of leaking a bare StopIteration.
        token = next((p for p in parts if p.startswith(prefix) and p[1:].isdigit()), None)
        if token is None:
            raise ValueError(f"cannot find {what} token ({prefix}<digits>) in score file name {score_path.name!r}")
        return token

    seed = tagged_token("s", "seed")
    dim = tagged_token("d", "dim")
    return score_path.parent.parent / "checkpoints" / f"{variant}_val_{seed}_{dim}.pt"


def ensemble_member_scores(score_path: Path) -> list[Path]:
    """List the member score files behind an ensemble-mean score file.

    Reads ``ensemble_result.txt`` from the run directory (the parent of the
    score file's directory), takes the first line starting with ``models=``
    and treats its value as a comma-separated list of score-file stems. Each
    stem is returned as ``<scores dir>/<stem>.npy``.

    Raises:
        ValueError: if the result file has no ``models=`` line.
    """
    score_path = score_path.resolve()
    result_path = score_path.parent.parent / "ensemble_result.txt"
    models_line = next(
        (line for line in result_path.read_text().splitlines() if line.startswith("models=")),
        None,
    )
    # Fail with a message naming the file rather than a bare StopIteration.
    if models_line is None:
        raise ValueError(f"no 'models=' line found in {result_path}")
    stems = [stem.strip() for stem in models_line.split("=", 1)[1].split(",") if stem.strip()]
    return [score_path.parent / f"{stem}.npy" for stem in stems]


@torch.no_grad()
def score_checkpoint_on_test(
    root: Path,
    split_seed: int,
    module,
    parts,
    data_cache: dict,
    test_pairs: np.ndarray,
    val_score_path: Path,
    device: str,
    batch_size: int,
) -> np.ndarray:
    """Score the test pairs with the checkpoint behind one validation score file.

    Results are cached on disk at ``score_cache_path(...)``; an existing cache
    file is returned without touching the checkpoint. Graph data built through
    ``module.build_data`` is memoised in *data_cache*, keyed by the
    ``(use_citation, use_coauthor)`` flags derived from the run directory name.

    Returns:
        The float32 scores produced by ``module.predict_scores`` (or the
        contents of the cache file when it already exists).
    """
    cached = score_cache_path(root, split_seed, val_score_path)
    if cached.exists():
        return np.load(cached)
    cached.parent.mkdir(parents=True, exist_ok=True)

    checkpoint = checkpoint_for_score(val_score_path)
    weights = torch.load(checkpoint, map_location=device)
    hidden = weights["author_emb.weight"].shape[1]
    depth = infer_layers(checkpoint, weights)

    # Edge types are switched off by tags in the run directory name.
    run_name = checkpoint.parent.parent.name
    paper_only = "author_paper_only" in run_name
    use_citation = not ("no_cite" in run_name or paper_only)
    use_coauthor = not ("no_coauthor" in run_name or paper_only)
    graph_key = (use_citation, use_coauthor)

    target = torch.device(device)
    if graph_key not in data_cache:
        data_cache[graph_key] = module.build_data(
            parts,
            6611,
            79937,
            target,
            use_citation=use_citation,
            use_coauthor=use_coauthor,
        )

    if "learnw" in checkpoint.name:
        model_cls = module.LearnableWeightLightGCN
    else:
        model_cls = module.LightGCN
    feature_dim = parts["paper_feat_aug"].shape[1]
    model = model_cls(6611, feature_dim, hidden, depth).to(target)
    model.load_state_dict(weights)

    raw = module.predict_scores(
        model,
        data_cache[graph_key],
        test_pairs,
        batch_size,
        mode=infer_mode(val_score_path),
        normalize_embeddings=False,
    )
    scores = raw.astype(np.float32)
    np.save(cached, scores)

    # Drop the model before moving on to the next checkpoint.
    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print(f"saved {cached}")
    return scores


def score_val_path_on_test(
    root: Path,
    split_seed: int,
    module,
    parts,
    data_cache: dict,
    test_pairs: np.ndarray,
    val_score_path: Path,
    device: str,
    batch_size: int,
) -> np.ndarray:
    """Return test scores matching a validation score file, computing if needed.

    A cached result is returned directly. Files ending in
    ``_ensemble_mean.npy`` are handled by scoring every ensemble member
    (recursively) and averaging; anything else is delegated to
    ``score_checkpoint_on_test``.
    """
    cached = score_cache_path(root, split_seed, val_score_path)
    if cached.exists():
        return np.load(cached)

    if not val_score_path.name.endswith("_ensemble_mean.npy"):
        return score_checkpoint_on_test(
            root, split_seed, module, parts, data_cache, test_pairs, val_score_path, device, batch_size
        )

    member_scores = []
    for member_path in ensemble_member_scores(val_score_path):
        member_scores.append(
            score_val_path_on_test(
                root, split_seed, module, parts, data_cache, test_pairs, member_path, device, batch_size
            )
        )
    cached.parent.mkdir(parents=True, exist_ok=True)
    averaged = np.mean(member_scores, axis=0).astype(np.float32)
    np.save(cached, averaged)
    print(f"saved {cached}")
    return averaged


def select_variant_val_scores(post95, root: Path, split_seed: int, y: np.ndarray, max_cols: int) -> list[Path]:
    """Pick the validation score files with the highest best-F1.

    Candidates are ``dyn*/scores/val_*.npy`` under
    ``<root>/validation_runs/dynamic_seed<split_seed>``. A file is skipped when
    its path inside that run directory contains one of the excluded tags, when
    its length differs from *y*, or when its scores are (near) constant.

    Args:
        post95: object exposing ``best_f1(y, scores)``, which returns a
            5-tuple whose first item is the F1 and third item is the AUC.
        max_cols: maximum number of paths to return.

    Returns:
        Up to *max_cols* paths, ordered by descending F1 (ties keep the sorted
        file order).
    """
    run_dir = root / "validation_runs" / f"dynamic_seed{split_seed}"
    excluded_tags = ("hgt", "sage", "bce", "norm", "hinge")
    rows = []
    for path in sorted(run_dir.glob("dyn*/scores/val_*.npy")):
        # Match tags only against the part of the path below the run
        # directory. Testing the full path would drop every file whenever the
        # package root itself happens to contain a tag (e.g. ".../normal/...").
        rel = path.relative_to(run_dir).as_posix()
        if any(tag in rel for tag in excluded_tags):
            continue
        scores = np.load(path).astype(np.float32)
        if len(scores) != len(y) or np.std(scores) < 1e-8:
            continue
        f1, _, auc, _, _ = post95.best_f1(y, scores)
        rows.append((f1, auc, path))
    rows.sort(key=lambda r: r[0], reverse=True)
    return [p for _, _, p in rows[:max_cols]]


def variant_feature_matrix(post95, raw_scores: list[np.ndarray]) -> np.ndarray:
    """Turn per-model score vectors into a stacked float32 feature matrix.

    Each model contributes two columns (``post95.zscore`` then
    ``post95.rank01`` of its scores). Three summary columns follow: z-scored
    mean across models, z-scored standard deviation across models, and the
    rank transform of the mean. An empty input gives a ``(0, 0)`` array.
    """
    if not raw_scores:
        return np.zeros((0, 0), dtype=np.float32)

    per_model = [
        transform(model_scores)
        for model_scores in raw_scores
        for transform in (post95.zscore, post95.rank01)
    ]

    # One row per model, so axis 0 aggregates across models.
    stacked = np.vstack(raw_scores)
    consensus_z = post95.zscore(stacked.mean(axis=0))
    spread_z = post95.zscore(stacked.std(axis=0))
    consensus_rank = post95.rank01(stacked.mean(axis=0))

    columns = per_model + [consensus_z, spread_z, consensus_rank]
    return np.column_stack(columns).astype(np.float32)


def topk_content_similarity_fast(root: Path, pairs: np.ndarray, builder) -> np.ndarray:
    """Compute content-similarity features between candidates and author history.

    For every ``(author, paper)`` row in *pairs* the L2-normalised feature
    vector of the paper is compared (dot product) against the vectors of the
    papers in ``builder.author_papers[author]``. The three output columns are
    the maximum similarity and the mean of the top-3 and top-5 similarities
    (fewer when the history is shorter). Rows whose author has an empty
    history stay at zero.

    The result is cached under ``<root>/validation_runs/feature_cache`` using a
    file name derived from the number of pairs and the two column sums.
    """
    cache_dir = root / "validation_runs" / "feature_cache"
    cache_dir.mkdir(parents=True, exist_ok=True)
    n_pairs = len(pairs)
    author_sum = int(pairs[:, 0].sum())
    paper_sum = int(pairs[:, 1].sum())
    cache_file = cache_dir / f"topk_content_{n_pairs}_{author_sum}_{paper_sum}.npy"
    if cache_file.exists():
        return np.load(cache_file)

    feature_file = root / "data_and_docs" / "feature.pkl"
    with feature_file.open("rb") as handle:
        embeddings = pkl.load(handle).numpy().astype(np.float32)
    row_norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    embeddings /= row_norms + 1e-8

    features = np.zeros((n_pairs, 3), dtype=np.float32)

    # Stable sort by author so each author's rows form one contiguous run.
    by_author = np.argsort(pairs[:, 0], kind="mergesort")
    sorted_authors = pairs[by_author, 0]
    run_starts = np.flatnonzero(sorted_authors[1:] != sorted_authors[:-1]) + 1
    edges = np.r_[0, run_starts, len(by_author)]

    for start, stop in zip(edges[:-1], edges[1:]):
        rows = by_author[start:stop]
        author_id = int(pairs[rows[0], 0])
        history = np.asarray(list(builder.author_papers[author_id]), dtype=np.int64)
        if len(history) == 0:
            continue
        candidates = pairs[rows, 1].astype(np.int64)
        similarity = embeddings[candidates] @ embeddings[history].T
        features[rows, 0] = similarity.max(axis=1)
        for column, k in enumerate((3, 5), start=1):
            # Cap k at the history length so partition stays in range.
            take = min(k, similarity.shape[1])
            best = np.partition(similarity, -take, axis=1)[:, -take:]
            features[rows, column] = best.mean(axis=1)

    np.save(cache_file, features)
    return features


def make_submissions(root: Path, out_dir: Path, pred_score: np.ndarray, ratios: list[float]) -> None:
    """Write one thresholded submission CSV per requested positive ratio.

    For each ratio the ``round(len(pred_score) * ratio)`` highest-scoring rows
    are labelled 1, then every row flagged in
    ``<root>/cached_scores/test_known_mask.npy`` is forced to 1. Each file is
    written to ``<out_dir>/submission_post95_ens_r<ratio>.csv`` with columns
    ``Index`` and ``Predicted``, and a one-line summary is printed.
    """
    known = np.load(root / "cached_scores" / "test_known_mask.npy").astype(bool)
    total = len(pred_score)
    # The ranking does not depend on the ratio, so sort once.
    ranking = np.argsort(pred_score)
    for ratio in ratios:
        n_pos = min(max(int(round(total * ratio)), 0), total)
        pred = np.zeros(total, dtype=np.int8)
        # Guard the zero case: ``ranking[-0:]`` is the WHOLE array, which
        # would mark every row positive instead of none.
        if n_pos > 0:
            pred[ranking[-n_pos:]] = 1
        pred[known] = 1
        sub = pd.DataFrame({"Index": np.arange(len(pred), dtype=np.int64), "Predicted": pred})
        path = out_dir / f"submission_post95_ens_r{ratio:.3f}.csv"
        sub.to_csv(path, index=False)
        print(f"{path} positives={int(pred.sum())} ratio={pred.mean():.6f}")


def main() -> None:
    """Fit a LightGBM stacker on validation features and write test submissions.

    Steps, in order:
      1. Parse CLI arguments and load the three project modules by file path
         from ``<package-root>/code``.
      2. Build the validation feature matrix (rank features of the main score,
         explicit graph features, negative-evidence features, top-k content
         similarity, plus features from the selected variant score files).
      3. Fit an ``LGBMClassifier`` on that matrix against the validation labels.
      4. Build the test feature matrix with the same column layout, scoring
         the test pairs with the checkpoints behind each validation score file.
      5. Save the predicted probabilities and one submission CSV per ratio.

    All outputs go to
    ``<package-root>/validation_runs/dynamic_seed<split-seed>/post95_submission``.
    """
    parser = argparse.ArgumentParser()
    # Default package root is the directory one level above this script's folder.
    parser.add_argument("--package-root", type=Path, default=Path(__file__).resolve().parents[1])
    parser.add_argument("--split-seed", type=int, default=202)
    parser.add_argument("--main-val-score-file", type=Path, required=True)
    parser.add_argument("--device", default="cuda:0" if torch.cuda.is_available() else "cpu")
    parser.add_argument("--batch-size", type=int, default=131072)
    parser.add_argument("--max-variant-cols", type=int, default=20)
    parser.add_argument("--seed", type=int, default=202)
    parser.add_argument("--ratios", nargs="*", type=float, default=[0.498, 0.500, 0.502, 0.504, 0.505])
    args = parser.parse_args()

    root = args.package_root
    args.main_val_score_file = args.main_val_score_file.resolve()
    # Project modules are imported by path rather than by package name.
    stack_mod = load_module("stack_rank_calibration", root / "code" / "stack_rank_calibration.py")
    lgcn_mod = load_module("train_val_lgcn_ensemble", root / "code" / "train_val_lgcn_ensemble.py")
    post95 = load_module("post95_ablation", root / "code" / "post95_ablation.py")

    out_dir = root / "validation_runs" / f"dynamic_seed{args.split_seed}" / "post95_submission"
    out_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): 0.9 is presumably the train fraction (build_parts below
    # passes train_frac=0.9) -- confirm against make_notebook_style_split.
    train_refs, val_pairs = lgcn_mod.make_notebook_style_split(root, args.split_seed, 0.9)
    val_pairs_arr = val_pairs[["source", "target"]].to_numpy(np.int64)
    y = val_pairs["label"].to_numpy(np.int8)
    main_val_score = np.load(args.main_val_score_file).astype(np.float32)

    print("building validation features")
    # Validation graph features are built from the training references only.
    val_builder = stack_mod.ExplicitGraphFeatures(root, train_refs)
    X_val_hand = val_builder.transform(val_pairs_arr)
    # The column order here must match the X_test construction further down.
    X_val = np.column_stack(
        [
            stack_mod.add_rank_features(val_pairs_arr, main_val_score),
            X_val_hand,
            post95.negative_evidence_features(X_val_hand, main_val_score),
            topk_content_similarity_fast(root, val_pairs_arr, val_builder),
        ]
    ).astype(np.float32)

    selected_paths = select_variant_val_scores(post95, root, args.split_seed, y, args.max_variant_cols)
    # Record which variant score files were used, one path per line.
    (out_dir / "selected_variant_val_scores.txt").write_text("\n".join(str(p) for p in selected_paths) + "\n")
    X_val_var = variant_feature_matrix(post95, [np.load(p).astype(np.float32) for p in selected_paths])
    X_val = np.column_stack([X_val, X_val_var]).astype(np.float32)
    print(f"validation matrix {X_val.shape}")

    clf = lgb.LGBMClassifier(
        n_estimators=1200,
        learning_rate=0.025,
        num_leaves=31,
        subsample=0.9,
        colsample_bytree=0.9,
        reg_lambda=5.0,
        min_child_samples=80,
        objective="binary",
        verbose=-1,
        random_state=args.seed,
    )
    # The stacker is trained on the validation pairs and their labels.
    clf.fit(X_val, y)

    print("building test features")
    test_pairs = np.array(read_txt(root / "data_and_docs" / "bipartite_test_ann.txt"), dtype=np.int64)
    # NOTE(review): the meaning of 79937 is not visible here; the same literal
    # is passed to module.build_data in score_checkpoint_on_test -- confirm.
    parts = lgcn_mod.build_parts(root, None, 79937, split_seed=args.split_seed, train_frac=0.9)
    # Shared across all checkpoint scoring calls so graph data is built once
    # per (use_citation, use_coauthor) combination.
    data_cache = {}
    main_test_score = score_val_path_on_test(
        root,
        args.split_seed,
        lgcn_mod,
        parts,
        data_cache,
        test_pairs,
        args.main_val_score_file,
        args.device,
        args.batch_size,
    )
    # Test graph features use every row of the training annotation file.
    full_refs = pd.DataFrame(read_txt(root / "data_and_docs" / "bipartite_train_ann.txt"), columns=["source", "target"])
    test_builder = stack_mod.ExplicitGraphFeatures(root, full_refs)
    X_test_hand = test_builder.transform(test_pairs)
    # Same four feature groups, in the same order, as X_val above.
    X_test = np.column_stack(
        [
            stack_mod.add_rank_features(test_pairs, main_test_score),
            X_test_hand,
            post95.negative_evidence_features(X_test_hand, main_test_score),
            topk_content_similarity_fast(root, test_pairs, test_builder),
        ]
    ).astype(np.float32)

    # One test score vector per selected validation score file, in the same
    # order, so the variant columns line up with X_val_var.
    test_variant_scores = [
        score_val_path_on_test(root, args.split_seed, lgcn_mod, parts, data_cache, test_pairs, p, args.device, args.batch_size)
        for p in selected_paths
    ]
    X_test_var = variant_feature_matrix(post95, test_variant_scores)
    X_test = np.column_stack([X_test, X_test_var]).astype(np.float32)
    print(f"test matrix {X_test.shape}")

    # Column 1 of predict_proba is used as the positive-class score.
    pred_score = clf.predict_proba(X_test)[:, 1].astype(np.float32)
    np.save(out_dir / "test_post95_ens_pred.npy", pred_score)
    make_submissions(root, out_dir, pred_score, args.ratios)


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()