#!/usr/bin/env python3
"""Generate N (x, y, true k_state) samples for sequential k OUTSIDE the training range,
run all trained models on them, and report accuracy."""
import os, sys, time, secrets
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import xgboost as xgb
import lightgbm as lgb
import torch, torch.nn as nn

# secp256k1 domain parameters (short Weierstrass curve with a = 0, as the
# doubling slope 3*x^2 / (2*y) in add() assumes).
# p: prime modulus of the base field; all coordinate arithmetic is done mod p.
p = 2**256 - 2**32 - 977
# n: order of the base point G.
n = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
# (Gx, Gy): affine coordinates of the base point G.
Gx = 55066263022277343669578718895168534326250603453777594175500187360389116729240
Gy = 32670510020758816978083085130507043184471273380659243275938904335757337482424

def inv(a):
    """Return the inverse of ``a`` modulo the field prime ``p``.

    Computed as ``a**(p-2) mod p`` (Fermat's little theorem).  ``a`` may be
    negative or unreduced; the result is always in ``[0, p)``.  When ``a`` is
    a multiple of ``p`` there is no inverse and this returns 0 rather than
    raising.
    """
    fermat_exponent = p - 2
    return pow(a, fermat_exponent, p)
def add(P, Q):
    """Add two affine curve points; ``None`` stands for the point at infinity.

    Points are ``(x, y)`` tuples with coordinates reduced mod ``p``.  Handles
    the identity, the inverse pair (P + (-P) = infinity, which also covers
    doubling a point with y == 0), doubling, and the generic chord case.
    """
    # The point at infinity is the identity element.
    if P is None:
        return Q
    if Q is None:
        return P

    (px, py), (qx, qy) = P, Q

    # Same x and opposite y: the points cancel.
    if px == qx and (py + qy) % p == 0:
        return None

    if P == Q:
        # Tangent slope for doubling (curve coefficient a is 0).
        slope = (3 * px * px) * inv(2 * py) % p
    else:
        # Chord slope through two distinct points.
        slope = (qy - py) * inv(qx - px) % p

    rx = (slope * slope - px - qx) % p
    ry = (slope * (px - rx) - py) % p
    return (rx, ry)
def mul(k, P):
    """Return the scalar multiple ``k * P`` using double-and-add (LSB first).

    ``k`` is first reduced modulo the group order ``n``.  This makes negative
    scalars well defined (``-1 * P`` becomes ``(n-1) * P``); without the
    reduction ``while k`` would never terminate for ``k < 0`` because an
    arithmetic right shift of a negative int never reaches 0.  The reduction
    assumes ``P`` lies in the order-``n`` group generated by G.

    Returns ``None`` (the point at infinity) when ``k`` is a multiple of ``n``
    or when ``P`` is ``None``.
    """
    k %= n
    result = None
    addend = P
    while k:
        if k & 1:
            result = add(result, addend)
        addend = add(addend, addend)   # addend = 2^i * P for the next bit
        k >>= 1
    return result

def num_features(v, prefix):
    """Build the per-number feature dict for a non-negative integer ``v``.

    Features cover the decimal representation (digit counts, sums, first and
    last digits), the binary representation (bit length, popcount, parity)
    and residues modulo a fixed set of small primes.  Every key is prefixed
    with ``"{prefix}_"``.
    """
    text = str(v)
    digits = [int(ch) for ch in text]
    digit_total = sum(digits)
    # Decimal digits are 0..9, so d % 2 is 1 exactly for the odd ones.
    odd_digits = sum(d % 2 for d in digits)
    even_digits = len(digits) - odd_digits

    unprefixed = {
        "num_digits": len(text),
        "first_digit": digits[0],
        "last_digit": digits[-1],
        "last2": v % 100,
        "last3": v % 1000,
        "digit_sum": digit_total,
        "digit_sum_mod_9": digit_total % 9,
        "even_digit_count": even_digits,
        "odd_digit_count": odd_digits,
        "zero_count": text.count("0"),
        "unique_digit_count": len(set(text)),
        "bit_length": v.bit_length(),
        "popcount": bin(v).count("1"),
        "state": v % 2,
    }
    for modulus in (3, 5, 7, 11, 13, 17, 19):
        unprefixed[f"mod_{modulus}"] = v % modulus

    return {f"{prefix}_{name}": value for name, value in unprefixed.items()}

def featurize(x, y):
    """Return the full feature row for one point with coordinates ``(x, y)``.

    The row holds the ``x_``-prefixed and ``y_``-prefixed per-number features
    plus two cross features: whether ``x > y`` and the difference of the two
    decimal digit sums.
    """
    x_feats = num_features(x, "x")
    y_feats = num_features(y, "y")
    return {
        **x_feats,
        **y_feats,
        "x_gt_y": int(x > y),
        # The per-number features already carry each coordinate's digit sum.
        "digit_sum_diff_xy": x_feats["x_digit_sum"] - y_feats["y_digit_sum"],
    }

def main(N=10000, k_start=2_000_000):
    """Score the saved models on N curve points for sequential scalars.

    Sequential k = k_start .. k_start+N-1 (outside the 1..1M training range).
    Incremental P_{k+1} = P_k + G, ~1 modular inverse per step.  Each point is
    featurized, the feature matrix is assembled in the column order of
    ``features.parquet``, and XGBoost, LightGBM, the MLP and their unweighted
    average are scored against the true parity of k.

    Reads ``features.parquet``, ``results/xgb.json``, ``results/lgbm.txt`` and
    ``results/mlp.pt`` relative to the working directory and prints a report.

    Args:
        N: number of points to evaluate; must be >= 1.
        k_start: first scalar of the walk; must be >= 1.

    Raises:
        ValueError: if ``N < 1`` or ``k_start < 1``.
        KeyError: if ``features.parquet`` contains a feature column that
            ``featurize`` does not produce.
    """
    # Validate up front: N == 0 would yield an empty matrix with no column
    # axis, and k_start < 1 would ask for a negative scalar multiple.
    if N < 1:
        raise ValueError(f"N must be >= 1, got {N}")
    if k_start < 1:
        raise ValueError(f"k_start must be >= 1, got {k_start}")

    G = (Gx, Gy)
    print(f"computing {N} sequential points: k = {k_start} .. {k_start+N-1}")
    t0 = time.time()
    P = mul(k_start - 1, G)            # one full scalar mult to reach k_start-1
    print(f"  jump to k_start-1 done in {time.time()-t0:.1f}s; now walking +G")
    t0 = time.time()
    feats, labels = [], []
    for k in range(k_start, k_start + N):
        P = add(P, G)
        px, py = P
        feats.append(featurize(px, py))
        labels.append(k & 1)           # label is the parity of k
    print(f"  walked {N} steps in {time.time()-t0:.1f}s")

    # column order must match training
    df_tr = pd.read_parquet("features.parquet")
    # NOTE(review): abs_x_minus_y is not produced by featurize(); presumably it
    # was excluded at training time as well -- confirm against the training script.
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df_tr.columns if c not in drop]
    missing = [c for c in feat_cols if c not in feats[0]]
    if missing:
        raise KeyError(
            f"features.parquet has columns featurize() does not produce: {missing}"
        )
    X = np.array([[r[c] for c in feat_cols] for r in feats], dtype=np.float32)
    y_true = np.array(labels, dtype=np.int8)
    print(f"label balance in batch: mean={y_true.mean():.4f}  (expect ~0.5)")

    # XGBoost
    bst = xgb.XGBClassifier()
    bst.load_model("results/xgb.json")
    p_xgb = bst.predict_proba(X)[:, 1]

    # LightGBM
    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = lgbm.predict(X)

    # MLP
    # NOTE(review): the scaler is re-fit on the first 70% of the training rows;
    # presumably this mirrors the split used at training time -- confirm.
    Xtr_full = df_tr[feat_cols].astype(np.float32).values
    sc = StandardScaler().fit(Xtr_full[:int(0.7 * len(Xtr_full))])
    Xs = sc.transform(X)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = X.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 256), nn.ReLU(),
        nn.Linear(256, 1)
    ).to(device)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
    mlp.eval()
    with torch.no_grad():
        inputs = torch.tensor(Xs, dtype=torch.float32, device=device)
        logits = mlp(inputs).squeeze(1)
        # torch.sigmoid avoids the overflow warning of 1/(1+exp(-z)) for
        # large-magnitude negative logits.
        p_mlp = torch.sigmoid(logits).cpu().numpy()

    p_ens = (p_xgb + p_lgb + p_mlp) / 3

    # roc_auc_score raises when only one class is present (e.g. N == 1).
    has_both_classes = np.unique(y_true).size > 1

    print(f"\n--- batch evaluation on {N} fresh (x, y) outside training range ---")

    def row(name, prob):
        acc = accuracy_score(y_true, prob > 0.5)
        auc = roc_auc_score(y_true, prob) if has_both_classes else float("nan")
        # 95% CI for accuracy via normal approx
        se = (acc * (1 - acc) / N) ** 0.5
        lo, hi = acc - 1.96 * se, acc + 1.96 * se
        return f"  {name:9}  acc={acc:.4f}  (95% CI {lo:.4f}-{hi:.4f})  auc={auc:.4f}"

    print(row("XGBoost",   p_xgb))
    print(row("LightGBM",  p_lgb))
    print(row("MLP",       p_mlp))
    print(row("Ensemble",  p_ens))
    print("\n50% would be coin-flip.  If a CI excludes 0.5, that'd be statistically suspicious — but theory says it won't.")

if __name__ == "__main__":
    # Optional first CLI argument: number of points to evaluate (default 10000).
    cli_args = sys.argv[1:]
    if cli_args:
        main(int(cli_args[0]))
    else:
        main(10000)