#!/usr/bin/env python3
"""Generate N (x, y, true k_state) pairs for sequential k OUTSIDE the training
range, run all trained models on them, and report accuracy."""
import os, sys, time, secrets
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import xgboost as xgb
import lightgbm as lgb
import torch, torch.nn as nn
# Curve domain parameters. The values match the published secp256k1
# parameters (field prime, group order, base point coordinates).

# Prime modulus of the underlying field.
p = (1 << 256) - (1 << 32) - 977

# Order of the base point (not referenced by the code visible in this file).
n = 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFE_BAAEDCE6_AF48A03B_BFD25E8C_D0364141

# Affine coordinates of the base point G.
Gx = 0x79BE667E_F9DCBBAC_55A06295_CE870B07_029BFCDB_2DCE28D9_59F2815B_16F81798
Gy = 0x483ADA77_26A3C465_5DA4FBFC_0E1108A8_FD17B448_A6855419_9C47D08F_FB10D4B8
def inv(a):
    """Return the multiplicative inverse of ``a`` modulo the field prime ``p``.

    Computed as ``a**(p-2) mod p`` (Fermat's little theorem). A multiple of
    ``p`` has no inverse; for such input this returns 0 instead of raising.
    """
    fermat_exponent = p - 2
    return pow(a, fermat_exponent, p)
def add(P, Q):
    """Add two curve points given in affine coordinates modulo ``p``.

    ``None`` represents the point at infinity (the group identity). Points
    are ``(x, y)`` tuples; the result is a new tuple or ``None``.
    """
    # Identity handling: O + Q = Q and P + O = P.
    if P is None:
        return Q
    if Q is None:
        return P

    x1, y1 = P
    x2, y2 = Q

    # Same x and y-coordinates that cancel: the points are mutual inverses
    # (this also covers doubling a point with y == 0), so the sum is infinity.
    if x1 == x2 and (y1 + y2) % p == 0:
        return None

    if P == Q:
        # Doubling: tangent slope 3*x^2 / (2*y); the formula has no `a` term.
        slope = (3 * x1 * x1) * inv(2 * y1) % p
    else:
        # Distinct points: slope of the chord through P and Q.
        slope = (y2 - y1) * inv(x2 - x1) % p

    x3 = (slope * slope - x1 - x2) % p
    y3 = (slope * (x1 - x3) - y1) % p
    return (x3, y3)
def mul(k, P):
    """Return the scalar multiple ``k * P`` via right-to-left double-and-add.

    Args:
        k: non-negative integer scalar.
        P: affine point ``(x, y)``, or ``None`` for the point at infinity.

    Returns:
        The point ``k * P``, or ``None`` when the result is the point at
        infinity (in particular for ``k == 0``).

    Raises:
        ValueError: if ``k`` is negative. Right-shifting a negative int
            converges to -1 and never reaches 0, so the loop below would
            otherwise never terminate.
    """
    if k < 0:
        raise ValueError(f"scalar k must be non-negative, got {k}")
    R = None  # accumulator, starts at the identity
    while k:
        if k & 1:
            R = add(R, P)
        # P holds 2^i * (original P) for the bit currently being examined.
        P = add(P, P)
        k >>= 1
    return R
def num_features(v, prefix):
    """Build the per-number feature dict for the integer ``v``.

    Features cover the decimal representation (digit counts, first/last
    digits, digit sums), the binary representation (bit length, popcount,
    parity) and residues modulo a few small primes. Every key is
    ``f"{prefix}_<feature>"``; insertion order is fixed.
    """
    text = str(v)
    digits = [int(ch) for ch in text]
    digit_total = sum(digits)
    odd_digits = sum(d % 2 for d in digits)

    base = {
        "num_digits": len(text),
        "first_digit": digits[0],
        "last_digit": digits[-1],
        "last2": v % 100,
        "last3": v % 1000,
        "digit_sum": digit_total,
        "digit_sum_mod_9": digit_total % 9,
        "even_digit_count": len(digits) - odd_digits,
        "odd_digit_count": odd_digits,
        "zero_count": text.count("0"),
        "unique_digit_count": len(set(text)),
        "bit_length": v.bit_length(),
        "popcount": bin(v).count("1"),
        "state": v % 2,  # parity of v
    }
    for modulus in (3, 5, 7, 11, 13, 17, 19):
        base[f"mod_{modulus}"] = v % modulus

    return {f"{prefix}_{name}": value for name, value in base.items()}
def featurize(x, y):
    """Return the full feature row for one point ``(x, y)``.

    The row is the union of ``num_features`` for each coordinate (prefixes
    ``"x"`` and ``"y"``) plus two cross features: whether ``x > y`` and the
    difference between the coordinates' decimal digit sums.
    """
    digit_sum_x = sum(map(int, str(x)))
    digit_sum_y = sum(map(int, str(y)))
    return {
        **num_features(x, "x"),
        **num_features(y, "y"),
        "x_gt_y": int(x > y),
        "digit_sum_diff_xy": digit_sum_x - digit_sum_y,
    }
def main(N=10000, k_start=2_000_000):
    """Evaluate the trained models on N sequential points k*G.

    Sequential k = k_start .. k_start+N-1 (outside the 1..1M training range).
    Incremental P_{k+1} = P_k + G, ~1 modular inverse per step. Each (x, y)
    is featurized, scored by XGBoost, LightGBM, the MLP and their averaged
    ensemble, and accuracy (with a 95% CI) and AUC are printed per model.

    Reads ``features.parquet``, ``results/xgb.json``, ``results/lgbm.txt`` and
    ``results/mlp.pt`` relative to the current working directory.

    Args:
        N: number of consecutive scalars to evaluate; must be >= 1.
        k_start: first scalar of the batch; must be >= 1.

    Raises:
        ValueError: if ``N < 1`` or ``k_start < 1``.
    """
    # Validate first: k_start < 1 would ask mul() for a negative scalar, and
    # N < 1 would only fail much later on an empty feature matrix.
    if N < 1:
        raise ValueError(f"N must be >= 1, got {N}")
    if k_start < 1:
        raise ValueError(f"k_start must be >= 1, got {k_start}")

    G = (Gx, Gy)
    print(f"computing {N} sequential points: k = {k_start} .. {k_start+N-1}")
    t0 = time.time()
    P = mul(k_start - 1, G)  # one full scalar mult to reach k_start-1
    print(f" jump to k_start-1 done in {time.time()-t0:.1f}s; now walking +G")

    t0 = time.time()
    feats, labels = [], []
    for k in range(k_start, k_start + N):
        P = add(P, G)
        px, py = P
        feats.append(featurize(px, py))
        labels.append(k & 1)  # label is the parity of k
    print(f" walked {N} steps in {time.time()-t0:.1f}s")

    # column order must match training
    df_tr = pd.read_parquet("features.parquet")
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df_tr.columns if c not in drop]
    X = np.array([[r[c] for c in feat_cols] for r in feats], dtype=np.float32)
    y_true = np.array(labels, dtype=np.int8)
    print(f"label balance in batch: mean={y_true.mean():.4f} (expect ~0.5)")

    # XGBoost
    bst = xgb.XGBClassifier()
    bst.load_model("results/xgb.json")
    p_xgb = bst.predict_proba(X)[:, 1]

    # LightGBM
    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = lgbm.predict(X)

    # MLP: the scaler is refit on the first 70% of the training rows.
    # NOTE(review): presumably this mirrors the split used at training time;
    # confirm against the training script.
    Xtr_full = df_tr[feat_cols].astype(np.float32).values
    sc = StandardScaler().fit(Xtr_full[:int(0.7 * len(Xtr_full))])
    Xs = sc.transform(X)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = X.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 256), nn.ReLU(),
        nn.Linear(256, 1),
    ).to(device)
    # NOTE(review): torch.load unpickles the file; only load checkpoints you
    # trust (newer torch versions accept weights_only=True for state dicts).
    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
    mlp.eval()
    with torch.no_grad():
        logits = mlp(torch.tensor(Xs, dtype=torch.float32, device=device)).squeeze(1)
        # torch.sigmoid avoids the float32 exp() overflow of 1/(1+exp(-z)).
        p_mlp = torch.sigmoid(logits).cpu().numpy()

    p_ens = (p_xgb + p_lgb + p_mlp) / 3

    print(f"\n--- batch evaluation on {N} fresh (x, y) outside training range ---")
    # AUC is undefined when the batch holds a single class (e.g. N == 1).
    both_classes = len(set(labels)) == 2

    def report(name, prob):
        acc = accuracy_score(y_true, prob > 0.5)
        auc = roc_auc_score(y_true, prob) if both_classes else float("nan")
        # 95% CI for accuracy via normal approx
        se = (acc * (1 - acc) / N) ** 0.5
        lo, hi = acc - 1.96 * se, acc + 1.96 * se
        return f" {name:9} acc={acc:.4f} (95% CI {lo:.4f}-{hi:.4f}) auc={auc:.4f}"

    print(report("XGBoost", p_xgb))
    print(report("LightGBM", p_lgb))
    print(report("MLP", p_mlp))
    print(report("Ensemble", p_ens))
    print("\n50% would be coin-flip. If a CI excludes 0.5, that'd be statistically suspicious — but theory says it won't.")
if __name__ == "__main__":
    # Optional first CLI argument: number of points to evaluate (default 10000).
    cli_args = sys.argv[1:]
    batch_size = int(cli_args[0]) if cli_args else 10000
    main(batch_size)
|