| |
"""Generate N sequential (x, y, true k_state) samples with k OUTSIDE the training range,
run all trained models on them, and report accuracy."""
| import os, sys, time, secrets |
| import numpy as np |
| import pandas as pd |
| from sklearn.preprocessing import StandardScaler |
| from sklearn.metrics import accuracy_score, roc_auc_score |
| import xgboost as xgb |
| import lightgbm as lgb |
| import torch, torch.nn as nn |
|
|
# Curve parameters. The values match the published secp256k1 domain parameters
# (curve y^2 = x^3 + 7 over the prime field of size p).
# p: field prime.
p = (1 << 256) - (1 << 32) - 977
# n: order of the group generated by the base point (Gx, Gy).
n = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
# Gx, Gy: affine coordinates of the base point G, written in decimal.
Gx = 55066263022277343669578718895168534326250603453777594175500187360389116729240
Gy = 32670510020758816978083085130507043184471273380659243275938904335757337482424
|
|
def inv(a):
    """Return a**(p-2) mod p, the modular inverse of ``a`` for prime ``p``.

    Uses Fermat's little theorem. For ``a`` congruent to 0 modulo ``p`` the
    result is 0 (there is no inverse and no exception is raised).
    """
    fermat_exponent = p - 2
    return pow(a, fermat_exponent, p)
def add(P, Q):
    """Add two affine curve points; ``None`` stands for the point at infinity.

    Returns the sum as an ``(x, y)`` tuple reduced modulo ``p``, or ``None``
    when the two points are inverses of each other.
    """
    # The point at infinity is the identity element.
    if P is None:
        return Q
    if Q is None:
        return P

    (px, py), (qx, qy) = P, Q

    # Same x with y-coordinates summing to 0 mod p: the points cancel out.
    if px == qx and (py + qy) % p == 0:
        return None

    if P == Q:
        # Tangent slope (doubling); the curve has no linear x term.
        slope = (3 * px * px) * inv(2 * py) % p
    else:
        # Chord slope through two distinct points.
        slope = (qy - py) * inv(qx - px) % p

    rx = (slope * slope - px - qx) % p
    ry = (slope * (px - rx) - py) % p
    return (rx, ry)
def mul(k, P):
    """Return the scalar multiple ``k * P`` via right-to-left double-and-add.

    Args:
        k: Integer scalar. Any integer is accepted; it is reduced modulo the
            group order ``n`` first, so negative values and values >= ``n``
            map to their canonical representative.
        P: Affine point ``(x, y)`` or ``None`` for the point at infinity.
            Assumed to lie in the order-``n`` group generated by (Gx, Gy).

    Returns:
        The resulting point as an ``(x, y)`` tuple, or ``None`` (point at
        infinity) when the reduced scalar is 0.
    """
    # Without this reduction a negative k never terminates: `k >>= 1` on a
    # negative int converges to -1, not 0. Since n * P is the point at
    # infinity for points in the group, k * P == (k % n) * P.
    k %= n

    R = None
    while k:
        if k & 1:
            R = add(R, P)
        P = add(P, P)
        k >>= 1
    return R
|
|
def num_features(v, prefix):
    """Build the per-number feature dict for the non-negative integer ``v``.

    Features cover the decimal representation (digit counts, sums, leading and
    trailing digits), the binary representation (bit length, popcount, parity)
    and residues modulo several small primes. Every key is ``prefix`` + "_" +
    feature name.
    """
    text = str(v)
    digits = list(map(int, text))
    digit_total = sum(digits)
    odd_digits = sum(d % 2 for d in digits)

    named = [
        ("num_digits", len(text)),
        ("first_digit", digits[0]),
        ("last_digit", digits[-1]),
        ("last2", v % 100),
        ("last3", v % 1000),
        ("digit_sum", digit_total),
        ("digit_sum_mod_9", digit_total % 9),
        ("even_digit_count", len(digits) - odd_digits),
        ("odd_digit_count", odd_digits),
        ("zero_count", text.count("0")),
        ("unique_digit_count", len(set(text))),
        ("bit_length", v.bit_length()),
        ("popcount", bin(v).count("1")),
        ("state", v % 2),  # parity of v
    ]
    named.extend((f"mod_{m}", v % m) for m in (3, 5, 7, 11, 13, 17, 19))

    return {f"{prefix}_{suffix}": value for suffix, value in named}
|
|
def featurize(x, y):
    """Return the feature dict for one point with integer coordinates (x, y).

    Combines the "x"- and "y"-prefixed per-number features from
    ``num_features`` with two cross features: whether x exceeds y, and the
    difference between their decimal digit sums.
    """
    x_digit_total = sum(map(int, str(x)))
    y_digit_total = sum(map(int, str(y)))

    features = {**num_features(x, "x"), **num_features(y, "y")}
    features["x_gt_y"] = 1 if x > y else 0
    features["digit_sum_diff_xy"] = x_digit_total - y_digit_total
    return features
|
|
def _walk_points(N, k_start):
    """Return (feature_rows, labels) for the points k*G, k = k_start .. k_start+N-1."""
    G = (Gx, Gy)
    t0 = time.time()
    # One full scalar multiplication to land just before the first sample ...
    P = mul(k_start - 1, G)
    print(f" jump to k_start-1 done in {time.time()-t0:.1f}s; now walking +G")
    t0 = time.time()
    feats, labels = [], []
    # ... then a single point addition per sample.
    for k in range(k_start, k_start + N):
        P = add(P, G)
        px, py = P
        feats.append(featurize(px, py))
        labels.append(k & 1)  # the target is the parity of k
    print(f" walked {N} steps in {time.time()-t0:.1f}s")
    return feats, labels


def main(N=10000, k_start=2_000_000):
    """Evaluate the saved models on N sequential points outside the training range.

    Sequential k = k_start .. k_start+N-1 (outside the 1..1M training range).
    Incremental P_{k+1} = P_k + G, ~1 modular inverse per step.

    The features of each point are scored by the saved XGBoost, LightGBM and
    MLP models and by their unweighted average; accuracy (with a normal
    approximation 95% CI) and ROC AUC are printed for each.

    Args:
        N: Number of points to evaluate; must be >= 1.
        k_start: First scalar of the walk; must be >= 1.

    Raises:
        ValueError: If ``N`` or ``k_start`` is smaller than 1.
        KeyError: If the training table has a feature column that
            ``featurize`` does not produce.
    """
    # Fail fast: N < 1 yields an empty feature matrix and a division by zero
    # in the CI, and k_start < 1 would ask for a negative scalar multiple.
    if N < 1:
        raise ValueError(f"N must be >= 1, got {N}")
    if k_start < 1:
        raise ValueError(f"k_start must be >= 1, got {k_start}")

    print(f"computing {N} sequential points: k = {k_start} .. {k_start+N-1}")
    feats, labels = _walk_points(N, k_start)

    # Use the training table only for its column order, so the feature matrix
    # lines up with what the models were fit on.
    df_tr = pd.read_parquet("features.parquet")
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df_tr.columns if c not in drop]
    missing = [c for c in feat_cols if c not in feats[0]]
    if missing:
        raise KeyError(f"featurize() does not produce training columns: {missing}")
    X = np.array([[r[c] for c in feat_cols] for r in feats], dtype=np.float32)
    y_true = np.array(labels, dtype=np.int8)
    print(f"label balance in batch: mean={y_true.mean():.4f} (expect ~0.5)")

    # XGBoost
    bst = xgb.XGBClassifier(); bst.load_model("results/xgb.json")
    p_xgb = bst.predict_proba(X)[:,1]

    # LightGBM (assumes a binary objective, so predict() yields P(class 1) -- TODO confirm)
    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = lgbm.predict(X)

    # MLP: re-fit the scaler on the first 70% of the training rows.
    # NOTE(review): presumably mirrors the split used at training time -- confirm.
    Xtr_full = df_tr[feat_cols].astype(np.float32).values
    sc = StandardScaler().fit(Xtr_full[:int(0.7*len(Xtr_full))])
    Xs = sc.transform(X)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = X.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D,512), nn.ReLU(),
        nn.Linear(512,512), nn.ReLU(),
        nn.Linear(512,256), nn.ReLU(),
        nn.Linear(256,1)
    ).to(device)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust
    # (consider weights_only=True where the installed torch supports it).
    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
    mlp.eval()
    with torch.no_grad():
        batch = torch.tensor(Xs, dtype=torch.float32, device=device)
        # torch.sigmoid avoids the overflow of 1/(1+exp(-z)) on large |z|.
        p_mlp = torch.sigmoid(mlp(batch).squeeze(1)).cpu().numpy()

    # Unweighted average of the three probability vectors.
    p_ens = (p_xgb + p_lgb + p_mlp) / 3

    # ROC AUC is undefined (roc_auc_score raises) when only one class is present.
    both_classes = len(set(labels)) > 1

    print(f"\n--- batch evaluation on {N} fresh (x, y) outside training range ---")
    def row(name, prob):
        acc = accuracy_score(y_true, prob>0.5)
        auc = roc_auc_score(y_true, prob) if both_classes else float("nan")
        # Normal-approximation (Wald) 95% interval for the accuracy.
        se = (acc*(1-acc)/N)**0.5; lo, hi = acc-1.96*se, acc+1.96*se
        return f" {name:9} acc={acc:.4f} (95% CI {lo:.4f}-{hi:.4f}) auc={auc:.4f}"
    print(row("XGBoost", p_xgb))
    print(row("LightGBM", p_lgb))
    print(row("MLP", p_mlp))
    print(row("Ensemble", p_ens))
    print("\n50% would be coin-flip. If a CI excludes 0.5, that'd be statistically suspicious — but theory says it won't.")
|
# Script entry point: an optional first CLI argument overrides the sample count.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    sample_count = int(cli_args[0]) if cli_args else 10000
    main(sample_count)
|
|