hackinet's picture
Initial upload: negative-result study on secp256k1 parity prediction.
6b93c3b verified
#!/usr/bin/env python3
"""Generate N sequential (x, y, true k_state) pairs OUTSIDE the training range
(consecutive scalars k starting at k_start), run all trained models on them,
and report accuracy."""
import os, sys, time, secrets
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import xgboost as xgb
import lightgbm as lgb
import torch, torch.nn as nn
# secp256k1 domain parameters, kept as plain Python ints.
# Field prime: every coordinate computation in add()/inv() is reduced modulo p.
p = 2**256 - 2**32 - 977
# Order of the base point G (not referenced by the functions visible in this file).
n = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
# Affine x / y coordinates of the base point G, written in decimal.
Gx = 55066263022277343669578718895168534326250603453777594175500187360389116729240
Gy = 32670510020758816978083085130507043184471273380659243275938904335757337482424
def inv(a):
    """Return the multiplicative inverse of ``a`` modulo the field prime ``p``.

    Computed as ``a ** (p - 2) mod p`` (Fermat's little theorem, valid because
    ``p`` is prime). When ``a`` is a multiple of ``p`` no inverse exists and
    the expression evaluates to 0.
    """
    fermat_exponent = p - 2
    return pow(a, fermat_exponent, p)
def add(P, Q):
    """Add two affine curve points and return their sum.

    ``None`` stands for the point at infinity (the group identity). Points are
    ``(x, y)`` pairs; all arithmetic is done modulo the field prime ``p``.

    Returns:
        The sum as an ``(x, y)`` tuple, or ``None`` when the operands are
        inverses of each other (same x, y-coordinates summing to 0 mod p).
    """
    # The identity element leaves the other operand unchanged.
    if P is None:
        return Q
    if Q is None:
        return P

    (px, py), (qx, qy) = P, Q

    # P + (-P) is the point at infinity.
    if px == qx and (py + qy) % p == 0:
        return None

    if P == Q:
        # Doubling: slope of the tangent line at P.
        slope = (3 * px * px) * inv(2 * py) % p
    else:
        # Distinct points: slope of the chord through P and Q.
        slope = (qy - py) * inv(qx - px) % p

    rx = (slope * slope - px - qx) % p
    ry = (slope * (px - rx) - py) % p
    return (rx, ry)
def mul(k, P):
    """Return the scalar multiple ``k * P`` via right-to-left double-and-add.

    Args:
        k: Non-negative integer scalar.
        P: Affine point ``(x, y)``, or ``None`` for the point at infinity.

    Returns:
        The point ``k * P`` as produced by ``add``; ``None`` when ``k == 0``
        (the point at infinity).

    Raises:
        ValueError: If ``k`` is negative. Right-shifting a negative Python int
            converges to -1 and never reaches 0, so the loop below would
            otherwise spin forever.
    """
    if k < 0:
        raise ValueError(f"scalar k must be non-negative, got {k}")
    R = None  # accumulator, starts at the point at infinity
    while k:
        if k & 1:
            # Current bit is set: fold the current power-of-two multiple in.
            R = add(R, P)
        P = add(P, P)  # advance to the next power-of-two multiple
        k >>= 1
    return R
def num_features(v, prefix):
    """Build the per-number feature dict for the integer ``v``.

    Features are derived from the decimal digits of ``v`` (counts, sums,
    leading/trailing digits), from its binary form (bit length, popcount,
    parity) and from its residues modulo a few small primes. Every key is
    ``f"{prefix}_<feature>"``.

    Args:
        v: Non-negative integer to describe.
        prefix: String prepended (with an underscore) to every feature name.

    Returns:
        A dict mapping feature name to an int value, in a fixed key order.
    """
    text = str(v)
    digits = [int(ch) for ch in text]
    digit_total = sum(digits)

    named_values = [
        ("num_digits", len(text)),
        ("first_digit", digits[0]),
        ("last_digit", digits[-1]),
        ("last2", v % 100),
        ("last3", v % 1000),
        ("digit_sum", digit_total),
        ("digit_sum_mod_9", digit_total % 9),
        ("even_digit_count", sum(1 for d in digits if d % 2 == 0)),
        ("odd_digit_count", sum(1 for d in digits if d % 2 == 1)),
        ("zero_count", text.count("0")),
        ("unique_digit_count", len(set(text))),
        ("bit_length", v.bit_length()),
        ("popcount", bin(v).count("1")),
        ("state", v % 2),  # parity of v
    ]
    # Residues modulo small primes, appended in ascending order.
    for modulus in (3, 5, 7, 11, 13, 17, 19):
        named_values.append((f"mod_{modulus}", v % modulus))

    return {f"{prefix}_{name}": value for name, value in named_values}
def featurize(x, y):
    """Return the full feature row for one point with coordinates ``(x, y)``.

    The row holds the ``num_features`` output for ``x`` (prefix ``"x"``) and
    for ``y`` (prefix ``"y"``), plus two cross features: ``x_gt_y`` (1 when
    ``x > y``, else 0) and ``digit_sum_diff_xy`` (decimal digit sum of ``x``
    minus that of ``y``).
    """
    x_digit_sum = sum(map(int, str(x)))
    y_digit_sum = sum(map(int, str(y)))

    row = {**num_features(x, "x"), **num_features(y, "y")}
    row["x_gt_y"] = 1 if x > y else 0
    row["digit_sum_diff_xy"] = x_digit_sum - y_digit_sum
    return row
def main(N=10000, k_start=2_000_000):
    """Evaluate the trained models on N sequential curve points and print a report.

    Sequential k = k_start .. k_start+N-1 (outside the 1..1M training range).
    Incremental P_{k+1} = P_k + G, ~1 modular inverse per step.

    Each point k*G is featurized, the label is the parity of k, and accuracy
    (with a normal-approximation 95% CI) and ROC AUC are printed for XGBoost,
    LightGBM, the MLP and the plain average of the three.

    Args:
        N: Number of consecutive scalars to evaluate; must be >= 1. With
            N == 1 the batch holds a single label, for which ROC AUC is
            undefined.
        k_start: First scalar of the batch; must be >= 1 (k = 0 is the point
            at infinity, which has no affine coordinates to featurize).

    Raises:
        ValueError: If ``N < 1`` or ``k_start < 1``.

    Reads ``features.parquet``, ``results/xgb.json``, ``results/lgbm.txt`` and
    ``results/mlp.pt`` from the current working directory.
    """
    # Fail fast: N == 0 would divide by zero in the CI computation, and a
    # non-positive k_start would hang in mul() or hit the point at infinity.
    if N < 1:
        raise ValueError(f"N must be >= 1, got {N}")
    if k_start < 1:
        raise ValueError(f"k_start must be >= 1, got {k_start}")

    G = (Gx, Gy)
    print(f"computing {N} sequential points: k = {k_start} .. {k_start+N-1}")
    t0 = time.time()
    P = mul(k_start - 1, G)  # one full scalar mult to reach k_start-1
    print(f" jump to k_start-1 done in {time.time()-t0:.1f}s; now walking +G")

    t0 = time.time()
    feats, labels = [], []
    for k in range(k_start, k_start + N):
        P = add(P, G)  # P is now k*G
        px, py = P
        feats.append(featurize(px, py))
        labels.append(k & 1)  # target: parity of the scalar
    print(f" walked {N} steps in {time.time()-t0:.1f}s")

    # column order must match training
    df_tr = pd.read_parquet("features.parquet")
    # Non-feature columns of the training frame; "abs_x_minus_y" is not
    # produced by featurize(), so it is excluded here as well.
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df_tr.columns if c not in drop]
    X = np.array([[r[c] for c in feat_cols] for r in feats], dtype=np.float32)
    y_true = np.array(labels, dtype=np.int8)
    print(f"label balance in batch: mean={y_true.mean():.4f} (expect ~0.5)")

    # XGBoost
    bst = xgb.XGBClassifier()
    bst.load_model("results/xgb.json")
    p_xgb = bst.predict_proba(X)[:, 1]

    # LightGBM
    # NOTE(review): used below as P(label == 1); this presumes the booster was
    # trained with a binary objective — confirm against the training script.
    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = lgbm.predict(X)

    # MLP
    # The scaler is re-fit on the first 70% of the training rows — presumably
    # the same split that was used at training time; verify before trusting
    # the MLP numbers.
    Xtr_full = df_tr[feat_cols].astype(np.float32).values
    sc = StandardScaler().fit(Xtr_full[:int(0.7 * len(Xtr_full))])
    Xs = sc.transform(X)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = X.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 256), nn.ReLU(),
        nn.Linear(256, 1),
    ).to(device)
    # SECURITY NOTE: torch.load unpickles the file; only load checkpoints from
    # a trusted source (consider weights_only=True where the installed torch
    # version supports it).
    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
    mlp.eval()
    with torch.no_grad():
        batch = torch.tensor(Xs, dtype=torch.float32, device=device)
        logits = mlp(batch).squeeze(1).cpu().numpy()
    p_mlp = 1 / (1 + np.exp(-logits))  # sigmoid: logits -> probabilities

    # Unweighted average of the three probability vectors.
    p_ens = (p_xgb + p_lgb + p_mlp) / 3

    print(f"\n--- batch evaluation on {N} fresh (x, y) outside training range ---")

    def row(name, prob):
        """Format one report line: accuracy, its 95% CI, and ROC AUC."""
        acc = accuracy_score(y_true, prob > 0.5)
        auc = roc_auc_score(y_true, prob)
        # 95% CI for accuracy via normal approx
        se = (acc * (1 - acc) / N) ** 0.5
        lo, hi = acc - 1.96 * se, acc + 1.96 * se
        return f" {name:9} acc={acc:.4f} (95% CI {lo:.4f}-{hi:.4f}) auc={auc:.4f}"

    print(row("XGBoost", p_xgb))
    print(row("LightGBM", p_lgb))
    print(row("MLP", p_mlp))
    print(row("Ensemble", p_ens))
    print("\n50% would be coin-flip. If a CI excludes 0.5, that'd be statistically suspicious — but theory says it won't.")
if __name__ == "__main__":
    # Optional first command-line argument: batch size N (defaults to 10000).
    cli_args = sys.argv[1:]
    main(int(cli_args[0]) if cli_args else 10000)