Initial upload: negative-result study on secp256k1 parity prediction.

6b93c3b verified 6 days ago

5.08 kB

	#!/usr/bin/env python3
	"""Generate N random (x, y, true k_state) pairs OUTSIDE the training range,
	run all trained models on them, and report accuracy."""
	import os, sys, time, secrets
	import numpy as np
	import pandas as pd
	from sklearn.preprocessing import StandardScaler
	from sklearn.metrics import accuracy_score, roc_auc_score
	import xgboost as xgb
	import lightgbm as lgb
	import torch, torch.nn as nn

	# secp256k1 domain parameters.
	# Field prime p = 2^256 - 2^32 - 977. The exponent operators had been lost,
	# which left the bogus value 2256 - 232 - 977 = 1047.
	p = 2**256 - 2**32 - 977
	# Order of the base point G.
	n = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
	# Affine coordinates of the base point G (satisfy y^2 = x^3 + 7 mod p).
	Gx = 55066263022277343669578718895168534326250603453777594175500187360389116729240
	Gy = 32670510020758816978083085130507043184471273380659243275938904335757337482424

	def inv(a):
	    """Return the multiplicative inverse of ``a`` modulo the module-level ``p``.

	    Uses Fermat's little theorem (a^(p-2) mod p), which assumes ``p`` is prime.
	    An input that is 0 mod p yields 0 instead of raising.
	    """
	    fermat_exponent = p - 2
	    return pow(a, fermat_exponent, p)
	def add(P, Q):
	    """Add two curve points given in affine coordinates over GF(p).

	    Points are ``(x, y)`` tuples of integers; ``None`` stands for the point at
	    infinity (the group identity). The doubling slope has no ``a`` term, i.e.
	    the formulas are for a short-Weierstrass curve with a = 0 (secp256k1).

	    Returns the sum as an ``(x, y)`` tuple reduced mod ``p``, or ``None`` when
	    the result is the point at infinity.
	    """
	    if P is None:
	        return Q
	    if Q is None:
	        return P
	    x1, y1 = P
	    x2, y2 = Q
	    # P + (-P) is the identity; this also catches doubling a point with y == 0.
	    if x1 == x2 and (y1 + y2) % p == 0:
	        return None
	    if P == Q:
	        # Tangent slope for doubling: 3*x1^2 / (2*y1).
	        m = (3 * x1 * x1) * inv(2 * y1) % p
	    else:
	        # Chord slope through two distinct points.
	        m = (y2 - y1) * inv(x2 - x1) % p
	    x3 = (m * m - x1 - x2) % p
	    return (x3, (m * (x1 - x3) - y1) % p)
	def mul(k, P):
	    """Compute the scalar multiple k*P with right-to-left double-and-add.

	    ``k`` is expected to be a non-negative integer; the loop consumes it one
	    low bit at a time and stops when it reaches 0. Returns ``None`` (the point
	    at infinity) for k == 0.
	    """
	    result, addend = None, P
	    while k != 0:
	        # Low bit set: the current power-of-two multiple contributes to the sum.
	        if k & 1 == 1:
	            result = add(result, addend)
	        k >>= 1
	        addend = add(addend, addend)
	    return result

	def num_features(v, prefix):
	    """Build the per-number feature dict for the integer ``v``.

	    Features cover the decimal representation (digit counts, digit sum,
	    leading/trailing digits), the binary representation (bit length, popcount,
	    parity as ``state``) and residues modulo small primes. Every key is
	    ``prefix`` followed by ``_`` and the feature name.
	    """
	    text = str(v)
	    digits = [int(ch) for ch in text]
	    digit_total = sum(digits)
	    even_digits = sum(1 for d in digits if d % 2 == 0)
	    odd_digits = sum(1 for d in digits if d % 2 == 1)

	    # (feature name, value) pairs, kept in the order the keys are emitted.
	    named = [
	        ("num_digits", len(text)),
	        ("first_digit", digits[0]),
	        ("last_digit", digits[-1]),
	        ("last2", v % 100),
	        ("last3", v % 1000),
	        ("digit_sum", digit_total),
	        ("digit_sum_mod_9", digit_total % 9),
	        ("even_digit_count", even_digits),
	        ("odd_digit_count", odd_digits),
	        ("zero_count", text.count("0")),
	        ("unique_digit_count", len(set(text))),
	        ("bit_length", v.bit_length()),
	        ("popcount", bin(v).count("1")),
	        ("state", v % 2),
	    ]
	    named.extend((f"mod_{q}", v % q) for q in (3, 5, 7, 11, 13, 17, 19))
	    return {f"{prefix}_{name}": value for name, value in named}

	def featurize(x, y):
	    """Return the full feature row for one curve point ``(x, y)``.

	    The row holds the ``num_features`` output for ``x`` (prefix ``"x"``) and
	    for ``y`` (prefix ``"y"``), plus two cross features: ``x_gt_y`` (1 if
	    x > y else 0) and ``digit_sum_diff_xy`` (decimal digit sum of x minus
	    that of y).
	    """
	    row = {**num_features(x, "x"), **num_features(y, "y")}
	    row["x_gt_y"] = int(x > y)
	    # Reuse the digit sums num_features already computed rather than
	    # re-scanning str(x) and str(y) a second time.
	    row["digit_sum_diff_xy"] = row["x_digit_sum"] - row["y_digit_sum"]
	    return row

	def main(N=10000, k_start=2_000_000):
	    """Evaluate the saved models on N sequential points and print the results.

	    Sequential k = k_start .. k_start+N-1 (outside the 1..1M training range).
	    Incremental P_{k+1} = P_k + G, ~1 modular inverse per step. Each point is
	    featurized, labelled with the parity of k, and scored by XGBoost,
	    LightGBM, the MLP and their mean ensemble; accuracy (with a
	    normal-approximation 95% CI) and ROC AUC are printed per model.

	    Reads ``features.parquet``, ``results/xgb.json``, ``results/lgbm.txt`` and
	    ``results/mlp.pt`` relative to the working directory.

	    Raises:
	        ValueError: if ``N`` < 1, or ``k_start`` < 1 (the jump to k_start-1
	            would need a negative scalar, on which ``mul`` never terminates).
	    """
	    if N < 1:
	        raise ValueError(f"N must be >= 1, got {N}")
	    if k_start < 1:
	        raise ValueError(f"k_start must be >= 1, got {k_start}")

	    G = (Gx, Gy)
	    print(f"computing {N} sequential points: k = {k_start} .. {k_start+N-1}")
	    t0 = time.time()
	    P = mul(k_start - 1, G)  # one full scalar mult to reach k_start-1
	    print(f" jump to k_start-1 done in {time.time()-t0:.1f}s; now walking +G")
	    t0 = time.time()
	    feats, labels = [], []
	    for k in range(k_start, k_start + N):
	        P = add(P, G)
	        px, py = P
	        feats.append(featurize(px, py))
	        labels.append(k & 1)  # label = parity of the scalar k
	    print(f" walked {N} steps in {time.time()-t0:.1f}s")

	    # column order must match training
	    df_tr = pd.read_parquet("features.parquet")
	    # Non-feature columns of the training table; presumably these were also
	    # excluded when the models were fitted -- TODO confirm.
	    drop = {"k", "k_state", "abs_x_minus_y"}
	    feat_cols = [c for c in df_tr.columns if c not in drop]
	    X = np.array([[r[c] for c in feat_cols] for r in feats], dtype=np.float32)
	    y_true = np.array(labels, dtype=np.int8)
	    print(f"label balance in batch: mean={y_true.mean():.4f} (expect ~0.5)")

	    # XGBoost
	    bst = xgb.XGBClassifier()
	    bst.load_model("results/xgb.json")
	    p_xgb = bst.predict_proba(X)[:, 1]

	    # LightGBM
	    lgbm = lgb.Booster(model_file="results/lgbm.txt")
	    p_lgb = lgbm.predict(X)

	    # MLP
	    # The scaler is re-fitted on the first 70% of the training rows;
	    # presumably this mirrors the split used when the MLP was trained -- TODO confirm.
	    Xtr_full = df_tr[feat_cols].astype(np.float32).values
	    sc = StandardScaler().fit(Xtr_full[:int(0.7 * len(Xtr_full))])
	    Xs = sc.transform(X)
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    D = X.shape[1]
	    mlp = nn.Sequential(
	        nn.Linear(D, 512), nn.ReLU(),
	        nn.Linear(512, 512), nn.ReLU(),
	        nn.Linear(512, 256), nn.ReLU(),
	        nn.Linear(256, 1),
	    ).to(device)
	    # NOTE(review): torch.load unpickles the checkpoint; only load files you
	    # trust (consider weights_only=True where the installed torch supports it).
	    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
	    mlp.eval()
	    with torch.no_grad():
	        batch = torch.tensor(Xs, dtype=torch.float32, device=device)
	        logits = mlp(batch).squeeze(1).cpu().numpy()
	    p_mlp = 1 / (1 + np.exp(-logits))  # sigmoid: logits -> probabilities

	    p_ens = (p_xgb + p_lgb + p_mlp) / 3

	    print(f"\n--- batch evaluation on {N} fresh (x, y) outside training range ---")

	    def report_line(name, prob):
	        acc = accuracy_score(y_true, prob > 0.5)
	        auc = roc_auc_score(y_true, prob)
	        # 95% CI for accuracy via normal approx
	        se = (acc * (1 - acc) / N) ** 0.5
	        lo, hi = acc - 1.96 * se, acc + 1.96 * se
	        return f" {name:9} acc={acc:.4f} (95% CI {lo:.4f}-{hi:.4f}) auc={auc:.4f}"

	    print(report_line("XGBoost", p_xgb))
	    print(report_line("LightGBM", p_lgb))
	    print(report_line("MLP", p_mlp))
	    print(report_line("Ensemble", p_ens))
	    print("\n50% would be coin-flip. If a CI excludes 0.5, that'd be statistically suspicious — but theory says it won't.")

	if __name__ == "__main__":
	    # Optional first command-line argument overrides the batch size N.
	    cli_args = sys.argv[1:]
	    main(int(cli_args[0]) if cli_args else 10000)