Initial upload: negative-result study on secp256k1 parity prediction.

6b93c3b verified 6 days ago

3.93 kB

	#!/usr/bin/env python3
	"""Predict k_state (parity of k) from (x, y) using the trained models."""
	import sys, os, json
	import numpy as np
	import pandas as pd
	from sklearn.preprocessing import StandardScaler
	import xgboost as xgb
	import lightgbm as lgb
	import torch
	import torch.nn as nn

	# secp256k1 base-field prime, p = 2^256 - 2^32 - 977.
	p = 2**256 - 2**32 - 977


	def on_curve(x, y):
	    """Return True if (x, y) satisfies y^2 = x^3 + 7 over the field of size p.

	    Both coordinates must be canonical field elements, i.e. integers in
	    [0, p). Values outside that range are rejected even when they are
	    congruent to a valid point, because the rest of the script derives
	    features from the decimal digits of x and y as given.

	    Args:
	        x: Candidate affine x-coordinate (int).
	        y: Candidate affine y-coordinate (int).

	    Returns:
	        bool: True only for an in-range pair on the curve.
	    """
	    if not (0 <= x < p and 0 <= y < p):
	        return False
	    return (y * y - (x * x * x + 7)) % p == 0

	def num_features(v, prefix):
	    """Build the per-number feature dict for one non-negative integer.

	    The features are computed from the decimal digits of ``v`` (counts,
	    sums, first/last digits), from its binary form (bit length, popcount)
	    and from its residues modulo 2 and a fixed list of small primes.

	    Args:
	        v: Non-negative Python int to describe.
	        prefix: String prepended to every key, e.g. ``"x"`` gives
	            ``"x_num_digits"``.

	    Returns:
	        dict: Maps ``f"{prefix}_<feature>"`` to an int, 21 entries in a
	        fixed insertion order.

	    Raises:
	        ValueError: If ``v`` is negative; the digit features are undefined
	            for a leading minus sign.
	    """
	    if v < 0:
	        raise ValueError(f"num_features expects a non-negative integer, got {v}")

	    s = str(v)
	    digs = [int(c) for c in s]
	    digit_sum = sum(digs)
	    odd_digits = sum(d % 2 for d in digs)

	    feats = {
	        f"{prefix}_num_digits": len(s),
	        f"{prefix}_first_digit": digs[0],
	        f"{prefix}_last_digit": digs[-1],
	        f"{prefix}_last2": v % 100,
	        f"{prefix}_last3": v % 1000,
	        f"{prefix}_digit_sum": digit_sum,
	        f"{prefix}_digit_sum_mod_9": digit_sum % 9,
	        f"{prefix}_even_digit_count": len(digs) - odd_digits,
	        f"{prefix}_odd_digit_count": odd_digits,
	        f"{prefix}_zero_count": s.count("0"),
	        f"{prefix}_unique_digit_count": len(set(s)),
	        f"{prefix}_bit_length": v.bit_length(),
	        f"{prefix}_popcount": bin(v).count("1"),
	        # "state" is the parity of v.
	        f"{prefix}_state": v % 2,
	    }
	    # Residues modulo small primes, appended in ascending order of modulus.
	    feats.update({f"{prefix}_mod_{m}": v % m for m in (3, 5, 7, 11, 13, 17, 19)})
	    return feats

	def featurize(x, y):
	    """Assemble the full feature row for the coordinate pair (x, y).

	    The row holds the ``num_features`` output for ``x`` (keys prefixed
	    ``"x"``), then for ``y`` (prefixed ``"y"``), followed by two pairwise
	    features: ``x_gt_y`` (1 if x > y else 0) and ``digit_sum_diff_xy``
	    (decimal digit sum of x minus that of y).

	    Args:
	        x: First integer.
	        y: Second integer.

	    Returns:
	        dict: Feature name -> numeric value.
	    """
	    # Digit totals are taken first, directly from the decimal strings.
	    x_digit_total = sum(map(int, str(x)))
	    y_digit_total = sum(map(int, str(y)))

	    features = {**num_features(x, "x"), **num_features(y, "y")}
	    features["x_gt_y"] = 1 if x > y else 0
	    features["digit_sum_diff_xy"] = x_digit_total - y_digit_total
	    return features

	def main():
	    """Command-line entry point: print each model's P(k is odd) for one point.

	    Reads two base-10 integers X and Y from ``sys.argv``, refuses to
	    continue unless ``on_curve(X, Y)`` holds, builds the feature row in the
	    column order of ``features.parquet``, and prints the probability and
	    thresholded parity from the XGBoost, LightGBM and MLP models stored
	    under ``results/``, plus their unweighted average.

	    All file paths are relative to the current working directory.

	    Exits with status 2 on a malformed command line and status 1 when the
	    point is not on the curve or the parquet header names a column that
	    ``featurize`` does not produce.
	    """
	    # Validate the command line up front instead of surfacing a raw traceback.
	    if len(sys.argv) < 3:
	        prog = sys.argv[0] if sys.argv else "predict"
	        print(f"usage: {prog} X Y  (decimal affine coordinates)", file=sys.stderr)
	        sys.exit(2)
	    try:
	        x = int(sys.argv[1])
	        y = int(sys.argv[2])
	    except ValueError:
	        print("error: X and Y must be base-10 integers", file=sys.stderr)
	        sys.exit(2)

	    if not on_curve(x, y):
	        print("⚠ (x, y) is NOT on secp256k1. Refusing to predict on an invalid point.")
	        sys.exit(1)
	    print(f"X = {x}")
	    print(f"Y = {y}")

	    # Same column order as training: must match df.drop({k, k_state, abs_x_minus_y}) order.
	    # Reproduce by reading the training parquet header to be safe.
	    df = pd.read_parquet("features.parquet")
	    drop = {"k", "k_state", "abs_x_minus_y"}
	    feat_cols = [c for c in df.columns if c not in drop]

	    feat = featurize(x, y)
	    # Fail with a readable message if the parquet schema and featurize() disagree.
	    missing = [c for c in feat_cols if c not in feat]
	    if missing:
	        print(
	            f"error: features.parquet has columns featurize() does not produce: {missing}",
	            file=sys.stderr,
	        )
	        sys.exit(1)
	    Xv = np.array([[feat[c] for c in feat_cols]], dtype=np.float32)
	    print(f"\nfeatures: {len(feat_cols)} columns")

	    # XGBoost
	    bst = xgb.XGBClassifier()
	    bst.load_model("results/xgb.json")
	    p_xgb = float(bst.predict_proba(Xv)[0, 1])

	    # LightGBM
	    lgbm = lgb.Booster(model_file="results/lgbm.txt")
	    p_lgb = float(lgbm.predict(Xv)[0])

	    # MLP — fit a scaler from training X for parity with training.
	    # NOTE(review): fitting on the first 70% of rows presumably mirrors an
	    # unshuffled 70/30 split in the training script — TODO confirm.
	    Xtrain = df[feat_cols].astype(np.float32).values
	    sc = StandardScaler().fit(Xtrain[:int(0.7 * len(Xtrain))])
	    Xs = sc.transform(Xv)

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    D = Xv.shape[1]
	    mlp = nn.Sequential(
	        nn.Linear(D, 512), nn.ReLU(),
	        nn.Linear(512, 512), nn.ReLU(),
	        nn.Linear(512, 256), nn.ReLU(),
	        nn.Linear(256, 1),
	    ).to(device)
	    # NOTE(review): torch.load unpickles the file; only load checkpoints you
	    # trust (or pass weights_only=True where the installed torch supports it).
	    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
	    mlp.eval()
	    with torch.no_grad():
	        logit = mlp(torch.tensor(Xs, dtype=torch.float32, device=device)).squeeze().item()
	    # The network emits a single logit; the sigmoid turns it into a probability.
	    p_mlp = float(1.0 / (1.0 + np.exp(-logit)))

	    avg = (p_xgb + p_lgb + p_mlp) / 3

	    print("\n--- predictions (probability that k is ODD) ---")
	    print(f" XGBoost : {p_xgb:.4f} -> parity = {int(p_xgb>0.5)}")
	    print(f" LightGBM : {p_lgb:.4f} -> parity = {int(p_lgb>0.5)}")
	    print(f" MLP : {p_mlp:.4f} -> parity = {int(p_mlp>0.5)}")
	    print(f" Ensemble : {avg:.4f} -> parity = {int(avg>0.5)}")
	    print("\nReminder: all models train-time landed at 50% accuracy on held-out k's,")
	    print("so any specific prediction here is essentially a coin flip with a confidence number attached.")

	# Run the prediction only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()