hackinet's picture
Initial upload: negative-result study on secp256k1 parity prediction.
6b93c3b verified
#!/usr/bin/env python3
"""Predict k_state (parity of k) from (x, y) using the trained models."""
import sys, os, json
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
import lightgbm as lgb
import torch
import torch.nn as nn
# Prime modulus of the secp256k1 base field: p = 2^256 - 2^32 - 977.
p = 2**256 - 2**32 - 977


def on_curve(x, y):
    """Return True if (x, y) is a canonical affine point on secp256k1.

    The point must satisfy y^2 = x^3 + 7 (mod p) AND both coordinates must
    be reduced field elements, i.e. integers in the range [0, p).

    The range check matters because the rest of the script derives features
    from the decimal digits of the raw integers: a non-canonical
    representative such as ``x + p`` or ``y - p`` satisfies the congruence
    but is a different integer, and a negative value cannot be parsed digit
    by digit.

    Args:
        x: Integer x-coordinate.
        y: Integer y-coordinate.

    Returns:
        bool: True only for in-range coordinates satisfying the curve equation.
    """
    if not (0 <= x < p and 0 <= y < p):
        return False
    return (y * y - (x * x * x + 7)) % p == 0
def num_features(v, prefix):
    """Build the per-number feature dict for the integer ``v``.

    Every key is ``prefix`` followed by an underscore and the feature name.
    The features describe the decimal representation (digit counts, digit
    sums, leading/trailing digits), the binary representation (bit length,
    popcount), the parity of ``v`` (``<prefix>_state``) and the residues of
    ``v`` modulo 3, 5, 7, 11, 13, 17 and 19.

    Args:
        v: Integer to describe; its decimal string must consist of digits only.
        prefix: String prepended to every feature name.

    Returns:
        dict: Mapping of feature name to integer value.
    """
    decimal = str(v)
    digits = list(map(int, decimal))
    digit_total = sum(digits)
    # Each digit is 0..9, so d % 2 is 1 exactly for the odd digits.
    odd_count = sum(d % 2 for d in digits)

    features = {
        f"{prefix}_num_digits": len(decimal),
        f"{prefix}_first_digit": digits[0],
        f"{prefix}_last_digit": digits[-1],
        f"{prefix}_last2": v % 100,
        f"{prefix}_last3": v % 1000,
        f"{prefix}_digit_sum": digit_total,
        f"{prefix}_digit_sum_mod_9": digit_total % 9,
        f"{prefix}_even_digit_count": len(digits) - odd_count,
        f"{prefix}_odd_digit_count": odd_count,
        f"{prefix}_zero_count": decimal.count("0"),
        f"{prefix}_unique_digit_count": len(set(decimal)),
        f"{prefix}_bit_length": v.bit_length(),
        f"{prefix}_popcount": bin(v).count("1"),
        f"{prefix}_state": v % 2,
    }
    # Residues modulo small primes, appended in ascending order.
    for modulus in (3, 5, 7, 11, 13, 17, 19):
        features[f"{prefix}_mod_{modulus}"] = v % modulus
    return features
def featurize(x, y):
    """Assemble the complete feature row for the pair (x, y).

    The row holds the ``num_features`` output for ``x`` (prefix ``"x"``) and
    for ``y`` (prefix ``"y"``), plus two cross features:

    * ``x_gt_y``: 1 if ``x > y`` else 0.
    * ``digit_sum_diff_xy``: decimal digit sum of ``x`` minus that of ``y``.

    Args:
        x: Integer x-coordinate.
        y: Integer y-coordinate.

    Returns:
        dict: Mapping of feature name to integer value.
    """
    digit_sum_x = sum(map(int, str(x)))
    digit_sum_y = sum(map(int, str(y)))

    features = {**num_features(x, "x"), **num_features(y, "y")}
    features["x_gt_y"] = int(x > y)
    features["digit_sum_diff_xy"] = digit_sum_x - digit_sum_y
    return features
def main():
    """Command-line entry point: predict the parity of k for one curve point.

    Usage: ``<script> X Y`` where X and Y are base-10 integers.

    Steps:
      1. Parse X and Y from ``sys.argv`` and check the point with ``on_curve``.
      2. Read ``features.parquet`` to recover the training column order
         (all columns except ``k``, ``k_state`` and ``abs_x_minus_y``).
      3. Build the single-row feature matrix with ``featurize``.
      4. Score it with the XGBoost, LightGBM and MLP models stored under
         ``results/`` and print each probability plus their plain average.

    Exits with status 1 and a message on stderr when the arguments are
    missing or not integers, or when the parquet schema contains a feature
    column that ``featurize`` does not produce. Exits with status 1 after
    printing a warning on stdout when the point is not on the curve.
    """
    # Validate the command line up front instead of failing with a traceback.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} X Y   (X and Y as decimal integers)")
    try:
        x = int(sys.argv[1])
        y = int(sys.argv[2])
    except ValueError:
        sys.exit("error: X and Y must be decimal integers")

    if not on_curve(x, y):
        print("⚠ (x, y) is NOT on secp256k1. Refusing to predict on an invalid point.")
        sys.exit(1)
    print(f"X = {x}")
    print(f"Y = {y}")

    # Same column order as training: must match df.drop({k, k_state, abs_x_minus_y}) order.
    # Reproduce by reading the training parquet header to be safe.
    df = pd.read_parquet("features.parquet")
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df.columns if c not in drop]

    feat = featurize(x, y)
    # Fail with a readable message if the stored schema and featurize() disagree.
    missing = [c for c in feat_cols if c not in feat]
    if missing:
        sys.exit(
            "error: features.parquet has columns that featurize() does not "
            f"produce: {missing}"
        )
    Xv = np.array([[feat[c] for c in feat_cols]], dtype=np.float32)
    print(f"\nfeatures: {len(feat_cols)} columns")

    # XGBoost
    bst = xgb.XGBClassifier()
    bst.load_model("results/xgb.json")
    p_xgb = float(bst.predict_proba(Xv)[0, 1])

    # LightGBM
    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = float(lgbm.predict(Xv)[0])

    # MLP — fit a scaler from training X for parity with training.
    # NOTE(review): the scaler is fitted on the first 70% of rows; presumably
    # this mirrors the training split — confirm against the training script.
    Xtrain = df[feat_cols].astype(np.float32).values
    sc = StandardScaler().fit(Xtrain[:int(0.7 * len(Xtrain))])
    Xs = sc.transform(Xv)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = Xv.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 256), nn.ReLU(),
        nn.Linear(256, 1),
    ).to(device)
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
    mlp.eval()
    with torch.no_grad():
        logit = mlp(torch.tensor(Xs, dtype=torch.float32, device=device)).squeeze().item()
    # Logistic sigmoid turns the raw logit into a probability.
    p_mlp = 1 / (1 + np.exp(-logit))

    avg = (p_xgb + p_lgb + p_mlp) / 3
    print("\n--- predictions (probability that k is ODD) ---")
    print(f" XGBoost : {p_xgb:.4f} -> parity = {int(p_xgb>0.5)}")
    print(f" LightGBM : {p_lgb:.4f} -> parity = {int(p_lgb>0.5)}")
    print(f" MLP : {p_mlp:.4f} -> parity = {int(p_mlp>0.5)}")
    print(f" Ensemble : {avg:.4f} -> parity = {int(avg>0.5)}")
    print("\nReminder: all models train-time landed at 50% accuracy on held-out k's,")
    print("so any specific prediction here is essentially a coin flip with a confidence number attached.")


if __name__ == "__main__":
    main()