#!/usr/bin/env python3
"""Predict k_state (parity of k) from (x, y) using the trained models.

Command line: two positional integers, X then Y (decimal, or with a
0x/0o/0b prefix).  The point must lie on secp256k1.  The script reads
``features.parquet`` and the model files under ``results/`` relative to the
current working directory.
"""
import json
import os
import sys

import numpy as np
import pandas as pd

# Field prime of secp256k1; the curve equation is y^2 = x^3 + 7 (mod p).
p = 2**256 - 2**32 - 977


def on_curve(x, y):
    """Return True when (x, y) is a canonical affine point on secp256k1.

    Both coordinates must already be reduced into ``[0, p)``.  Without the
    range check, values such as ``(x, -y)`` or ``(x + p, y)`` satisfy the
    congruence but are not valid coordinates, and a negative value would
    later break the decimal-digit features.
    """
    if not (0 <= x < p and 0 <= y < p):
        return False
    return (y * y - (x * x * x + 7)) % p == 0


def num_features(v, prefix):
    """Build the per-number feature dict for a non-negative integer.

    Args:
        v: Non-negative integer to describe.
        prefix: String prepended to every key (``"x"`` or ``"y"`` in this
            script).

    Returns:
        Dict mapping ``f"{prefix}_<feature>"`` to an int: decimal-digit
        statistics, bit length / popcount, parity and small-prime residues.

    Raises:
        ValueError: If ``v`` is negative (its decimal form has a sign
            character, so the digit features are undefined).
    """
    if v < 0:
        raise ValueError(f"num_features expects a non-negative integer, got {v}")
    text = str(v)
    digits = [int(ch) for ch in text]
    digit_sum = sum(digits)
    odd_digits = sum(d % 2 for d in digits)
    return {
        f"{prefix}_num_digits": len(text),
        f"{prefix}_first_digit": digits[0],
        f"{prefix}_last_digit": digits[-1],
        f"{prefix}_last2": v % 100,
        f"{prefix}_last3": v % 1000,
        f"{prefix}_digit_sum": digit_sum,
        f"{prefix}_digit_sum_mod_9": digit_sum % 9,
        f"{prefix}_even_digit_count": len(digits) - odd_digits,
        f"{prefix}_odd_digit_count": odd_digits,
        f"{prefix}_zero_count": text.count("0"),
        f"{prefix}_unique_digit_count": len(set(text)),
        f"{prefix}_bit_length": v.bit_length(),
        f"{prefix}_popcount": bin(v).count("1"),
        f"{prefix}_state": v % 2,
        f"{prefix}_mod_3": v % 3,
        f"{prefix}_mod_5": v % 5,
        f"{prefix}_mod_7": v % 7,
        f"{prefix}_mod_11": v % 11,
        f"{prefix}_mod_13": v % 13,
        f"{prefix}_mod_17": v % 17,
        f"{prefix}_mod_19": v % 19,
    }


def featurize(x, y):
    """Return the full feature dict for the point (x, y).

    Contains ``num_features`` of ``x`` (prefix ``"x"``) and of ``y`` (prefix
    ``"y"``), followed by two cross features: ``x_gt_y`` and
    ``digit_sum_diff_xy``.
    """
    row = {}
    row.update(num_features(x, "x"))
    row.update(num_features(y, "y"))
    row["x_gt_y"] = int(x > y)
    # Reuse the digit sums computed above instead of re-walking the digits.
    row["digit_sum_diff_xy"] = row["x_digit_sum"] - row["y_digit_sum"]
    return row


def _parse_int(text):
    """Parse an integer: plain decimal first, then 0x/0o/0b-prefixed forms."""
    try:
        return int(text)
    except ValueError:
        return int(text, 0)


def _parse_args(argv):
    """Extract (x, y) from an argv-style list, exiting with status 2 on misuse."""
    prog = argv[0] if argv else "predict"
    # Extra trailing arguments are ignored, as they always were.
    if len(argv) < 3:
        print(f"usage: {prog} X Y", file=sys.stderr)
        sys.exit(2)
    try:
        return _parse_int(argv[1]), _parse_int(argv[2])
    except ValueError:
        print(f"usage: {prog} X Y   (X and Y must be integers)", file=sys.stderr)
        sys.exit(2)


def main(argv=None):
    """Run all three models on the point given on the command line.

    Args:
        argv: Optional argv-style list (program name first).  Defaults to
            ``sys.argv``.

    Exits with status 2 on malformed arguments and status 1 when the point is
    not on the curve.  Otherwise prints the per-model and averaged
    probabilities.
    """
    x, y = _parse_args(sys.argv if argv is None else argv)
    if not on_curve(x, y):
        print("⚠ (x, y) is NOT on secp256k1. Refusing to predict on an invalid point.")
        sys.exit(1)

    # Heavy model frameworks are imported only once the input is known to be
    # valid, so bad input fails fast and the feature helpers above can be
    # imported without the ML stack installed.
    import lightgbm as lgb
    import torch
    import torch.nn as nn
    import xgboost as xgb
    from sklearn.preprocessing import StandardScaler

    print(f"X = {x}")
    print(f"Y = {y}")

    # Same column order as training: must match
    # df.drop({k, k_state, abs_x_minus_y}) order.
    # Reproduce by reading the training parquet header to be safe.
    df = pd.read_parquet("features.parquet")
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df.columns if c not in drop]
    feat = featurize(x, y)
    missing = [c for c in feat_cols if c not in feat]
    if missing:
        raise KeyError(
            f"features.parquet has columns that featurize() does not produce: {missing}"
        )
    Xv = np.array([[feat[c] for c in feat_cols]], dtype=np.float32)
    print(f"\nfeatures: {len(feat_cols)} columns")

    # XGBoost
    bst = xgb.XGBClassifier()
    bst.load_model("results/xgb.json")
    p_xgb = float(bst.predict_proba(Xv)[0, 1])

    # LightGBM
    # NOTE(review): the raw predict() output is used as a probability, which
    # assumes the saved booster has a binary objective -- confirm.
    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = float(lgbm.predict(Xv)[0])

    # MLP — fit a scaler from training X for parity with training.
    # NOTE(review): the first 70% of rows is presumably the training split;
    # confirm against the training script.
    Xtrain = df[feat_cols].astype(np.float32).values
    sc = StandardScaler().fit(Xtrain[:int(0.7 * len(Xtrain))])
    Xs = sc.transform(Xv)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = Xv.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 256), nn.ReLU(),
        nn.Linear(256, 1),
    ).to(device)
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # you produced yourself (consider weights_only=True where the installed
    # torch supports it).
    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
    mlp.eval()
    with torch.no_grad():
        logit = mlp(torch.tensor(Xs, dtype=torch.float32, device=device)).squeeze().item()
    p_mlp = 1 / (1 + np.exp(-logit))

    avg = (p_xgb + p_lgb + p_mlp) / 3
    print("\n--- predictions (probability that k is ODD) ---")
    print(f" XGBoost : {p_xgb:.4f} -> parity = {int(p_xgb>0.5)}")
    print(f" LightGBM : {p_lgb:.4f} -> parity = {int(p_lgb>0.5)}")
    print(f" MLP : {p_mlp:.4f} -> parity = {int(p_mlp>0.5)}")
    print(f" Ensemble : {avg:.4f} -> parity = {int(avg>0.5)}")
    print("\nReminder: all models train-time landed at 50% accuracy on held-out k's,")
    print("so any specific prediction here is essentially a coin flip with a confidence number attached.")


if __name__ == "__main__":
    main()