| |
| """Predict k_state (parity of k) from (x, y) using the trained models.""" |
| import sys, os, json |
| import numpy as np |
| import pandas as pd |
| from sklearn.preprocessing import StandardScaler |
| import xgboost as xgb |
| import lightgbm as lgb |
| import torch |
| import torch.nn as nn |
|
|
# Prime modulus of the secp256k1 base field: p = 2^256 - 2^32 - 977.
p = 2**256 - 2**32 - 977


def on_curve(x, y):
    """Return True when ``(x, y)`` is an affine point on secp256k1.

    The point must satisfy ``y^2 = x^3 + 7 (mod p)`` AND both coordinates
    must be reduced field elements, i.e. integers in ``[0, p)``.

    The range check matters: the curve equation alone is evaluated modulo
    ``p``, so without it ``(x + p, y)`` or ``(x, -y)`` would be accepted even
    though they are not canonical coordinates (and would produce different
    digit/bit features than the real point).

    Args:
        x: Candidate x-coordinate (int).
        y: Candidate y-coordinate (int).

    Returns:
        bool: True only for in-range coordinates satisfying the curve equation.
    """
    if not (0 <= x < p and 0 <= y < p):
        return False
    return (y * y - (x * x * x + 7)) % p == 0
|
|
def num_features(v, prefix):
    """Compute decimal-digit, bit-level and small-modulus features of ``v``.

    Args:
        v: Non-negative integer to describe. A negative value raises
            ``ValueError`` because its sign character is not a digit.
        prefix: String prepended (with an underscore) to every feature name.

    Returns:
        dict: Maps ``"<prefix>_<feature>"`` to an int, in a fixed key order.
    """
    def key(name):
        return f"{prefix}_{name}"

    digits = [int(ch) for ch in str(v)]
    digit_total = sum(digits)
    even_count = sum(1 for d in digits if d % 2 == 0)

    features = {
        key("num_digits"): len(digits),
        key("first_digit"): digits[0],
        key("last_digit"): digits[-1],
        key("last2"): v % 100,
        key("last3"): v % 1000,
        key("digit_sum"): digit_total,
        key("digit_sum_mod_9"): digit_total % 9,
        key("even_digit_count"): even_count,
        # Every decimal digit is either even or odd, so the rest are odd.
        key("odd_digit_count"): len(digits) - even_count,
        key("zero_count"): digits.count(0),
        key("unique_digit_count"): len(set(digits)),
        key("bit_length"): v.bit_length(),
        key("popcount"): bin(v).count("1"),
        key("state"): v % 2,
    }
    # Residues modulo the small primes, appended in ascending order.
    for modulus in (3, 5, 7, 11, 13, 17, 19):
        features[key(f"mod_{modulus}")] = v % modulus
    return features
|
|
def featurize(x, y):
    """Build the full feature row for the point ``(x, y)``.

    The row holds the ``num_features`` output for ``x`` (prefix ``"x"``) and
    for ``y`` (prefix ``"y"``), followed by two cross features:
    ``x_gt_y`` (1 if ``x > y`` else 0) and ``digit_sum_diff_xy`` (decimal
    digit sum of ``x`` minus that of ``y``).

    Args:
        x: x-coordinate (non-negative int).
        y: y-coordinate (non-negative int).

    Returns:
        dict: Feature name -> int.
    """
    features = {**num_features(x, "x"), **num_features(y, "y")}
    features["x_gt_y"] = int(x > y)
    # The per-coordinate digit sums are already in the row; reuse them.
    features["digit_sum_diff_xy"] = features["x_digit_sum"] - features["y_digit_sum"]
    return features
|
|
def _parse_point(argv):
    """Parse the coordinates ``(x, y)`` from the command-line arguments.

    Args:
        argv: Full argument vector (normally ``sys.argv``). ``argv[1]`` and
            ``argv[2]`` must be base-10 integers; extra arguments are ignored.

    Returns:
        tuple[int, int]: ``(x, y)``.

    Raises:
        SystemExit: With a diagnostic message (exit status 1) when an argument
            is missing or is not an integer.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else "predict"
        sys.exit(f"usage: {prog} X Y   (base-10 coordinates of a secp256k1 point)")
    try:
        return int(argv[1]), int(argv[2])
    except ValueError:
        sys.exit(f"error: X and Y must be base-10 integers, got {argv[1]!r} and {argv[2]!r}")


def _feature_matrix(feat, feat_cols):
    """Arrange one feature dict into a ``(1, len(feat_cols))`` float32 array.

    Raises:
        KeyError: Listing every column in ``feat_cols`` that ``feat`` lacks,
            instead of failing on the first one with no context.
    """
    missing = [c for c in feat_cols if c not in feat]
    if missing:
        raise KeyError(
            f"features.parquet has columns that featurize() does not produce: {missing}"
        )
    return np.array([[feat[c] for c in feat_cols]], dtype=np.float32)


def _mlp_probability(df, feat_cols, Xv):
    """Score one feature row with the MLP checkpoint in ``results/mlp.pt``.

    Args:
        df: Feature table read from ``features.parquet``.
        feat_cols: Names of the model input columns, in order.
        Xv: ``(1, len(feat_cols))`` float32 feature row.

    Returns:
        float: Sigmoid of the network's single output logit.
    """
    Xtrain = df[feat_cols].astype(np.float32).values
    # The scaler is refit on the first 70% of the rows -- presumably mirroring
    # the split used at training time; TODO confirm against the training script.
    sc = StandardScaler().fit(Xtrain[:int(0.7 * len(Xtrain))])
    Xs = sc.transform(Xv)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = Xv.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 256), nn.ReLU(),
        nn.Linear(256, 1),
    ).to(device)
    # NOTE(review): torch.load unpickles the checkpoint; only load trusted files.
    mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device))
    mlp.eval()
    with torch.no_grad():
        logit = mlp(torch.tensor(Xs, dtype=torch.float32, device=device)).squeeze().item()
    # torch.sigmoid is numerically stable; 1/(1+exp(-logit)) overflows (with a
    # RuntimeWarning) for very negative logits. float64 keeps the precision of
    # the previous NumPy computation.
    return float(torch.sigmoid(torch.tensor(logit, dtype=torch.float64)).item())


def main():
    """Command-line entry point: predict the parity of k for a point (x, y).

    Reads ``X`` and ``Y`` from ``sys.argv``, refuses points for which
    ``on_curve`` is false (exit status 1), builds the feature row in the column
    order of ``features.parquet`` and prints the probability reported by the
    XGBoost (``results/xgb.json``), LightGBM (``results/lgbm.txt``) and MLP
    (``results/mlp.pt``) models plus their unweighted mean.
    """
    x, y = _parse_point(sys.argv)
    if not on_curve(x, y):
        print("⚠ (x, y) is NOT on secp256k1. Refusing to predict on an invalid point.")
        sys.exit(1)
    print(f"X = {x}")
    print(f"Y = {y}")

    # Column order of the stored table defines the model input order.
    df = pd.read_parquet("features.parquet")
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df.columns if c not in drop]

    Xv = _feature_matrix(featurize(x, y), feat_cols)
    print(f"\nfeatures: {len(feat_cols)} columns")

    bst = xgb.XGBClassifier()
    bst.load_model("results/xgb.json")
    p_xgb = float(bst.predict_proba(Xv)[0, 1])

    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = float(lgbm.predict(Xv)[0])

    p_mlp = _mlp_probability(df, feat_cols, Xv)

    avg = (p_xgb + p_lgb + p_mlp) / 3

    print("\n--- predictions (probability that k is ODD) ---")
    print(f" XGBoost : {p_xgb:.4f} -> parity = {int(p_xgb>0.5)}")
    print(f" LightGBM : {p_lgb:.4f} -> parity = {int(p_lgb>0.5)}")
    print(f" MLP : {p_mlp:.4f} -> parity = {int(p_mlp>0.5)}")
    print(f" Ensemble : {avg:.4f} -> parity = {int(avg>0.5)}")
    print("\nReminder: all models train-time landed at 50% accuracy on held-out k's,")
    print("so any specific prediction here is essentially a coin flip with a confidence number attached.")


if __name__ == "__main__":
    main()
|
|