#!/usr/bin/env python3
"""Build an HTML page with model predictions for a range of k.

Columns: k | truth | MLP | XGBoost | LightGBM | BitXformer
Each model cell: 'O ✓' or 'E ✗'
  (O=odd, E=even ; check=correct, cross=wrong)
"""
import os
import sys
import time

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
import lightgbm as lgb
import torch
import torch.nn as nn


class BitTransformer(nn.Module):
    """Transformer encoder over a sequence of 0/1 tokens, one logit per sample.

    Attribute names (`tok`, `pos`, `cls`, `enc`, `head`) are the keys of the
    state dict, so they must not be renamed if saved weights are to load.
    """

    def __init__(self, seq_len=512, d=128, nhead=4, nlayers=4):
        """Create the layers.

        Args:
            seq_len: number of bit tokens per sample (length of the learned
                positional table).
            d: embedding / model width.
            nhead: attention heads per encoder layer.
            nlayers: number of stacked encoder layers.
        """
        super().__init__()
        # Two-entry vocabulary: token 0 and token 1.
        self.tok = nn.Embedding(2, d)
        # Learned positional table and learned summary ("cls") token.
        self.pos = nn.Parameter(torch.randn(1, seq_len, d) * 0.02)
        self.cls = nn.Parameter(torch.randn(1, 1, d) * 0.02)
        enc = nn.TransformerEncoderLayer(
            d_model=d,
            nhead=nhead,
            dim_feedforward=4 * d,
            batch_first=True,
            activation="gelu",
            norm_first=True,
        )
        self.enc = nn.TransformerEncoder(enc, num_layers=nlayers)
        self.head = nn.Linear(d, 1)

    def forward(self, x_bits):
        """Return one raw logit per sample.

        Args:
            x_bits: integer tensor of 0/1 tokens, shape (batch, seq_len).

        Returns:
            Tensor of shape (batch,) with un-squashed scores.
        """
        h = self.tok(x_bits) + self.pos
        # The cls token is prepended after the positional table is added,
        # so it carries no positional embedding of its own.
        cls = self.cls.expand(h.size(0), -1, -1)
        h = torch.cat([cls, h], dim=1)
        h = self.enc(h)
        # Read the prediction off the cls position only.
        return self.head(h[:, 0, :]).squeeze(1)


def bits_of(x, y):
    """Return the low 256 bits of `x` then of `y`, MSB first, as 512 int64s."""
    arr = np.empty(512, dtype=np.int64)
    for j in range(256):
        shift = 255 - j  # j == 0 is the most significant of the 256 bits
        arr[j] = (x >> shift) & 1
        arr[256 + j] = (y >> shift) & 1
    return arr


# Field prime and base-point coordinates; the values match the published
# secp256k1 domain parameters.
p = 2**256 - 2**32 - 977
Gx = 55066263022277343669578718895168534326250603453777594175500187360389116729240
Gy = 32670510020758816978083085130507043184471273380659243275938904335757337482424


def inv(a):
    """Return the inverse of `a` modulo `p` via Fermat's little theorem.

    `a` congruent to 0 yields 0 rather than raising.
    """
    return pow(a, p - 2, p)


def add(P, Q):
    """Add two affine curve points; `None` stands for the point at infinity.

    The doubling slope 3*x^2 / (2*y) assumes a curve with zero `a`
    coefficient (y^2 = x^3 + b).

    Args:
        P, Q: `(x, y)` tuples of ints reduced modulo `p`, or `None`.

    Returns:
        The sum as an `(x, y)` tuple, or `None` when the points cancel.
    """
    if P is None:
        return Q
    if Q is None:
        return P
    x1, y1 = P
    x2, y2 = Q
    # Same x with opposite y (this also covers doubling a point with y == 0).
    if x1 == x2 and (y1 + y2) % p == 0:
        return None
    if P == Q:
        m = (3 * x1 * x1) * inv(2 * y1) % p  # tangent slope
    else:
        m = (y2 - y1) * inv(x2 - x1) % p  # chord slope
    x3 = (m * m - x1 - x2) % p
    return (x3, (m * (x1 - x3) - y1) % p)


def mul(k, P):
    """Return the scalar multiple k*P using right-to-left double-and-add.

    Args:
        k: non-negative integer scalar.
        P: affine point `(x, y)` or `None`.

    Returns:
        The resulting point, or `None` (point at infinity) when `k == 0`.

    Raises:
        ValueError: if `k` is negative. Without this check the loop would
            never finish, because an arithmetic right shift of a negative
            int converges to -1, not 0.
    """
    if k < 0:
        raise ValueError(f"scalar k must be non-negative, got {k}")
    R = None
    while k:
        if k & 1:
            R = add(R, P)
        P = add(P, P)
        k >>= 1
    return R


def num_features(v, prefix):
    """Return hand-crafted decimal/binary features of a non-negative int.

    Args:
        v: non-negative integer to describe.
        prefix: string prepended (with an underscore) to every key.

    Returns:
        Dict mapping `"<prefix>_<feature>"` to an int.

    Raises:
        ValueError: if `v` is negative (its decimal string would contain a
            sign character that is not a digit).
    """
    if v < 0:
        raise ValueError(f"num_features expects a non-negative int, got {v}")
    s = str(v)
    digs = [int(c) for c in s]
    digit_sum = sum(digs)
    row = {
        f"{prefix}_num_digits": len(s),
        f"{prefix}_first_digit": digs[0],
        f"{prefix}_last_digit": digs[-1],
        f"{prefix}_last2": v % 100,
        f"{prefix}_last3": v % 1000,
        f"{prefix}_digit_sum": digit_sum,
        f"{prefix}_digit_sum_mod_9": digit_sum % 9,
        f"{prefix}_even_digit_count": sum(1 for d in digs if d % 2 == 0),
        f"{prefix}_odd_digit_count": sum(1 for d in digs if d % 2 == 1),
        f"{prefix}_zero_count": s.count("0"),
        f"{prefix}_unique_digit_count": len(set(s)),
        f"{prefix}_bit_length": v.bit_length(),
        f"{prefix}_popcount": bin(v).count("1"),
        f"{prefix}_state": v % 2,
    }
    # Residues modulo a fixed list of small primes, in this key order.
    for m in (3, 5, 7, 11, 13, 17, 19):
        row[f"{prefix}_mod_{m}"] = v % m
    return row


def featurize(x, y):
    """Return the feature dict for a point with coordinates `x`, `y`.

    Contains every `num_features` entry for `x` (prefix "x") and for `y`
    (prefix "y"), plus `x_gt_y` and `digit_sum_diff_xy`.
    """
    row = {}
    row.update(num_features(x, "x"))
    row.update(num_features(y, "y"))
    row["x_gt_y"] = int(x > y)
    # Reuse the digit sums computed by num_features.
    row["digit_sum_diff_xy"] = row["x_digit_sum"] - row["y_digit_sum"]
    return row


def main(N=2000, k_start=2_000_000, out="/tmp/predictions.html"):
    """Compute k*G for N consecutive k, run the saved models and build the page.

    Args:
        N: number of consecutive scalars to evaluate.
        k_start: first scalar; must be at least 1, because the walk starts
            from (k_start - 1)*G.
        out: presumably the path of the generated HTML file; confirm
            against the code that writes the page.
    """
    G = (Gx, Gy)
    print(f"computing kG for k = {k_start} .. {k_start+N-1}")
    t0 = time.time()
    # One scalar multiplication, then one point addition per step.
    P = mul(k_start - 1, G)
    feats, ks, truths, bits = [], [], [], []
    for i in range(N):
        k = k_start + i
        P = add(P, G)
        feats.append(featurize(*P))
        bits.append(bits_of(*P))
        ks.append(k)
        truths.append(k & 1)  # 1 = odd k, 0 = even k
    print(f" {N} points done in {time.time()-t0:.1f}s")
    bits_arr = np.stack(bits).astype(np.int64)

    # The training table fixes which feature columns are used and their order.
    df_tr = pd.read_parquet("features.parquet")
    drop = {"k", "k_state", "abs_x_minus_y"}
    feat_cols = [c for c in df_tr.columns if c not in drop]
    X = np.array([[r[c] for c in feat_cols] for r in feats], dtype=np.float32)
    y = np.array(truths, dtype=np.int8)

    bst = xgb.XGBClassifier()
    bst.load_model("results/xgb.json")
    p_xgb = bst.predict_proba(X)[:, 1]
    lgbm = lgb.Booster(model_file="results/lgbm.txt")
    p_lgb = lgbm.predict(X)

    Xtr = df_tr[feat_cols].astype(np.float32).values
    # Scaler is fitted on the first 70% of the training rows only;
    # presumably this mirrors the split used when the MLP was trained (confirm).
    sc = StandardScaler().fit(Xtr[:int(0.7 * len(Xtr))])
    Xs = sc.transform(X)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    D = X.shape[1]
    mlp = nn.Sequential(
        nn.Linear(D, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 256), nn.ReLU(),
        nn.Linear(256, 1),
    ).to(device)
mlp.load_state_dict(torch.load("results/mlp.pt", map_location=device)) mlp.eval() with torch.no_grad(): logits = mlp(torch.tensor(Xs, dtype=torch.float32, device=device)).squeeze(1).cpu().numpy() p_mlp = 1/(1+np.exp(-logits)) # bit-transformer bx = BitTransformer().to(device) bx.load_state_dict(torch.load("results/bit_xformer.pt", map_location=device)) bx.eval() p_bx = [] with torch.no_grad(): for i in range(0, N, 4096): chunk = torch.tensor(bits_arr[i:i+4096], dtype=torch.long, device=device) p_bx.append(torch.sigmoid(bx(chunk)).cpu().numpy()) p_bx = np.concatenate(p_bx) pred_mlp = (p_mlp > 0.5).astype(int) pred_xgb = (p_xgb > 0.5).astype(int) pred_lgb = (p_lgb > 0.5).astype(int) pred_bx = (p_bx > 0.5).astype(int) def cell(pred, truth): letter = "O" if pred == 1 else "E" ok = (pred == truth) cls = "ok" if ok else "bad" mark = "✓" if ok else "✗" return f'
truth column = actual parity of k (O=odd, E=even). Model columns show prediction + ✓ (correct) or ✗ (wrong).
| k | truth | MLP | XGBoost | LightGBM | BitXformer |
|---|