| """ |
| Train a morphism model on Eigenverse structure-preserving maps. |
| |
| Architecture: MorphismNet β a multi-head model where: |
| - Shared encoder learns the common Eigenverse structure |
| - Per-morphism heads specialize in each transformation |
| - Domain embedding distinguishes β vs GF(p) |
| - Residual prediction head learns to verify morphism properties |
| (all residuals should be β 0 when the morphism holds) |
| |
| The model learns the Eigenverse's "grammar" β the rules connecting |
| different mathematical objects through structure-preserving maps. |
| """ |
|
|
| import numpy as np |
| import torch |
| import torch.nn as nn |
| import torch.optim as optim |
| from torch.utils.data import DataLoader, TensorDataset |
| import os |
| import json |
| import time |
|
|
| |
| |
| |
|
|
| print("Loading dataset...") |
| inputs = np.load("data/inputs.npy") |
| outputs = np.load("data/outputs.npy") |
| morphism_ids = np.load("data/morphism_ids.npy") |
| domain_ids = np.load("data/domain_ids.npy") |
|
|
| N = len(inputs) |
| IN_DIM = inputs.shape[1] |
| OUT_DIM = outputs.shape[1] |
| N_MORPHISMS = 7 |
| N_DOMAINS = 2 |
|
|
| print(f"Dataset: {N} samples, in={IN_DIM}, out={OUT_DIM}") |
|
|
| |
# Random 90/10 train/validation split over sample indices.
perm = np.random.permutation(N)
split = int(0.9 * N)
train_idx, val_idx = perm[:split], perm[split:]


def _as_tensors(idx):
    """Slice all four arrays at *idx* and convert to torch tensors."""
    return (
        torch.tensor(inputs[idx], dtype=torch.float32),
        torch.tensor(outputs[idx], dtype=torch.float32),
        torch.tensor(morphism_ids[idx], dtype=torch.long),
        torch.tensor(domain_ids[idx], dtype=torch.long),
    )


X_train, Y_train, M_train, D_train = _as_tensors(train_idx)
X_val, Y_val, M_val, D_val = _as_tensors(val_idx)

train_ds = TensorDataset(X_train, Y_train, M_train, D_train)
val_ds = TensorDataset(X_val, Y_val, M_val, D_val)

BATCH = 512
train_dl = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0)
val_dl = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=0)
|
|
|
|
| |
| |
| |
|
|
class MorphismNet(nn.Module):
    """Multi-head network for Eigenverse morphism learning.

    Architecture:
        - Morphism embedding (7 types) + domain embedding (2 types)
        - Shared encoder: input + embeddings -> hidden representation
        - Per-morphism decoder heads: hidden -> output prediction
        - Residual head: sigmoid probability that the morphism property
          holds (i.e. the residual is ~0)
    """

    def __init__(self, in_dim=4, out_dim=6, hidden=256, n_morphisms=7, n_domains=2):
        """Build the embeddings, shared encoder, decoder heads, and residual head.

        Args:
            in_dim: dimensionality of the raw input features.
            out_dim: dimensionality of each head's prediction.
            hidden: width of the shared encoder layers.
            n_morphisms: number of morphism types (one decoder head each).
            n_domains: number of domains the samples come from.
        """
        super().__init__()
        self.n_morphisms = n_morphisms
        self.out_dim = out_dim

        # Conditioning embeddings: which morphism and which domain.
        self.morph_embed = nn.Embedding(n_morphisms, 32)
        self.domain_embed = nn.Embedding(n_domains, 16)

        # Shared trunk: [input | morph_emb | domain_emb] -> hidden.
        enc_in = in_dim + 32 + 16
        self.encoder = nn.Sequential(
            nn.Linear(enc_in, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
        )

        # One specialist decoder head per morphism type.
        self.heads = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden, hidden // 2),
                nn.GELU(),
                nn.Linear(hidden // 2, out_dim),
            )
            for _ in range(n_morphisms)
        ])

        # Binary classifier: does the morphism property hold for this sample?
        self.residual_head = nn.Sequential(
            nn.Linear(hidden, 64),
            nn.GELU(),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x, morph_id, domain_id):
        """Predict morphism outputs and a property-holds probability.

        Args:
            x: float tensor of shape (batch, in_dim).
            morph_id: long tensor of shape (batch,) selecting a decoder head.
            domain_id: long tensor of shape (batch,) selecting the domain.

        Returns:
            (out, residual_prob): predictions of shape (batch, out_dim),
            each row produced by that sample's morphism head, and sigmoid
            probabilities of shape (batch,).
        """
        m_emb = self.morph_embed(morph_id)
        d_emb = self.domain_embed(domain_id)

        # Concatenate features with both embeddings, then encode.
        h = torch.cat([x, m_emb, d_emb], dim=-1)
        h = self.encoder(h)

        # Route each sample through its own morphism head.
        # FIX: the output buffer now follows x's dtype; the original
        # implicitly used the default (float32), which would silently mix
        # dtypes for non-default-precision inputs.
        out = torch.zeros(x.shape[0], self.out_dim, device=x.device, dtype=x.dtype)
        for m in range(self.n_morphisms):
            mask = (morph_id == m)
            if mask.any():
                out[mask] = self.heads[m](h[mask])

        residual_prob = self.residual_head(h).squeeze(-1)

        return out, residual_prob
|
|
|
|
| |
| |
| |
|
|
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
| print(f"Device: {device}") |
|
|
| model = MorphismNet().to(device) |
| optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4) |
| scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50) |
|
|
| |
| mse_loss = nn.MSELoss() |
| bce_loss = nn.BCELoss() |
|
|
| |
| |
| RESIDUAL_COL = {0: 2, 1: 2, 2: 2, 3: 2, 4: 5, 5: 4, 6: 2} |
|
|
| EPOCHS = 50 |
| best_val_loss = float('inf') |
| history = [] |
|
|
| print(f"\nTraining MorphismNet ({sum(p.numel() for p in model.parameters()):,} params)") |
| print(f"Epochs: {EPOCHS}, Batch: {BATCH}") |
| print("=" * 60) |
|
|
# ===========================================================================
# Training loop: MSE on the morphism outputs plus a lightly-weighted BCE
# auxiliary loss that teaches the residual head to flag property violations.
# ===========================================================================
for epoch in range(EPOCHS):
    model.train()
    train_mse, train_n = 0.0, 0
    t0 = time.time()

    for x, y, m, d in train_dl:
        x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)

        pred, res_prob = model(x, m, d)

        # Primary regression objective.
        loss_mse = mse_loss(pred, y)

        # Residual-classifier labels: 1 when the morphism property holds,
        # i.e. the designated residual column of the target is ~0.
        res_labels = torch.zeros(x.shape[0], device=device)
        for mi in range(7):
            mask = (m == mi)
            if mask.any():
                col = RESIDUAL_COL[mi]
                if col < y.shape[1]:
                    res_labels[mask] = (y[mask, col].abs() < 0.01).float()

        loss_res = bce_loss(res_prob, res_labels)

        # Auxiliary loss weighted at 0.1 so it cannot dominate the regression.
        loss = loss_mse + 0.1 * loss_res

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        train_mse += loss_mse.item() * x.shape[0]
        train_n += x.shape[0]

    scheduler.step()

    # ---- validation ----
    model.eval()
    val_mse, val_res_acc, val_n = 0.0, 0.0, 0
    with torch.no_grad():
        for x, y, m, d in val_dl:
            x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)
            pred, res_prob = model(x, m, d)
            val_mse += mse_loss(pred, y).item() * x.shape[0]

            # Residual-head accuracy, accumulated per morphism type.
            for mi in range(7):
                mask = (m == mi)
                if mask.any():
                    col = RESIDUAL_COL[mi]
                    if col < y.shape[1]:
                        labels = (y[mask, col].abs() < 0.01).float()
                        preds = (res_prob[mask] > 0.5).float()
                        val_res_acc += (preds == labels).sum().item()
            val_n += x.shape[0]

    train_mse /= train_n
    val_mse /= val_n
    val_res_acc /= max(val_n, 1)
    elapsed = time.time() - t0

    history.append({
        "epoch": epoch + 1,
        "train_mse": train_mse,
        "val_mse": val_mse,
        "val_residual_acc": val_res_acc,
        "lr": scheduler.get_last_lr()[0],
        "time": elapsed,
    })

    # Checkpoint whenever validation MSE improves.
    if val_mse < best_val_loss:
        best_val_loss = val_mse
        torch.save(model.state_dict(), "morphism_net.pt")
        # BUGFIX: the original `marker = " β` was an unterminated string
        # literal (a mojibake-corrupted checkmark), i.e. a SyntaxError that
        # prevented the script from running. Use a plain ASCII marker.
        marker = " *"
    else:
        marker = ""

    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f" [{epoch+1:3d}/{EPOCHS}] train_mse={train_mse:.6f} "
              f"val_mse={val_mse:.6f} res_acc={val_res_acc:.3f} "
              f"lr={scheduler.get_last_lr()[0]:.2e} ({elapsed:.1f}s){marker}")

print("=" * 60)
print(f"Best val MSE: {best_val_loss:.6f}")
|
|
| |
| |
| |
|
|
| print("\nPer-morphism validation MSE:") |
| model.load_state_dict(torch.load("morphism_net.pt", weights_only=True)) |
| model.eval() |
|
|
| names = ["Β§1 coherence_even", "Β§2 palindrome_odd", "Β§3 lyapunov_bridge", |
| "Β§4 ΞΌ_isometry", "Β§5 orbit_hom", "Β§6 reality_linear", "Β§7 composition"] |
|
|
| with torch.no_grad(): |
| x_all = X_val.to(device) |
| y_all = Y_val.to(device) |
| m_all = M_val.to(device) |
| d_all = D_val.to(device) |
| pred_all, res_all = model(x_all, m_all, d_all) |
|
|
| for mi in range(7): |
| mask = (m_all == mi) |
| if mask.sum() > 0: |
| mse = ((pred_all[mask] - y_all[mask]) ** 2).mean().item() |
| |
| col = RESIDUAL_COL[mi] |
| if col < y_all.shape[1]: |
| true_res = y_all[mask, col].abs() |
| pred_res = pred_all[mask, col].abs() |
| res_mse = ((pred_res - true_res) ** 2).mean().item() |
| else: |
| res_mse = 0.0 |
| print(f" {names[mi]:25s}: MSE={mse:.6f}, residual_MSE={res_mse:.6f}, n={mask.sum().item()}") |
|
|
| |
| |
| |
|
|
| print("\nMod paradox test (Β§1 coherence_even):") |
| with torch.no_grad(): |
| mask_r = (m_all == 0) & (d_all == 0) |
| mask_gfp = (m_all == 0) & (d_all == 1) |
|
|
| if mask_r.sum() > 0: |
| mse_r = ((pred_all[mask_r] - y_all[mask_r]) ** 2).mean().item() |
| res_r = y_all[mask_r, 2].abs().mean().item() |
| pred_res_r = pred_all[mask_r, 2].abs().mean().item() |
| print(f" β domain: MSE={mse_r:.6f}, true_residual={res_r:.2e}, " |
| f"pred_residual={pred_res_r:.2e}, n={mask_r.sum().item()}") |
|
|
| if mask_gfp.sum() > 0: |
| mse_gfp = ((pred_all[mask_gfp] - y_all[mask_gfp]) ** 2).mean().item() |
| res_gfp = y_all[mask_gfp, 2].abs().mean().item() |
| pred_res_gfp = pred_all[mask_gfp, 2].abs().mean().item() |
| print(f" GF(p) domain: MSE={mse_gfp:.6f}, true_residual={res_gfp:.2e}, " |
| f"pred_residual={pred_res_gfp:.2e}, n={mask_gfp.sum().item()}") |
| print(f"\n The paradox: C(r)=C(1/r) holds exactly over β (residualβ0)") |
| print(f" but over GF(p), the 'residual' is nonzero β mod breaks symmetry.") |
| else: |
| print(f" (No GF(p) samples in validation set)") |
|
|
| |
| with open("training_history.json", "w") as f: |
| json.dump(history, f, indent=2) |
|
|
| |
| info = { |
| "name": "MorphismNet", |
| "params": sum(p.numel() for p in model.parameters()), |
| "morphisms": names, |
| "best_val_mse": best_val_loss, |
| "epochs": EPOCHS, |
| "dataset_size": N, |
| "architecture": "shared_encoder(3x256) + 7_heads(128β6) + residual_classifier", |
| } |
| with open("model_info.json", "w") as f: |
| json.dump(info, f, indent=2) |
|
|
| print(f"\nModel saved: morphism_net.pt ({sum(p.numel() for p in model.parameters()):,} params)") |
| print("Done. π§¬") |
|
|