"""
Train a morphism model on Eigenverse structure-preserving maps.

Architecture: MorphismNet — a multi-head model where:
  - Shared encoder learns the common Eigenverse structure
  - Per-morphism heads specialize in each transformation
  - Domain embedding distinguishes ℝ vs GF(p)
  - Residual prediction head learns to verify morphism properties
    (all residuals should be ≈ 0 when the morphism holds)

The model learns the Eigenverse's "grammar" — the rules connecting
different mathematical objects through structure-preserving maps.

Running this script reads four arrays from ``data/``, trains for a fixed
number of epochs, and writes ``morphism_net.pt``, ``training_history.json``
and ``model_info.json`` into the current working directory.
"""

import json
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# ════════════════════════════════════════════════════════════════════════
# Load data
# ════════════════════════════════════════════════════════════════════════

print("Loading dataset...")
inputs = np.load("data/inputs.npy")
outputs = np.load("data/outputs.npy")
morphism_ids = np.load("data/morphism_ids.npy")
domain_ids = np.load("data/domain_ids.npy")

N = len(inputs)
IN_DIM = inputs.shape[1]    # expected to be 4
OUT_DIM = outputs.shape[1]  # expected to be 6
N_MORPHISMS = 7             # ids 0-6
N_DOMAINS = 2               # ℝ, GF(p)

print(f"Dataset: {N} samples, in={IN_DIM}, out={OUT_DIM}")

# Train/val split (90/10).
# NOTE(review): the permutation is unseeded, so the split differs between runs.
perm = np.random.permutation(N)
split = int(0.9 * N)
train_idx, val_idx = perm[:split], perm[split:]

X_train = torch.tensor(inputs[train_idx], dtype=torch.float32)
Y_train = torch.tensor(outputs[train_idx], dtype=torch.float32)
M_train = torch.tensor(morphism_ids[train_idx], dtype=torch.long)
D_train = torch.tensor(domain_ids[train_idx], dtype=torch.long)

X_val = torch.tensor(inputs[val_idx], dtype=torch.float32)
Y_val = torch.tensor(outputs[val_idx], dtype=torch.float32)
M_val = torch.tensor(morphism_ids[val_idx], dtype=torch.long)
D_val = torch.tensor(domain_ids[val_idx], dtype=torch.long)

train_ds = TensorDataset(X_train, Y_train, M_train, D_train)
val_ds = TensorDataset(X_val, Y_val, M_val, D_val)

BATCH = 512
train_dl = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0)
val_dl = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=0)


# ════════════════════════════════════════════════════════════════════════
# Model: MorphismNet
# ════════════════════════════════════════════════════════════════════════

class MorphismNet(nn.Module):
    """Multi-head network for Eigenverse morphism learning.

    Architecture:
      - Morphism embedding (7 types) + Domain embedding (2 types)
      - Shared encoder: input + embeddings → hidden representation
      - Per-morphism decoder heads: hidden → output prediction
      - Residual head: predicts whether the morphism property holds (≈ 0)

    Args:
        in_dim: Width of the raw input vector.
        out_dim: Width of the predicted output vector.
        hidden: Width of the shared encoder layers.
        n_morphisms: Number of morphism ids (one decoder head per id).
        n_domains: Number of domain ids.
    """

    def __init__(self, in_dim: int = 4, out_dim: int = 6, hidden: int = 256,
                 n_morphisms: int = 7, n_domains: int = 2) -> None:
        super().__init__()
        self.n_morphisms = n_morphisms
        self.out_dim = out_dim

        # Embeddings
        self.morph_embed = nn.Embedding(n_morphisms, 32)
        self.domain_embed = nn.Embedding(n_domains, 16)

        # Shared encoder
        enc_in = in_dim + 32 + 16  # input + morph_embed + domain_embed
        self.encoder = nn.Sequential(
            nn.Linear(enc_in, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
        )

        # Per-morphism heads
        self.heads = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden, hidden // 2),
                nn.GELU(),
                nn.Linear(hidden // 2, out_dim),
            )
            for _ in range(n_morphisms)
        ])

        # Residual classifier: does the morphism property hold?
        # (binary: 1 = residual ≈ 0, i.e. property holds)
        self.residual_head = nn.Sequential(
            nn.Linear(hidden, 64),
            nn.GELU(),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x: torch.Tensor, morph_id: torch.Tensor,
                domain_id: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Predict outputs and the residual-holds probability for a batch.

        Args:
            x: Input features, shape (B, in_dim).
            morph_id: Integer morphism id per sample, shape (B,).
            domain_id: Integer domain id per sample, shape (B,).

        Returns:
            ``(out, residual_prob)`` where ``out`` has shape (B, out_dim) and
            ``residual_prob`` has shape (B,) with values in (0, 1). Rows whose
            ``morph_id`` matches no head are left as zeros in ``out``.
        """
        # Embeddings
        m_emb = self.morph_embed(morph_id)    # (B, 32)
        d_emb = self.domain_embed(domain_id)  # (B, 16)

        # Concatenate and encode
        h = torch.cat([x, m_emb, d_emb], dim=-1)  # (B, in+48)
        h = self.encoder(h)                       # (B, hidden)

        # Route each sample to the head of its morphism. The buffer takes its
        # dtype/device from the encoder output so the masked assignment below
        # does not hit a dtype mismatch when the model is not float32.
        out = h.new_zeros(x.shape[0], self.out_dim)
        for m, head in enumerate(self.heads):
            mask = morph_id == m
            if mask.any():
                out[mask] = head(h[mask])

        # Residual prediction
        residual_prob = self.residual_head(h).squeeze(-1)  # (B,)
        return out, residual_prob


# ════════════════════════════════════════════════════════════════════════
# Training
# ════════════════════════════════════════════════════════════════════════

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# Build the model from the dataset-derived sizes rather than the class
# defaults, so the script keeps working if the arrays change shape.
model = MorphismNet(
    in_dim=IN_DIM,
    out_dim=OUT_DIM,
    n_morphisms=N_MORPHISMS,
    n_domains=N_DOMAINS,
).to(device)
n_params = sum(p.numel() for p in model.parameters())

optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)

# Loss: MSE for output prediction + BCE for residual classification
mse_loss = nn.MSELoss()
bce_loss = nn.BCELoss()

# For residual labels: residual columns are near 0 when morphism holds.
# Column index of the residual per morphism id: col 2 for most, col 5 for
# id 4 (§5 orbit_hom) and col 4 for id 5 (§6 reality_linear).
RESIDUAL_COL = {0: 2, 1: 2, 2: 2, 3: 2, 4: 5, 5: 4, 6: 2}

# |residual| below this counts as "the morphism property holds".
RESIDUAL_TOL = 0.01


def _residual_labels(y: torch.Tensor,
                     morph_id: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    """Derive binary "property holds" labels from the target residual column.

    Args:
        y: Target outputs, shape (B, out_dim).
        morph_id: Integer morphism id per sample, shape (B,).

    Returns:
        ``(labels, valid)``: ``labels`` is a float tensor of shape (B,) that is
        1.0 where the sample's residual column has absolute value below
        ``RESIDUAL_TOL`` and 0.0 otherwise; ``valid`` is a bool tensor marking
        the samples for which a label was actually computed (morphism id is in
        ``RESIDUAL_COL`` and its column exists in ``y``). Samples that are not
        valid keep label 0.0.
    """
    labels = torch.zeros(y.shape[0], device=y.device)
    valid = torch.zeros(y.shape[0], dtype=torch.bool, device=y.device)
    for mi, col in RESIDUAL_COL.items():
        if col >= y.shape[1]:
            continue
        mask = morph_id == mi
        if mask.any():
            labels[mask] = (y[mask, col].abs() < RESIDUAL_TOL).float()
            valid |= mask
    return labels, valid


EPOCHS = 50
best_val_loss = float('inf')
history = []

print(f"\nTraining MorphismNet ({n_params:,} params)")
print(f"Epochs: {EPOCHS}, Batch: {BATCH}")
print("=" * 60)

for epoch in range(EPOCHS):
    t0 = time.time()

    # Record the learning rate that this epoch's updates actually use; reading
    # it after scheduler.step() would report the next epoch's value.
    epoch_lr = optimizer.param_groups[0]["lr"]

    model.train()
    train_mse, train_n = 0.0, 0

    for x, y, m, d in train_dl:
        x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)

        pred, res_prob = model(x, m, d)

        # Output MSE
        loss_mse = mse_loss(pred, y)

        # Residual labels: 1 if morphism holds (residual near 0), generated
        # from the actual target residuals.
        res_labels, _ = _residual_labels(y, m)
        loss_res = bce_loss(res_prob, res_labels)

        loss = loss_mse + 0.1 * loss_res

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        train_mse += loss_mse.item() * x.shape[0]
        train_n += x.shape[0]

    scheduler.step()

    # Validation
    model.eval()
    val_mse, val_res_acc, val_n = 0.0, 0.0, 0

    with torch.no_grad():
        for x, y, m, d in val_dl:
            x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)
            pred, res_prob = model(x, m, d)
            val_mse += mse_loss(pred, y).item() * x.shape[0]

            # Residual accuracy: only samples with a computed label can count
            # as correct; the denominator below is still every sample.
            labels, valid = _residual_labels(y, m)
            preds = (res_prob > 0.5).float()
            val_res_acc += ((preds == labels) & valid).sum().item()

            val_n += x.shape[0]

    train_mse /= train_n
    val_mse /= val_n
    val_res_acc /= max(val_n, 1)
    elapsed = time.time() - t0

    history.append({
        "epoch": epoch + 1,
        "train_mse": train_mse,
        "val_mse": val_mse,
        "val_residual_acc": val_res_acc,
        "lr": epoch_lr,
        "time": elapsed,
    })

    # Keep the checkpoint with the lowest validation MSE.
    if val_mse < best_val_loss:
        best_val_loss = val_mse
        torch.save(model.state_dict(), "morphism_net.pt")
        marker = " ★"
    else:
        marker = ""

    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f" [{epoch+1:3d}/{EPOCHS}] train_mse={train_mse:.6f} "
              f"val_mse={val_mse:.6f} res_acc={val_res_acc:.3f} "
              f"lr={epoch_lr:.2e} ({elapsed:.1f}s){marker}")

print("=" * 60)
print(f"Best val MSE: {best_val_loss:.6f}")

# ════════════════════════════════════════════════════════════════════════
# Per-morphism evaluation
# ════════════════════════════════════════════════════════════════════════

print("\nPer-morphism validation MSE:")
# NOTE(review): if no epoch ever improved on the initial best (e.g. NaN
# losses), nothing was saved this run and this load reads whatever file is
# already on disk, or fails if there is none.
model.load_state_dict(torch.load("morphism_net.pt", weights_only=True))
model.eval()

names = ["§1 coherence_even", "§2 palindrome_odd", "§3 lyapunov_bridge",
         "§4 μ_isometry", "§5 orbit_hom", "§6 reality_linear", "§7 composition"]

with torch.no_grad():
    # The whole validation set is pushed through the model in one batch.
    x_all = X_val.to(device)
    y_all = Y_val.to(device)
    m_all = M_val.to(device)
    d_all = D_val.to(device)
    pred_all, res_all = model(x_all, m_all, d_all)

    for mi, name in enumerate(names):
        mask = (m_all == mi)
        if mask.sum() > 0:
            mse = ((pred_all[mask] - y_all[mask]) ** 2).mean().item()

            # Check residual accuracy: compare the magnitudes of the predicted
            # and true residual columns.
            col = RESIDUAL_COL[mi]
            if col < y_all.shape[1]:
                true_res = y_all[mask, col].abs()
                pred_res = pred_all[mask, col].abs()
                res_mse = ((pred_res - true_res) ** 2).mean().item()
            else:
                res_mse = 0.0

            print(f" {name:25s}: MSE={mse:.6f}, residual_MSE={res_mse:.6f}, n={mask.sum().item()}")

# ════════════════════════════════════════════════════════════════════════
# Test the mod paradox: does the model distinguish ℝ from GF(p)?
# ════════════════════════════════════════════════════════════════════════

print("\nMod paradox test (§1 coherence_even):")
with torch.no_grad():
    # Morphism id 0, split by domain id (0 = ℝ, 1 = GF(p)).
    mask_r = (m_all == 0) & (d_all == 0)
    mask_gfp = (m_all == 0) & (d_all == 1)
    res_col = RESIDUAL_COL[0]

    if mask_r.sum() > 0:
        mse_r = ((pred_all[mask_r] - y_all[mask_r]) ** 2).mean().item()
        res_r = y_all[mask_r, res_col].abs().mean().item()
        pred_res_r = pred_all[mask_r, res_col].abs().mean().item()
        print(f" ℝ domain: MSE={mse_r:.6f}, true_residual={res_r:.2e}, "
              f"pred_residual={pred_res_r:.2e}, n={mask_r.sum().item()}")

    if mask_gfp.sum() > 0:
        mse_gfp = ((pred_all[mask_gfp] - y_all[mask_gfp]) ** 2).mean().item()
        res_gfp = y_all[mask_gfp, res_col].abs().mean().item()
        pred_res_gfp = pred_all[mask_gfp, res_col].abs().mean().item()
        print(f" GF(p) domain: MSE={mse_gfp:.6f}, true_residual={res_gfp:.2e}, "
              f"pred_residual={pred_res_gfp:.2e}, n={mask_gfp.sum().item()}")
        print(f"\n The paradox: C(r)=C(1/r) holds exactly over ℝ (residual≈0)")
        print(f" but over GF(p), the 'residual' is nonzero — mod breaks symmetry.")
    else:
        print(f" (No GF(p) samples in validation set)")

# Save history
with open("training_history.json", "w") as f:
    json.dump(history, f, indent=2)

# Save model info
info = {
    "name": "MorphismNet",
    "params": n_params,
    "morphisms": names,
    "best_val_mse": best_val_loss,
    "epochs": EPOCHS,
    "dataset_size": N,
    "architecture": "shared_encoder(3x256) + 7_heads(128→6) + residual_classifier",
}
with open("model_info.json", "w") as f:
    json.dump(info, f, indent=2)

print(f"\nModel saved: morphism_net.pt ({n_params:,} params)")
print("Done. 🧬")