# morphism-net / train.py
# beanapologist's picture
# MorphismNet: 399K params trained on Eigenverse morphisms, mod paradox quantified
# fcc3e72 verified
"""
Train a morphism model on Eigenverse structure-preserving maps.
Architecture: MorphismNet β€” a multi-head model where:
- Shared encoder learns the common Eigenverse structure
- Per-morphism heads specialize in each transformation
- Domain embedding distinguishes ℝ vs GF(p)
- Residual prediction head learns to verify morphism properties
(all residuals should be β‰ˆ 0 when the morphism holds)
The model learns the Eigenverse's "grammar" β€” the rules connecting
different mathematical objects through structure-preserving maps.
"""
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os
import json
import time
# ════════════════════════════════════════════════════════════════════════
# Load data
# ════════════════════════════════════════════════════════════════════════
print("Loading dataset...")
inputs = np.load("data/inputs.npy")
outputs = np.load("data/outputs.npy")
morphism_ids = np.load("data/morphism_ids.npy")
domain_ids = np.load("data/domain_ids.npy")
N = len(inputs)
IN_DIM = inputs.shape[1] # 4
OUT_DIM = outputs.shape[1] # 6
N_MORPHISMS = 7 # 0-6
N_DOMAINS = 2 # ℝ, GF(p)
print(f"Dataset: {N} samples, in={IN_DIM}, out={OUT_DIM}")
# Train/val split (90/10)
perm = np.random.permutation(N)
split = int(0.9 * N)
train_idx, val_idx = perm[:split], perm[split:]
X_train = torch.tensor(inputs[train_idx], dtype=torch.float32)
Y_train = torch.tensor(outputs[train_idx], dtype=torch.float32)
M_train = torch.tensor(morphism_ids[train_idx], dtype=torch.long)
D_train = torch.tensor(domain_ids[train_idx], dtype=torch.long)
X_val = torch.tensor(inputs[val_idx], dtype=torch.float32)
Y_val = torch.tensor(outputs[val_idx], dtype=torch.float32)
M_val = torch.tensor(morphism_ids[val_idx], dtype=torch.long)
D_val = torch.tensor(domain_ids[val_idx], dtype=torch.long)
train_ds = TensorDataset(X_train, Y_train, M_train, D_train)
val_ds = TensorDataset(X_val, Y_val, M_val, D_val)
BATCH = 512
train_dl = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0)
val_dl = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=0)
# ════════════════════════════════════════════════════════════════════════
# Model: MorphismNet
# ════════════════════════════════════════════════════════════════════════
class MorphismNet(nn.Module):
    """Multi-head network for Eigenverse morphism learning.

    Architecture:
      - Morphism embedding (``n_morphisms`` types) and domain embedding
        (``n_domains`` types), concatenated with the raw input
      - Shared encoder: three Linear → GELU → LayerNorm stages
      - Per-morphism decoder heads: hidden → ``out_dim`` prediction
      - Residual head: sigmoid probability that the morphism property
        holds (residual ≈ 0)

    Args:
        in_dim: Width of the input feature vector.
        out_dim: Width of each head's prediction.
        hidden: Width of the shared encoder.
        n_morphisms: Number of morphism types (one decoder head each).
        n_domains: Number of domain types.
        morph_embed_dim: Width of the morphism embedding.
        domain_embed_dim: Width of the domain embedding.
    """

    def __init__(self, in_dim=4, out_dim=6, hidden=256, n_morphisms=7,
                 n_domains=2, morph_embed_dim=32, domain_embed_dim=16):
        super().__init__()
        self.n_morphisms = n_morphisms
        self.out_dim = out_dim

        # Embeddings
        self.morph_embed = nn.Embedding(n_morphisms, morph_embed_dim)
        self.domain_embed = nn.Embedding(n_domains, domain_embed_dim)

        # Shared encoder; module order is kept fixed so state_dict keys
        # (encoder.0, encoder.2, ...) stay loadable from existing checkpoints.
        enc_in = in_dim + morph_embed_dim + domain_embed_dim
        self.encoder = nn.Sequential(
            nn.Linear(enc_in, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden),
            nn.GELU(),
            nn.LayerNorm(hidden),
        )

        # Per-morphism heads
        self.heads = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden, hidden // 2),
                nn.GELU(),
                nn.Linear(hidden // 2, out_dim),
            )
            for _ in range(n_morphisms)
        ])

        # Residual classifier: does the morphism property hold?
        # (binary: 1 = residual ≈ 0, i.e. property holds)
        self.residual_head = nn.Sequential(
            nn.Linear(hidden, 64),
            nn.GELU(),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x, morph_id, domain_id):
        """Run the encoder, then route each row to its morphism's head.

        Args:
            x: Input features, one row per sample.
            morph_id: Integer morphism id per row; selects the decoder head.
            domain_id: Integer domain id per row.

        Returns:
            Tuple ``(out, residual_prob)``: ``out`` has one ``out_dim``-wide
            prediction per row, ``residual_prob`` one sigmoid output per row.
        """
        m_emb = self.morph_embed(morph_id)
        d_emb = self.domain_embed(domain_id)
        h = self.encoder(torch.cat([x, m_emb, d_emb], dim=-1))

        # Allocate from `h` so dtype and device follow the model. A plain
        # torch.zeros(...) is always float32 and makes the masked assignment
        # below fail for float64 models.
        out = h.new_zeros(x.shape[0], self.out_dim)
        for m, head in enumerate(self.heads):
            mask = morph_id == m
            if mask.any():
                # Cast covers mixed precision, where the head output can be
                # lower precision than `out`.
                out[mask] = head(h[mask]).to(out.dtype)

        residual_prob = self.residual_head(h).squeeze(-1)
        return out, residual_prob
# ════════════════════════════════════════════════════════════════════════
# Training
# ════════════════════════════════════════════════════════════════════════
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
model = MorphismNet().to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)
# Loss: MSE for output prediction + BCE for residual classification
mse_loss = nn.MSELoss()
bce_loss = nn.BCELoss()
# For residual labels: residual columns are near 0 when morphism holds
# Column indices for residual per morphism: col 2 for most, col 5 for orbit
RESIDUAL_COL = {0: 2, 1: 2, 2: 2, 3: 2, 4: 5, 5: 4, 6: 2}
EPOCHS = 50
best_val_loss = float('inf')
history = []
print(f"\nTraining MorphismNet ({sum(p.numel() for p in model.parameters()):,} params)")
print(f"Epochs: {EPOCHS}, Batch: {BATCH}")
print("=" * 60)
for epoch in range(EPOCHS):
model.train()
train_mse, train_n = 0.0, 0
t0 = time.time()
for x, y, m, d in train_dl:
x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)
pred, res_prob = model(x, m, d)
# Output MSE
loss_mse = mse_loss(pred, y)
# Residual labels: 1 if morphism holds (residual near 0)
# Use the actual output residuals to generate labels
res_labels = torch.zeros(x.shape[0], device=device)
for mi in range(7):
mask = (m == mi)
if mask.any():
col = RESIDUAL_COL[mi]
if col < y.shape[1]:
res_labels[mask] = (y[mask, col].abs() < 0.01).float()
loss_res = bce_loss(res_prob, res_labels)
loss = loss_mse + 0.1 * loss_res
optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
train_mse += loss_mse.item() * x.shape[0]
train_n += x.shape[0]
scheduler.step()
# Validation
model.eval()
val_mse, val_res_acc, val_n = 0.0, 0.0, 0
with torch.no_grad():
for x, y, m, d in val_dl:
x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)
pred, res_prob = model(x, m, d)
val_mse += mse_loss(pred, y).item() * x.shape[0]
# Residual accuracy
for mi in range(7):
mask = (m == mi)
if mask.any():
col = RESIDUAL_COL[mi]
if col < y.shape[1]:
labels = (y[mask, col].abs() < 0.01).float()
preds = (res_prob[mask] > 0.5).float()
val_res_acc += (preds == labels).sum().item()
val_n += x.shape[0]
train_mse /= train_n
val_mse /= val_n
val_res_acc /= max(val_n, 1)
elapsed = time.time() - t0
history.append({
"epoch": epoch + 1,
"train_mse": train_mse,
"val_mse": val_mse,
"val_residual_acc": val_res_acc,
"lr": scheduler.get_last_lr()[0],
"time": elapsed,
})
if val_mse < best_val_loss:
best_val_loss = val_mse
torch.save(model.state_dict(), "morphism_net.pt")
marker = " β˜…"
else:
marker = ""
if (epoch + 1) % 5 == 0 or epoch == 0:
print(f" [{epoch+1:3d}/{EPOCHS}] train_mse={train_mse:.6f} "
f"val_mse={val_mse:.6f} res_acc={val_res_acc:.3f} "
f"lr={scheduler.get_last_lr()[0]:.2e} ({elapsed:.1f}s){marker}")
print("=" * 60)
print(f"Best val MSE: {best_val_loss:.6f}")
# ════════════════════════════════════════════════════════════════════════
# Per-morphism evaluation
# ════════════════════════════════════════════════════════════════════════
print("\nPer-morphism validation MSE:")
model.load_state_dict(torch.load("morphism_net.pt", weights_only=True))
model.eval()
names = ["Β§1 coherence_even", "Β§2 palindrome_odd", "Β§3 lyapunov_bridge",
"Β§4 ΞΌ_isometry", "Β§5 orbit_hom", "Β§6 reality_linear", "Β§7 composition"]
with torch.no_grad():
x_all = X_val.to(device)
y_all = Y_val.to(device)
m_all = M_val.to(device)
d_all = D_val.to(device)
pred_all, res_all = model(x_all, m_all, d_all)
for mi in range(7):
mask = (m_all == mi)
if mask.sum() > 0:
mse = ((pred_all[mask] - y_all[mask]) ** 2).mean().item()
# Check residual accuracy
col = RESIDUAL_COL[mi]
if col < y_all.shape[1]:
true_res = y_all[mask, col].abs()
pred_res = pred_all[mask, col].abs()
res_mse = ((pred_res - true_res) ** 2).mean().item()
else:
res_mse = 0.0
print(f" {names[mi]:25s}: MSE={mse:.6f}, residual_MSE={res_mse:.6f}, n={mask.sum().item()}")
# ════════════════════════════════════════════════════════════════════════
# Test the mod paradox: does the model distinguish ℝ from GF(p)?
# ════════════════════════════════════════════════════════════════════════
print("\nMod paradox test (Β§1 coherence_even):")
with torch.no_grad():
mask_r = (m_all == 0) & (d_all == 0)
mask_gfp = (m_all == 0) & (d_all == 1)
if mask_r.sum() > 0:
mse_r = ((pred_all[mask_r] - y_all[mask_r]) ** 2).mean().item()
res_r = y_all[mask_r, 2].abs().mean().item()
pred_res_r = pred_all[mask_r, 2].abs().mean().item()
print(f" ℝ domain: MSE={mse_r:.6f}, true_residual={res_r:.2e}, "
f"pred_residual={pred_res_r:.2e}, n={mask_r.sum().item()}")
if mask_gfp.sum() > 0:
mse_gfp = ((pred_all[mask_gfp] - y_all[mask_gfp]) ** 2).mean().item()
res_gfp = y_all[mask_gfp, 2].abs().mean().item()
pred_res_gfp = pred_all[mask_gfp, 2].abs().mean().item()
print(f" GF(p) domain: MSE={mse_gfp:.6f}, true_residual={res_gfp:.2e}, "
f"pred_residual={pred_res_gfp:.2e}, n={mask_gfp.sum().item()}")
print(f"\n The paradox: C(r)=C(1/r) holds exactly over ℝ (residualβ‰ˆ0)")
print(f" but over GF(p), the 'residual' is nonzero β€” mod breaks symmetry.")
else:
print(f" (No GF(p) samples in validation set)")
# Save history (one record per epoch)
with open("training_history.json", "w", encoding="utf-8") as f:
    json.dump(history, f, indent=2)

# Save model info
n_params = sum(p.numel() for p in model.parameters())
info = {
    "name": "MorphismNet",
    "params": n_params,
    "morphisms": names,
    "best_val_mse": best_val_loss,
    "epochs": EPOCHS,
    "dataset_size": N,
    "architecture": "shared_encoder(3x256) + 7_heads(128→6) + residual_classifier",
}
# Explicit encoding so the file does not depend on the platform default.
with open("model_info.json", "w", encoding="utf-8") as f:
    json.dump(info, f, indent=2)

print(f"\nModel saved: morphism_net.pt ({n_params:,} params)")
print("Done. 🧬")