MorphismNet: 399K params trained on Eigenverse morphisms, mod paradox quantified

fcc3e72 verified 13 days ago

13.1 kB

	"""
	Train a morphism model on Eigenverse structure-preserving maps.

	Architecture: MorphismNet — a multi-head model where:
	- Shared encoder learns the common Eigenverse structure
	- Per-morphism heads specialize in each transformation
	- Domain embedding distinguishes ℝ vs GF(p)
	- Residual head: a binary classifier that predicts whether the morphism
	property holds (i.e. whether the true residual is ≈ 0)

	The model learns the Eigenverse's "grammar" — the rules connecting
	different mathematical objects through structure-preserving maps.
	"""

	import numpy as np
	import torch
	import torch.nn as nn
	import torch.optim as optim
	from torch.utils.data import DataLoader, TensorDataset
	import os
	import json
	import time

	# ════════════════════════════════════════════════════════════════════════
	# Load data
	# ════════════════════════════════════════════════════════════════════════

	print("Loading dataset...")
	# Four parallel arrays, one row per sample.
	inputs = np.load("data/inputs.npy")
	outputs = np.load("data/outputs.npy")
	morphism_ids = np.load("data/morphism_ids.npy")
	domain_ids = np.load("data/domain_ids.npy")

	N = inputs.shape[0]
	IN_DIM = inputs.shape[1]  # 4
	OUT_DIM = outputs.shape[1]  # 6
	N_MORPHISMS = 7  # 0-6
	N_DOMAINS = 2  # ℝ, GF(p)

	print(f"Dataset: {N} samples, in={IN_DIM}, out={OUT_DIM}")

	# Random 90/10 train/validation split over sample indices.
	perm = np.random.permutation(N)
	split = int(0.9 * N)
	train_idx = perm[:split]
	val_idx = perm[split:]


	def _to_tensors(idx):
	    """Return (X, Y, M, D) tensors for the rows selected by *idx*.

	    Features and targets become float32; morphism and domain ids become
	    int64 so they can index the embedding tables.
	    """
	    return (
	        torch.tensor(inputs[idx], dtype=torch.float32),
	        torch.tensor(outputs[idx], dtype=torch.float32),
	        torch.tensor(morphism_ids[idx], dtype=torch.long),
	        torch.tensor(domain_ids[idx], dtype=torch.long),
	    )


	X_train, Y_train, M_train, D_train = _to_tensors(train_idx)
	X_val, Y_val, M_val, D_val = _to_tensors(val_idx)

	train_ds = TensorDataset(X_train, Y_train, M_train, D_train)
	val_ds = TensorDataset(X_val, Y_val, M_val, D_val)

	BATCH = 512
	# Only the training loader shuffles; validation keeps a fixed order.
	train_dl = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0)
	val_dl = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=0)


	# ════════════════════════════════════════════════════════════════════════
	# Model: MorphismNet
	# ════════════════════════════════════════════════════════════════════════

	class MorphismNet(nn.Module):
	    """Multi-head network for Eigenverse morphism learning.

	    Architecture:
	    - Morphism embedding (32-d) + domain embedding (16-d)
	    - Shared encoder: input + embeddings → hidden representation
	    - Per-morphism decoder heads: hidden → output prediction
	    - Residual head: sigmoid probability that the morphism property
	      holds (residual ≈ 0)

	    Args:
	        in_dim: Number of input features per sample.
	        out_dim: Number of output values predicted per sample.
	        hidden: Width of the shared encoder.
	        n_morphisms: Number of morphism types (one decoder head each).
	        n_domains: Number of domain types.
	    """

	    def __init__(self, in_dim=4, out_dim=6, hidden=256, n_morphisms=7, n_domains=2):
	        super().__init__()
	        self.n_morphisms = n_morphisms
	        self.out_dim = out_dim

	        # Embeddings
	        self.morph_embed = nn.Embedding(n_morphisms, 32)
	        self.domain_embed = nn.Embedding(n_domains, 16)

	        # Shared encoder
	        enc_in = in_dim + 32 + 16  # input + morph_embed + domain_embed
	        self.encoder = nn.Sequential(
	            nn.Linear(enc_in, hidden),
	            nn.GELU(),
	            nn.LayerNorm(hidden),
	            nn.Linear(hidden, hidden),
	            nn.GELU(),
	            nn.LayerNorm(hidden),
	            nn.Linear(hidden, hidden),
	            nn.GELU(),
	            nn.LayerNorm(hidden),
	        )

	        # Per-morphism heads
	        self.heads = nn.ModuleList([
	            nn.Sequential(
	                nn.Linear(hidden, hidden // 2),
	                nn.GELU(),
	                nn.Linear(hidden // 2, out_dim),
	            )
	            for _ in range(n_morphisms)
	        ])

	        # Residual classifier: does the morphism property hold?
	        # (binary: 1 = residual ≈ 0, i.e. property holds)
	        self.residual_head = nn.Sequential(
	            nn.Linear(hidden, 64),
	            nn.GELU(),
	            nn.Linear(64, 1),
	            nn.Sigmoid(),
	        )

	    def forward(self, x, morph_id, domain_id):
	        """Predict outputs and the residual-holds probability.

	        Args:
	            x: Input features, shape (B, in_dim).
	            morph_id: Integer morphism ids, shape (B,).
	            domain_id: Integer domain ids, shape (B,).

	        Returns:
	            Tuple ``(out, residual_prob)`` with shapes (B, out_dim) and (B,).
	            Each row of ``out`` comes from the head selected by that row's
	            morphism id.
	        """
	        m_emb = self.morph_embed(morph_id)  # (B, 32)
	        d_emb = self.domain_embed(domain_id)  # (B, 16)

	        h = torch.cat([x, m_emb, d_emb], dim=-1)  # (B, in+48)
	        h = self.encoder(h)  # (B, hidden)

	        # Route each row to its morphism's head. The output buffer is
	        # allocated from the first head result so its dtype/device always
	        # match what the heads emit; a fixed float32 buffer makes the
	        # masked assignment fail for double/half models.
	        out = None
	        for m, head in enumerate(self.heads):
	            mask = (morph_id == m)
	            if mask.any():
	                head_out = head(h[mask])
	                if out is None:
	                    out = head_out.new_zeros(x.shape[0], self.out_dim)
	                out[mask] = head_out
	        if out is None:
	            # No row matched any head (e.g. empty batch): return zeros.
	            out = h.new_zeros(x.shape[0], self.out_dim)

	        residual_prob = self.residual_head(h).squeeze(-1)  # (B,)

	        return out, residual_prob


	# ════════════════════════════════════════════════════════════════════════
	# Training
	# ════════════════════════════════════════════════════════════════════════

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	print(f"Device: {device}")

	EPOCHS = 50

	# Build the model from the dimensions derived from the loaded dataset so the
	# network cannot silently disagree with the data.
	model = MorphismNet(
	    in_dim=IN_DIM,
	    out_dim=OUT_DIM,
	    n_morphisms=N_MORPHISMS,
	    n_domains=N_DOMAINS,
	).to(device)
	optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
	# One cosine half-period spans exactly the training run.
	scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

	# Loss: MSE for output prediction + BCE for residual classification
	mse_loss = nn.MSELoss()
	bce_loss = nn.BCELoss()

	# For residual labels: residual columns are near 0 when morphism holds.
	# Output column holding the residual, keyed by morphism id: col 2 for most,
	# col 5 for morphism 4 and col 4 for morphism 5.
	RESIDUAL_COL = {0: 2, 1: 2, 2: 2, 3: 2, 4: 5, 5: 4, 6: 2}
	# |residual| below this counts as "the morphism property holds".
	RESIDUAL_TOL = 0.01


	def residual_labels(y, m):
	    """Derive residual-classifier targets from the true outputs.

	    Args:
	        y: True outputs, shape (B, out_dim).
	        m: Morphism ids, shape (B,).

	    Returns:
	        Tuple ``(labels, valid)``. ``labels`` is a float tensor of shape (B,)
	        that is 1.0 where the row's residual column has magnitude below
	        ``RESIDUAL_TOL`` and 0.0 otherwise. ``valid`` is a bool tensor
	        marking rows whose residual column exists in ``y``; rows that are
	        not valid keep label 0.0.
	    """
	    labels = torch.zeros(y.shape[0], device=y.device)
	    valid = torch.zeros(y.shape[0], dtype=torch.bool, device=y.device)
	    for mi in range(N_MORPHISMS):
	        col = RESIDUAL_COL[mi]
	        if col >= y.shape[1]:
	            continue
	        mask = (m == mi)
	        if mask.any():
	            labels[mask] = (y[mask, col].abs() < RESIDUAL_TOL).float()
	            valid |= mask
	    return labels, valid


	best_val_loss = float('inf')
	history = []

	print(f"\nTraining MorphismNet ({sum(p.numel() for p in model.parameters()):,} params)")
	print(f"Epochs: {EPOCHS}, Batch: {BATCH}")
	print("=" * 60)

	for epoch in range(EPOCHS):
	    model.train()
	    train_mse, train_n = 0.0, 0
	    t0 = time.time()

	    for x, y, m, d in train_dl:
	        x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)

	        pred, res_prob = model(x, m, d)

	        # Output MSE
	        loss_mse = mse_loss(pred, y)

	        # Residual labels: 1 if morphism holds (residual near 0), built
	        # from the true outputs. Every row contributes to the BCE term.
	        res_labels, _ = residual_labels(y, m)
	        loss_res = bce_loss(res_prob, res_labels)

	        loss = loss_mse + 0.1 * loss_res

	        optimizer.zero_grad()
	        loss.backward()
	        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
	        optimizer.step()

	        # Accumulate a sample-weighted sum so the epoch mean is exact even
	        # when the last batch is smaller.
	        train_mse += loss_mse.item() * x.shape[0]
	        train_n += x.shape[0]

	    # Record the learning rate that was actually used this epoch; after
	    # scheduler.step() the optimizer already holds the next epoch's rate.
	    epoch_lr = optimizer.param_groups[0]["lr"]
	    scheduler.step()

	    # Validation
	    model.eval()
	    val_mse, val_res_acc, val_n = 0.0, 0.0, 0
	    with torch.no_grad():
	        for x, y, m, d in val_dl:
	            x, y, m, d = x.to(device), y.to(device), m.to(device), d.to(device)
	            pred, res_prob = model(x, m, d)
	            val_mse += mse_loss(pred, y).item() * x.shape[0]

	            # Residual accuracy over rows that have a residual column
	            labels, valid = residual_labels(y, m)
	            preds = (res_prob > 0.5).float()
	            val_res_acc += (preds[valid] == labels[valid]).sum().item()
	            val_n += x.shape[0]

	    train_mse /= train_n
	    val_mse /= val_n
	    val_res_acc /= max(val_n, 1)
	    elapsed = time.time() - t0

	    history.append({
	        "epoch": epoch + 1,
	        "train_mse": train_mse,
	        "val_mse": val_mse,
	        "val_residual_acc": val_res_acc,
	        "lr": epoch_lr,
	        "time": elapsed,
	    })

	    # Checkpoint whenever validation MSE improves.
	    if val_mse < best_val_loss:
	        best_val_loss = val_mse
	        torch.save(model.state_dict(), "morphism_net.pt")
	        marker = " ★"
	    else:
	        marker = ""

	    if (epoch + 1) % 5 == 0 or epoch == 0:
	        print(f" [{epoch+1:3d}/{EPOCHS}] train_mse={train_mse:.6f} "
	              f"val_mse={val_mse:.6f} res_acc={val_res_acc:.3f} "
	              f"lr={epoch_lr:.2e} ({elapsed:.1f}s){marker}")

	print("=" * 60)
	print(f"Best val MSE: {best_val_loss:.6f}")

	# ════════════════════════════════════════════════════════════════════════
	# Per-morphism evaluation
	# ════════════════════════════════════════════════════════════════════════

	print("\nPer-morphism validation MSE:")
	# Evaluate the best checkpoint written during training, not the last epoch.
	best_state = torch.load("morphism_net.pt", weights_only=True)
	model.load_state_dict(best_state)
	model.eval()

	names = [
	    "§1 coherence_even",
	    "§2 palindrome_odd",
	    "§3 lyapunov_bridge",
	    "§4 μ_isometry",
	    "§5 orbit_hom",
	    "§6 reality_linear",
	    "§7 composition",
	]

	with torch.no_grad():
	    # Run the whole validation set through the model in one pass.
	    x_all, y_all, m_all, d_all = (
	        t.to(device) for t in (X_val, Y_val, M_val, D_val)
	    )
	    pred_all, res_all = model(x_all, m_all, d_all)

	    for mi, label in enumerate(names):
	        mask = (m_all == mi)
	        count = mask.sum().item()
	        if count == 0:
	            # No validation rows for this morphism: nothing to report.
	            continue

	        mse = (pred_all[mask] - y_all[mask]).pow(2).mean().item()

	        # Error on the residual column only, comparing magnitudes.
	        col = RESIDUAL_COL[mi]
	        res_mse = 0.0
	        if col < y_all.shape[1]:
	            gap = pred_all[mask, col].abs() - y_all[mask, col].abs()
	            res_mse = gap.pow(2).mean().item()

	        print(f" {label:25s}: MSE={mse:.6f}, residual_MSE={res_mse:.6f}, n={count}")

	# ════════════════════════════════════════════════════════════════════════
	# Test the mod paradox: does the model distinguish ℝ from GF(p)?
	# ════════════════════════════════════════════════════════════════════════

	print("\nMod paradox test (§1 coherence_even):")


	def _domain_stats(mask):
	    """Summarise validation predictions for the rows selected by *mask*.

	    Returns ``(mse, true_residual, pred_residual, n)``: the full-output MSE,
	    the mean magnitude of output column 2 in the targets and in the
	    predictions, and the number of selected rows.
	    """
	    mse = ((pred_all[mask] - y_all[mask]) ** 2).mean().item()
	    true_residual = y_all[mask, 2].abs().mean().item()
	    pred_residual = pred_all[mask, 2].abs().mean().item()
	    return mse, true_residual, pred_residual, mask.sum().item()


	with torch.no_grad():
	    # Morphism 0 rows, split by domain id (0 = ℝ, 1 = GF(p)).
	    is_first_morphism = (m_all == 0)
	    mask_r = is_first_morphism & (d_all == 0)
	    mask_gfp = is_first_morphism & (d_all == 1)

	    if mask_r.any():
	        mse_r, res_r, pred_res_r, n_r = _domain_stats(mask_r)
	        print(f" ℝ domain: MSE={mse_r:.6f}, true_residual={res_r:.2e}, "
	              f"pred_residual={pred_res_r:.2e}, n={n_r}")

	    if not mask_gfp.any():
	        print(" (No GF(p) samples in validation set)")
	    else:
	        mse_gfp, res_gfp, pred_res_gfp, n_gfp = _domain_stats(mask_gfp)
	        print(f" GF(p) domain: MSE={mse_gfp:.6f}, true_residual={res_gfp:.2e}, "
	              f"pred_residual={pred_res_gfp:.2e}, n={n_gfp}")
	        print("\n The paradox: C(r)=C(1/r) holds exactly over ℝ (residual≈0)")
	        print(" but over GF(p), the 'residual' is nonzero — mod breaks symmetry.")

	# Persist the per-epoch metrics collected during training.
	with open("training_history.json", "w") as history_file:
	    json.dump(history, history_file, indent=2)

	# Count parameters once; the value is used in both the metadata and the log.
	n_params = sum(p.numel() for p in model.parameters())

	# Summary metadata describing the trained model.
	info = {
	    "name": "MorphismNet",
	    "params": n_params,
	    "morphisms": names,
	    "best_val_mse": best_val_loss,
	    "epochs": EPOCHS,
	    "dataset_size": N,
	    "architecture": "shared_encoder(3x256) + 7_heads(128→6) + residual_classifier",
	}
	with open("model_info.json", "w") as info_file:
	    json.dump(info, info_file, indent=2)

	print(f"\nModel saved: morphism_net.pt ({n_params:,} params)")
	print("Done. 🧬")