morphism-net / generate_dataset.py

MorphismNet: 399K params trained on Eigenverse morphisms, mod paradox quantified

fcc3e72 verified 17 days ago

9.25 kB

	"""
	Generate training data from the six Eigenverse morphism families, plus
	S∘F∘T composition samples (stored as morphism id 6).
	Each sample: (morphism_id, input_features, output_features, domain)
	Domain: 0 = ℝ, 1 = GF(p)
	"""

	import numpy as np
	import json
	import os

	# Seed NumPy's global RNG so repeated runs draw the same sample sequence.
	np.random.seed(42)

	# Eigenverse constants
	# η = 1/√2
	ETA = 1 / np.sqrt(2)
	# μ = exp(3πi/4) = -η + iη, a point on the unit circle with μ^8 = 1
	MU = np.exp(1j * 3 * np.pi / 4)
	# 1 + √2; used as the normalizer in Res()
	DELTA_S = 1 + np.sqrt(2)
	# (1 + √5)/2; not referenced in the visible part of this script
	PHI = (1 + np.sqrt(5)) / 2

	# GF(p) prime
	# 65537 = 2^16 + 1; the Fermat inverses below (x^(P-2) mod P) rely on P being prime
	P = 65537 # small prime for training, p ≡ 1 mod 8

	def C(r):
	    """Coherence function C(r) = 2r / (1 + r^2).

	    Returns 0.0 for any r <= 0. For r > 0 the formula is symmetric under
	    inversion (C(r) = C(1/r)) and reaches its maximum C(1) = 1.
	    """
	    if r <= 0:
	        return 0.0
	    return 2 * r / (1 + r ** 2)

	def Res(r):
	    """Palindrome residual Res(r) = (r - 1/r) / DELTA_S.

	    Returns 0.0 for any r <= 0 (which also avoids dividing by zero).
	    For r > 0 the formula is odd under inversion: Res(1/r) = -Res(r).
	    """
	    if r <= 0:
	        return 0.0
	    return (r - 1 / r) / DELTA_S

	def C_mod(r, p):
	    """C(r) in GF(p): (2r * inv(1 + r^2)) mod p.

	    ``r`` is reduced mod ``p`` first. The inverse of the denominator is
	    computed as denom^(p-2) mod p (Fermat's little theorem), which is only
	    a true inverse when ``p`` is prime.

	    Returns None when 1 + r^2 ≡ 0 (mod p), i.e. the denominator has no
	    inverse; otherwise an integer in [0, p).
	    """
	    r = r % p
	    denom = (1 + r * r) % p
	    if denom == 0:
	        return None
	    inv_denom = pow(denom, p - 2, p)
	    return (2 * r * inv_denom) % p

	def mu_pow_mod(n, p):
	    """Return (re, im) of μ^n for μ = exp(3πi/4), using its period of 8.

	    The exponent is reduced mod 8 and the components are evaluated as
	    cos/sin of (n mod 8) * 3π/4.

	    NOTE: despite the name, nothing here is reduced into GF(p). ``p`` is
	    accepted but unused, and the result is a pair of floating-point
	    values (no integer scaling is applied).
	    """
	    n = n % 8
	    angle = n * 3 * np.pi / 4
	    return np.cos(angle), np.sin(angle)


	# ════════════════════════════════════════════════════════════════════════
	# Dataset generation
	# ════════════════════════════════════════════════════════════════════════

	# Number of loop iterations run for each morphism family below.
	N_SAMPLES_PER_MORPHISM = 50_000
	# One dict per sample with keys: morphism, input, output, domain, label.
	samples = []

	print("Generating morphism training data...")

	# §1 COHERENCE EVEN: C(r) = C(1/r)
	# Input: r > 0
	# Output: (C(r), C(1/r), C(r) - C(1/r))
	# The model should learn the residual is always 0
	print(" §1 Coherence even...")
	for _ in range(N_SAMPLES_PER_MORPHISM):
	    r = np.random.exponential(2.0) + 0.01  # r >= 0.01, so 1/r is safe
	    cr = C(r)
	    cr_inv = C(1 / r)
	    samples.append({
	        "morphism": 0,
	        "input": [r, 1 / r],
	        "output": [cr, cr_inv, cr - cr_inv],  # residual should be 0
	        "domain": 0,
	        "label": "coherence_even",
	    })

	    # GF(p) version: truncate r to three decimals and use it as a residue.
	    r_int = int(r * 1000) % P
	    if r_int > 0:
	        cr_mod = C_mod(r_int, P)
	        inv_r = pow(r_int, P - 2, P)  # Fermat inverse of r_int mod P
	        cr_inv_mod = C_mod(inv_r, P)
	        # C_mod returns None when its denominator is 0 mod P; skip those.
	        if cr_mod is not None and cr_inv_mod is not None:
	            samples.append({
	                "morphism": 0,
	                "input": [r_int / P, inv_r / P],  # normalized
	                "output": [
	                    cr_mod / P,
	                    cr_inv_mod / P,
	                    (cr_mod - cr_inv_mod) % P / P,
	                ],
	                "domain": 1,
	                "label": "coherence_even_gfp",
	            })

	# §2 PALINDROME ODD: Res(1/r) = -Res(r)
	print(" §2 Palindrome odd...")
	for _ in range(N_SAMPLES_PER_MORPHISM):
	    r = np.random.exponential(2.0) + 0.01  # r >= 0.01, so 1/r is safe
	    res_r = Res(r)
	    res_inv = Res(1 / r)
	    samples.append({
	        "morphism": 1,
	        "input": [r, 1 / r],
	        "output": [res_r, res_inv, res_r + res_inv],  # sum should be 0
	        "domain": 0,
	        "label": "palindrome_odd",
	    })

	# §3 LYAPUNOV BRIDGE: C(exp(λ)) = sech(λ)
	print(" §3 Lyapunov bridge...")
	for _ in range(N_SAMPLES_PER_MORPHISM):
	    lam = np.random.uniform(-5, 5)
	    exp_lam = np.exp(lam)  # evaluated once, used as both feature and argument
	    c_exp = C(exp_lam)
	    sech = 1 / np.cosh(lam)
	    samples.append({
	        "morphism": 2,
	        "input": [lam, exp_lam],
	        "output": [c_exp, sech, c_exp - sech],  # residual should be 0
	        "domain": 0,
	        "label": "lyapunov_bridge",
	    })

	# §4 μ-ISOMETRY: |μ·z| = |z|
	print(" §4 μ-isometry...")
	for _ in range(N_SAMPLES_PER_MORPHISM):
	    # Real part is drawn first, then the imaginary part.
	    z = np.random.randn() + 1j * np.random.randn()
	    mu_z = MU * z
	    abs_z = abs(z)
	    abs_mu_z = abs(mu_z)
	    samples.append({
	        "morphism": 3,
	        "input": [z.real, z.imag, mu_z.real, mu_z.imag],
	        "output": [abs_z, abs_mu_z, abs_z - abs_mu_z],  # residual 0
	        "domain": 0,
	        "label": "mu_isometry",
	    })

	# §5 ORBIT HOMOMORPHISM: μ^(a+b) = μ^a · μ^b, period 8
	print(" §5 Orbit homomorphism...")
	for _ in range(N_SAMPLES_PER_MORPHISM):
	    a = np.random.randint(0, 100)  # upper bound is exclusive: 0..99
	    b = np.random.randint(0, 100)
	    mu_ab = MU ** (a + b)
	    mu_a_mu_b = (MU ** a) * (MU ** b)
	    # Also encode the period-8 structure
	    a_mod8 = a % 8
	    b_mod8 = b % 8
	    ab_mod8 = (a + b) % 8
	    samples.append({
	        "morphism": 4,
	        "input": [a / 100, b / 100, a_mod8 / 8, b_mod8 / 8],
	        "output": [
	            mu_ab.real, mu_ab.imag,
	            mu_a_mu_b.real, mu_a_mu_b.imag,
	            ab_mod8 / 8,
	            abs(mu_ab - mu_a_mu_b),  # should be ~0
	        ],
	        "domain": 0,
	        "label": "orbit_homomorphism",
	    })

	# §6 REALITY ℝ-LINEAR: F(s,t) = t + is, F(η,-η) = μ
	print(" §6 Reality ℝ-linear...")
	for _ in range(N_SAMPLES_PER_MORPHISM):
	    s = np.random.randn()
	    t = np.random.randn()
	    z = complex(t, s)  # reality(s, t) = t + is
	    # Additivity: F(s1+s2, t1+t2) = F(s1,t1) + F(s2,t2)
	    s2 = np.random.randn()
	    t2 = np.random.randn()
	    z_sum = complex(t + t2, s + s2)
	    z1_plus_z2 = z + complex(t2, s2)
	    # Distance from μ-embedding point
	    mu_dist = abs(z - MU)
	    balance_dist = abs(s - ETA) + abs(t - (-ETA))  # distance from (η, -η)
	    samples.append({
	        "morphism": 5,
	        "input": [s, t, s2, t2],
	        "output": [
	            z.real, z.imag,
	            mu_dist,
	            balance_dist,
	            abs(z_sum - z1_plus_z2),  # additivity residual, should be 0
	        ],
	        "domain": 0,
	        "label": "reality_linear",
	    })

	# ════════════════════════════════════════════════════════════════════════
	# Composition samples: S∘F∘T chains
	# ════════════════════════════════════════════════════════════════════════
	print(" Compositions (S∘F∘T)...")
	for _ in range(N_SAMPLES_PER_MORPHISM):
	    s = np.random.randn()
	    t = np.random.randn()
	    # T: reality map
	    z = complex(t, s)
	    # F: coherence of |z|
	    r = abs(z)
	    f_val = C(r)
	    # S: Lyapunov (at balance point S(0) = 1, off-balance S preserves C value)
	    # Flag samples whose (s, t) lies within 0.01 of the balance point (η, -η).
	    near_balance = abs(s - ETA) < 0.01 and abs(t + ETA) < 0.01
	    # Full chain output
	    samples.append({
	        "morphism": 6,  # composition
	        "input": [s, t, r, f_val],
	        "output": [
	            f_val,
	            C(1),  # reference: kernel maximum
	            abs(f_val - 1),  # distance from maximum (balance)
	            1.0 if near_balance else 0.0,  # near balance point?
	        ],
	        "domain": 0,
	        "label": "composition_SFT",
	    })

	print(f"\nTotal samples: {len(samples)}")

	# ════════════════════════════════════════════════════════════════════════
	# Save dataset
	# ════════════════════════════════════════════════════════════════════════

	# Normalize to fixed-width tensors for training
	# Max input dim = 4, max output dim = 6
	MAX_IN = 4
	MAX_OUT = 6

	inputs = []
	outputs = []
	morphism_ids = []
	domain_ids = []

	for sample in samples:
	    # Truncate to the fixed width, then right-pad with zeros.
	    inp = sample["input"][:MAX_IN]
	    out = sample["output"][:MAX_OUT]
	    inputs.append(inp + [0.0] * (MAX_IN - len(inp)))
	    outputs.append(out + [0.0] * (MAX_OUT - len(out)))
	    morphism_ids.append(sample["morphism"])
	    domain_ids.append(sample["domain"])

	inputs = np.array(inputs, dtype=np.float32)
	outputs = np.array(outputs, dtype=np.float32)
	morphism_ids = np.array(morphism_ids, dtype=np.int64)
	domain_ids = np.array(domain_ids, dtype=np.int64)

	# Replace NaN/Inf
	inputs = np.nan_to_num(inputs, nan=0.0, posinf=10.0, neginf=-10.0)
	outputs = np.nan_to_num(outputs, nan=0.0, posinf=10.0, neginf=-10.0)

	# Clip extremes
	inputs = np.clip(inputs, -100, 100)
	outputs = np.clip(outputs, -100, 100)

	# Arrays are written as .npy files under ./data (created if missing).
	os.makedirs("data", exist_ok=True)
	np.save("data/inputs.npy", inputs)
	np.save("data/outputs.npy", outputs)
	np.save("data/morphism_ids.npy", morphism_ids)
	np.save("data/domain_ids.npy", domain_ids)

	print(f"Saved: inputs {inputs.shape}, outputs {outputs.shape}")
	print(f"Morphism distribution: {np.bincount(morphism_ids)}")
	print(f"Domain distribution: ℝ={np.sum(domain_ids==0)}, GF(p)={np.sum(domain_ids==1)}")

	# Stats
	names = ["coherence_even", "palindrome_odd", "lyapunov_bridge",
	         "mu_isometry", "orbit_hom", "reality_linear", "composition"]
	# Output column reported per family. Families 0-3 keep their identity
	# residual in column 2, family 4 in column 5 and family 5 in column 4.
	# Family 6 (composition) also reports column 2, which there holds
	# |f_val - 1| (distance from the maximum) rather than an identity residual.
	residual_cols = {4: 5, 5: 4}
	for m, name in enumerate(names):
	    mask = morphism_ids == m
	    if not mask.any():
	        continue
	    residual_col = residual_cols.get(m, 2)
	    res = outputs[mask, min(residual_col, MAX_OUT - 1)]
	    print(f" §{m+1} {name:20s}: n={mask.sum():6d}, "
	          f"residual mean={np.mean(np.abs(res)):.2e}, max={np.max(np.abs(res)):.2e}")