morphism-net / generate_dataset.py
beanapologist's picture
MorphismNet: 399K params trained on Eigenverse morphisms, mod paradox quantified
fcc3e72 verified
"""
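Generate training data from the six Eigenverse morphism families, plus
composed S∘F∘T chains, which are stored as a seventh class (morphism id 6).
Each sample is a record with the keys: morphism (integer id), input (feature
list), output (feature list), domain, and label (family name).
Domain: 0 = ℝ, 1 = GF(p)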
"""
import numpy as np
import json
import os
# Seed NumPy's global (legacy) RNG so every run regenerates the same dataset.
np.random.seed(42)
# Eigenverse constants
ETA = 1 / np.sqrt(2)  # 1/√2
MU = np.exp(1j * 3 * np.pi / 4)  # unit complex number at angle 3π/4, i.e. -ETA + i·ETA
DELTA_S = 1 + np.sqrt(2)  # 1 + √2 (the silver ratio); divisor used by Res()
PHI = (1 + np.sqrt(5)) / 2  # (1 + √5)/2 (the golden ratio)
# GF(p) prime
P = 65537  # small prime for training (2^16 + 1), p ≡ 1 mod 8
def C(r):
    """Coherence function C(r) = 2r / (1 + r²).

    Args:
        r: Real scalar.

    Returns:
        ``0.0`` when ``r <= 0``; otherwise ``2 * r / (1 + r ** 2)``.
        The value is 1 at ``r == 1`` and is the same for ``r`` and ``1 / r``.
    """
    non_positive = r <= 0
    return 0.0 if non_positive else (2 * r) / (1 + r ** 2)
def Res(r):
    """Palindrome residual Res(r) = (r - 1/r) / DELTA_S.

    Args:
        r: Real scalar.

    Returns:
        ``0.0`` when ``r <= 0``; otherwise ``(r - 1 / r) / DELTA_S``.
        Swapping ``r`` for ``1 / r`` flips the sign of the result.
    """
    non_positive = r <= 0
    return 0.0 if non_positive else (r - 1 / r) / DELTA_S
def C_mod(r, p):
    """Evaluate C(r) = 2r / (1 + r²) with modular arithmetic mod ``p``.

    Args:
        r: Integer argument; reduced modulo ``p`` before use.
        p: Modulus. The inverse is taken as ``denominator ** (p - 2)``,
            which is the modular inverse only when ``p`` is prime.

    Returns:
        ``(2 * r * inverse(1 + r²)) % p``, or ``None`` when ``1 + r²`` is
        congruent to 0 mod ``p`` and therefore has no inverse.
    """
    x = r % p
    denominator = (x * x + 1) % p
    if not denominator:
        return None
    # Fermat's little theorem: d^(p-2) is the inverse of d for prime p.
    return (2 * x * pow(denominator, p - 2, p)) % p
def mu_pow_mod(n, p):
    """Return μ^n as a floating-point ``(re, im)`` pair.

    μ = exp(i·3π/4) has period 8 (μ^8 = 1), so the exponent is first reduced
    with ``n % 8`` and the point on the unit circle at angle
    ``(n % 8) · 3π/4`` is returned.

    NOTE: despite the name, nothing here is computed in GF(p). ``p`` is
    accepted but never read, and the components are plain floats in
    [-1, 1]; they are neither reduced mod ``p`` nor scaled to integers.

    Args:
        n: Exponent.
        p: Unused.

    Returns:
        Tuple ``(cos(θ), sin(θ))`` with ``θ = (n % 8) * 3 * π / 4``.
    """
    exponent = n % 8
    theta = exponent * 3 * np.pi / 4
    return np.cos(theta), np.sin(theta)
# ════════════════════════════════════════════════════════════════════════
# Dataset generation
# ════════════════════════════════════════════════════════════════════════
# Number of loop iterations run for each morphism family.
N_SAMPLES_PER_MORPHISM = 50000
# Flat list of record dicts; each section appends to it in generation order.
samples = []
print("Generating morphism training data...")

# §1 COHERENCE EVEN: C(r) = C(1/r)
# Input: r > 0 and its reciprocal
# Output: (C(r), C(1/r), C(r) - C(1/r))
# The model should learn that the residual is always 0.
# NOTE(review): the "Β§" in the progress message looks like a mis-decoded
# "§"; the string is left untouched here - confirm the file encoding.
print(" Β§1 Coherence even...")
for _ in range(N_SAMPLES_PER_MORPHISM):
    radius = np.random.exponential(2.0) + 0.01  # shifted so radius >= 0.01
    reciprocal = 1 / radius
    coherence = C(radius)
    coherence_recip = C(reciprocal)
    real_record = {
        "morphism": 0,
        "input": [radius, reciprocal],
        "output": [coherence, coherence_recip, coherence - coherence_recip],
        "domain": 0,
        "label": "coherence_even",
    }
    samples.append(real_record)

    # GF(p) counterpart: keep three decimal places of r as an integer residue.
    residue = int(radius * 1000) % P
    if residue <= 0:
        continue
    residue_inv = pow(residue, P - 2, P)  # modular inverse (P is prime)
    coherence_mod = C_mod(residue, P)
    coherence_inv_mod = C_mod(residue_inv, P)
    if coherence_mod is None or coherence_inv_mod is None:
        # 1 + r² has no inverse mod P; no GF(p) record for this draw.
        continue
    gfp_record = {
        "morphism": 0,
        # Residues are divided by P so that all features lie in [0, 1).
        "input": [residue / P, residue_inv / P],
        "output": [
            coherence_mod / P,
            coherence_inv_mod / P,
            ((coherence_mod - coherence_inv_mod) % P) / P,
        ],
        "domain": 1,
        "label": "coherence_even_gfp",
    }
    samples.append(gfp_record)
# §2 PALINDROME ODD: Res(1/r) = -Res(r)
# Output: (Res(r), Res(1/r), Res(r) + Res(1/r)); the sum should be 0.
# NOTE(review): the "Β§" in the progress message looks like a mis-decoded
# "§"; the string is left untouched here - confirm the file encoding.
print(" Β§2 Palindrome odd...")
for _ in range(N_SAMPLES_PER_MORPHISM):
    radius = np.random.exponential(2.0) + 0.01  # shifted so radius >= 0.01
    reciprocal = 1 / radius
    forward = Res(radius)
    mirrored = Res(reciprocal)
    record = {
        "morphism": 1,
        "input": [radius, reciprocal],
        "output": [forward, mirrored, forward + mirrored],
        "domain": 0,
        "label": "palindrome_odd",
    }
    samples.append(record)
# §3 LYAPUNOV BRIDGE: C(exp(λ)) = sech(λ)
# Output: (C(exp(λ)), sech(λ), difference); the difference should be 0.
# NOTE(review): the "Β§" in the progress message looks like a mis-decoded
# "§"; the string is left untouched here - confirm the file encoding.
print(" Β§3 Lyapunov bridge...")
for _ in range(N_SAMPLES_PER_MORPHISM):
    exponent = np.random.uniform(-5, 5)
    growth = np.exp(exponent)
    coherence = C(growth)
    sech_value = 1 / np.cosh(exponent)
    record = {
        "morphism": 2,
        "input": [exponent, growth],
        "output": [coherence, sech_value, coherence - sech_value],
        "domain": 0,
        "label": "lyapunov_bridge",
    }
    samples.append(record)
# §4 μ-ISOMETRY: |μ·z| = |z|
# Output: (|z|, |μ·z|, difference); the difference should be 0.
# NOTE(review): the "Β§" and "ΞΌ" in the progress message look like
# mis-decoded "§" and "μ"; the string is left untouched here - confirm the
# file encoding.
print(" Β§4 ΞΌ-isometry...")
for _ in range(N_SAMPLES_PER_MORPHISM):
    # Real part is drawn first, then the imaginary part.
    real_part = np.random.randn()
    imag_part = np.random.randn()
    point = real_part + 1j * imag_part
    rotated = MU * point
    modulus = abs(point)
    rotated_modulus = abs(rotated)
    record = {
        "morphism": 3,
        "input": [point.real, point.imag, rotated.real, rotated.imag],
        "output": [modulus, rotated_modulus, modulus - rotated_modulus],
        "domain": 0,
        "label": "mu_isometry",
    }
    samples.append(record)
# §5 ORBIT HOMOMORPHISM: μ^(a+b) = μ^a · μ^b, period 8
# NOTE(review): the "Β§" in the progress message looks like a mis-decoded
# "§"; the string is left untouched here - confirm the file encoding.
print(" Β§5 Orbit homomorphism...")
for _ in range(N_SAMPLES_PER_MORPHISM):
    first = np.random.randint(0, 100)
    second = np.random.randint(0, 100)
    total = first + second
    power_of_sum = MU ** total
    product_of_powers = (MU ** first) * (MU ** second)
    record = {
        "morphism": 4,
        # Raw exponents scaled by 100, plus their residues mod 8 scaled by 8,
        # which expose the period-8 structure to the model.
        "input": [first / 100, second / 100, (first % 8) / 8, (second % 8) / 8],
        "output": [
            power_of_sum.real,
            power_of_sum.imag,
            product_of_powers.real,
            product_of_powers.imag,
            (total % 8) / 8,
            abs(power_of_sum - product_of_powers),  # should be ~0
        ],
        "domain": 0,
        "label": "orbit_homomorphism",
    }
    samples.append(record)
# §6 REALITY ℝ-LINEAR: F(s,t) = t + is, F(η,-η) = μ
# NOTE(review): the "Β§" in the progress message looks like a mis-decoded
# "§"; the string is left untouched here - confirm the file encoding.
print(" Β§6 Reality ℝ-linear...")
for _ in range(N_SAMPLES_PER_MORPHISM):
    # Four draws per record, in this order: s1, t1, s2, t2.
    s1 = np.random.randn()
    t1 = np.random.randn()
    s2 = np.random.randn()
    t2 = np.random.randn()

    image = complex(t1, s1)  # reality(s, t) = t + is
    # Additivity: F(s1+s2, t1+t2) = F(s1,t1) + F(s2,t2)
    image_of_sum = complex(t1 + t2, s1 + s2)
    sum_of_images = complex(t1, s1) + complex(t2, s2)

    # Distance of the image from μ, and L1 distance of (s, t) from (η, -η).
    distance_to_mu = abs(image - MU)
    distance_to_balance = abs(s1 - ETA) + abs(t1 + ETA)

    record = {
        "morphism": 5,
        "input": [s1, t1, s2, t2],
        "output": [
            image.real,
            image.imag,
            distance_to_mu,
            distance_to_balance,
            abs(image_of_sum - sum_of_images),  # additivity residual, should be 0
        ],
        "domain": 0,
        "label": "reality_linear",
    }
    samples.append(record)
# ════════════════════════════════════════════════════════════════════════
# Composition samples: S∘F∘T chains
# ════════════════════════════════════════════════════════════════════════
print(" Compositions (S∘F∘T)...")
kernel_maximum = C(1)  # reference value, identical for every record
for _ in range(N_SAMPLES_PER_MORPHISM):
    s_coord = np.random.randn()
    t_coord = np.random.randn()
    # T: reality map (s, t) -> t + is
    image = complex(t_coord, s_coord)
    # F: coherence of |z|
    modulus = abs(image)
    coherence = C(modulus)
    # S: Lyapunov (at balance point S(0) = 1, off-balance S preserves C value);
    # no separate computation is done for S, the coherence value is emitted.
    # Flag is set only when (s, t) lies within 0.01 of (η, -η) in each coordinate.
    if abs(s_coord - ETA) < 0.01 and abs(t_coord + ETA) < 0.01:
        near_balance = 1.0
    else:
        near_balance = 0.0
    record = {
        "morphism": 6,  # composition
        "input": [s_coord, t_coord, modulus, coherence],
        "output": [
            coherence,
            kernel_maximum,  # reference: kernel maximum
            abs(coherence - 1),  # distance from maximum (balance)
            near_balance,
        ],
        "domain": 0,
        "label": "composition_SFT",
    }
    samples.append(record)
print(f"\nTotal samples: {len(samples)}")
# ════════════════════════════════════════════════════════════════════════
# Save dataset
# ════════════════════════════════════════════════════════════════════════
# Records are flattened into fixed-width tensors for training:
# at most 4 input features and 6 output features per record.
MAX_IN = 4
MAX_OUT = 6


def _pad_to_width(values, width):
    """Truncate *values* to *width* entries and right-pad with 0.0."""
    head = values[:width]
    return head + [0.0] * (width - len(head))


inputs = np.array(
    [_pad_to_width(record["input"], MAX_IN) for record in samples],
    dtype=np.float32,
)
outputs = np.array(
    [_pad_to_width(record["output"], MAX_OUT) for record in samples],
    dtype=np.float32,
)
morphism_ids = np.array([record["morphism"] for record in samples], dtype=np.int64)
domain_ids = np.array([record["domain"] for record in samples], dtype=np.int64)

# Replace NaN with 0 and ±Inf with ±10, then clip every feature to [-100, 100].
inputs = np.clip(
    np.nan_to_num(inputs, nan=0.0, posinf=10.0, neginf=-10.0), -100, 100
)
outputs = np.clip(
    np.nan_to_num(outputs, nan=0.0, posinf=10.0, neginf=-10.0), -100, 100
)

os.makedirs("data", exist_ok=True)
for path, array in (
    ("data/inputs.npy", inputs),
    ("data/outputs.npy", outputs),
    ("data/morphism_ids.npy", morphism_ids),
    ("data/domain_ids.npy", domain_ids),
):
    np.save(path, array)

print(f"Saved: inputs {inputs.shape}, outputs {outputs.shape}")
print(f"Morphism distribution: {np.bincount(morphism_ids)}")
print(f"Domain distribution: ℝ={np.sum(domain_ids==0)}, GF(p)={np.sum(domain_ids==1)}")
# Stats
# Per-class summary (mean and max of |value|) of one output column.
# For ids 0-5 that column is the identity residual; for the composition
# class (id 6) column 2 holds |C(r) - 1| instead.
# NOTE(review): the "Β§" in the message looks like a mis-decoded "§"; the
# string is left untouched here - confirm the file encoding.
names = ["coherence_even", "palindrome_odd", "lyapunov_bridge",
         "mu_isometry", "orbit_hom", "reality_linear", "composition"]
# Classes whose summarised column is not output column 2.
column_overrides = {4: 5, 5: 4}
for m in range(7):
    mask = morphism_ids == m
    if not mask.any():
        continue  # no records for this class
    residual_col = column_overrides.get(m, 2)
    res = outputs[mask, min(residual_col, MAX_OUT-1)]
    print(f" Β§{m+1} {names[m]:20s}: n={mask.sum():6d}, "
          f"residual mean={np.mean(np.abs(res)):.2e}, max={np.max(np.abs(res)):.2e}")