# 5M Symbiogenesis config — multi-organelle sequence mixing
# 8 blocks with 3 organelles per block: CausalConv + Monarch + LongConv
# Inspired by biological symbiogenesis (organism fusion)

# Architecture hyperparameters. n_layers matches the "8 blocks" noted above.
[model]
arch = "symbiogenesis"
embed_dim = 256
n_layers = 8
n_heads = 4  # unused by Symbio, kept for struct compat
head_dim = 64  # unused by Symbio
n_monarch_heads = 1  # single-head Monarch per block
conv_kernel_size = 4
ffn_mult = 4
context_length = 256
dropout = 0.0
bias = false
weight_tying = true
# NOTE(review): presumably the weight of a free-energy term; confirm against
# the code that consumes this key.
free_energy_beta = 0.001

# Optimisation schedule and run bookkeeping.
[training]
optimizer = "adamw"
lr = 6e-4
min_lr = 6e-5  # 10% of lr
warmup_steps = 500
max_steps = 12305
batch_size = 32
grad_clip = 1.0
precision = "f16"
eval_interval = 500
eval_steps = 25
checkpoint_interval = 2000
seed = 42

[training.curriculum]
enabled = false

[training.coreset]
enabled = false

# The only optional training feature switched on in this config.
[training.gelation]
enabled = true
cusum_threshold = 5.0
window_size = 10

# Relative paths.
# NOTE(review): the base directory they resolve against is decided by the
# consumer; confirm before moving this file.
[data]
train_path = "../text-pipeline/output/train.txt"
val_path = "../text-pipeline/output/val.txt"
tokenizer_dir = "../text-pipeline/output"

# Generation settings; precision matches [training].precision.
[inference]
precision = "f16"
compile = false
temperature = 0.8
top_k = 40
max_new_tokens = 500