File size: 1,148 Bytes
86a68fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# 5M Symbiogenesis config — multi-organelle sequence mixing
# 8 blocks with 3 organelles per block: CausalConv + Monarch + LongConv
# Inspired by biological symbiogenesis (organism fusion)

# Architecture hyperparameters for the model selected by `arch`.
[model]
arch = "symbiogenesis"
embed_dim = 256
# Matches the "8 blocks" stated in the header comment of this file.
n_layers = 8
# n_heads * head_dim = 4 * 64 = 256, i.e. equal to embed_dim, even though
# both keys are marked as unused for this architecture.
n_heads = 4           # unused by Symbio, kept for struct compat
head_dim = 64          # unused by Symbio
n_monarch_heads = 1    # single-head Monarch per block
# Presumably the kernel width of the CausalConv organelle — TODO confirm.
conv_kernel_size = 4
# Presumably the feed-forward hidden width as a multiple of embed_dim
# (4 * 256 = 1024) — TODO confirm against the model code.
ffn_mult = 4
# Presumably measured in tokens — TODO confirm.
context_length = 256
dropout = 0.0
bias = false
# Presumably shares the token-embedding and output-projection weights —
# TODO confirm.
weight_tying = true
# NOTE(review): the meaning of this coefficient is not visible in this file;
# presumably it weights an auxiliary loss term — confirm.
free_energy_beta = 0.001

# Optimizer, schedule, and bookkeeping settings for the training run.
[training]
optimizer = "adamw"
# min_lr is one tenth of lr. The shape of the decay between them is not
# specified in this file — presumably chosen by the trainer; confirm.
lr = 6e-4
min_lr = 6e-5
warmup_steps = 500
max_steps = 12305
batch_size = 32
grad_clip = 1.0
# Same value as [inference].precision.
# NOTE(review): half-precision training — confirm the trainer applies loss
# scaling or otherwise guards against gradient underflow.
precision = "f16"
# Presumably: evaluate every 500 training steps, using 25 batches per
# evaluation — TODO confirm units.
eval_interval = 500
eval_steps = 25
# Presumably measured in training steps — TODO confirm.
checkpoint_interval = 2000
seed = 42

# Curriculum feature toggle; switched off in this config.
[training.curriculum]
enabled = false

# Coreset feature toggle; switched off in this config.
[training.coreset]
enabled = false

# Gelation feature; the only optional training feature enabled in this config.
# NOTE(review): the key name suggests a CUSUM (cumulative-sum) change
# detector; what signal it monitors and the units of the threshold and
# window are not visible here — confirm against the trainer.
[training.gelation]
enabled = true
cusum_threshold = 5.0
window_size = 10

# Dataset and tokenizer locations. All three point into the same
# ../text-pipeline/output directory.
# NOTE(review): the paths are relative; whether they resolve against the
# working directory or this file's location is not visible here — confirm.
[data]
train_path = "../text-pipeline/output/train.txt"
val_path = "../text-pipeline/output/val.txt"
tokenizer_dir = "../text-pipeline/output"

# Generation-time settings.
[inference]
# Same value as [training].precision.
precision = "f16"
compile = false
# Sampling controls.
temperature = 0.8
top_k = 40
# NOTE(review): 500 exceeds [model].context_length (256); presumably the
# generator truncates or slides its context window — confirm.
max_new_tokens = 500