LisaMegaWatts committed on
Commit
86a68fc
·
verified ·
1 Parent(s): 3531355

Upload config.toml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.toml +55 -0
config.toml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 5M Symbiogenesis config — multi-organelle sequence mixing
2
+ # 8 blocks with 3 organelles per block: CausalConv + Monarch + LongConv
3
+ # Inspired by biological symbiogenesis (organism fusion)
4
+
5
+ [model]
6
+ arch = "symbiogenesis"
7
+ embed_dim = 256
8
+ n_layers = 8
9
+ n_heads = 4 # unused by the symbiogenesis arch; kept for struct compatibility
10
+ head_dim = 64 # unused by the symbiogenesis arch
11
+ n_monarch_heads = 1 # single-head Monarch per block
12
+ conv_kernel_size = 4
13
+ ffn_mult = 4
14
+ context_length = 256
15
+ dropout = 0.0
16
+ bias = false
17
+ weight_tying = true
18
+ free_energy_beta = 0.001
19
+
20
+ [training]
21
+ optimizer = "adamw"
22
+ lr = 6e-4
23
+ min_lr = 6e-5
24
+ warmup_steps = 500
25
+ max_steps = 12305
26
+ batch_size = 32
27
+ grad_clip = 1.0
28
+ precision = "f16"
29
+ eval_interval = 500
30
+ eval_steps = 25
31
+ checkpoint_interval = 2000
32
+ seed = 42
33
+
34
+ [training.curriculum]
35
+ enabled = false
36
+
37
+ [training.coreset]
38
+ enabled = false
39
+
40
+ [training.gelation]
41
+ enabled = true
42
+ cusum_threshold = 5.0
43
+ window_size = 10
44
+
45
+ [data]
46
+ train_path = "../text-pipeline/output/train.txt"
47
+ val_path = "../text-pipeline/output/val.txt"
48
+ tokenizer_dir = "../text-pipeline/output"
49
+
50
+ [inference]
51
+ precision = "f16"
52
+ compile = false
53
+ temperature = 0.8
54
+ top_k = 40
55
+ max_new_tokens = 500