File size: 986 Bytes
32f9824 48bc772 32f9824 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | {
"att_dropout": 0.0,
"att_heads": 16,
"embed_dim": 2048,
"ff_dim": 6144,
"ff_dropout": 0.0,
"head_dim": 128,
"kv_heads": 8,
"layer_types": [
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful",
"stateless",
"stateful"
],
"memory_gate_type": "linear",
"num_layers": 28,
"padding_idx": 151669,
"rope_base": 1000000,
"seq_len": 8192,
"skip_stm": false,
"stm_batch_size": 1,
"stm_size": 4096,
"tie_embeddings": true,
"training_cache": true,
"use_flash_attention": false,
"use_memory_gate": true,
"use_separate_memory_projections": true,
"vocab_size": 151936
} |