aether-mind-v6.2 / config.json
BlockArtica's picture
v6.2 release: cosine LR fine-tune of v6.1, mean loss 10.18 → 8.43 (-17%)
2dcc491 verified
{
"num_layers": 24,
"hidden_size": 896,
"num_attention_heads": 14,
"num_sephirot_heads": 10,
"num_generalist_heads": 2,
"num_sink_heads": 2,
"head_dim": 64,
"intermediate_size": 4864,
"vocab_size": 151936,
"max_position_embeddings": 32768,
"rope_theta": 1000000.0,
"rms_norm_eps": 1e-6,
"bos_token_id": 151643,
"eos_token_id": 151645,
"pad_token_id": 151643,
"nsa": {
"compression_block_size": 64,
"selected_top_k": 2048,
"sliding_window_size": 512,
"num_sink_tokens": 4,
"sephirot_top_k": 256
},
"eviction_order": [
"Malkuth",
"Yesod",
"Hod",
"Netzach",
"Gevurah",
"Chesed",
"Binah",
"Chochmah",
"Tiferet",
"Keter"
]
}