darkmind-v1-chat-25k / config.json
ferxalb's picture
Upload trained model folder
29d4e2a verified
{
"vocab_size": 256,
"hidden_dim": 64,
"patch_encoder_dim": 32,
"n_layers": 3,
"n_heads": 4,
"n_experts": 4,
"top_k_experts": 2,
"expert_hidden_mult": 2,
"patch_encoder_layers": 1,
"max_seq_length": 512,
"max_patches": 64,
"learning_rate": 0.001,
"weight_decay": 0.01,
"beta1": 0.9,
"beta2": 0.98,
"epsilon": 1e-08,
"batch_size": 256,
"micro_batch_size": 4,
"gradient_clip": 0.5,
"load_balance_coefficient": 0.01,
"dropout": 0.0,
"use_prenorm": true,
"entropy_threshold": 0.4,
"min_patch_size": 4,
"max_patch_size": 32
}