{
"adapt_base_lr": 1.0,
"architectures": [
"TTTMLPForCausalLM"
],
"bos_token_id": 151645,
"chunk_size": 16,
"conv_kernel": 4,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 5504,
"max_position_embeddings": 4096,
"memory_depth": 2,
"mini_batch_eps": 1e-06,
"mlp_bias": false,
"model_type": "ttt_mlp",
"num_attention_heads": 32,
"num_hidden_layers": 24,
"pad_token_id": 151643,
"pretraining_tp": 1,
"qkv_conv": false,
"rms_norm_eps": 1e-06,
"rope_scaling": {
"attention_factor": 0.5,
"beta_fast": 32.0,
"beta_slow": 1.0,
"factor": 32.0,
"original_max_position_embeddings": 4096,
"rope_type": "yarn"
},
"rope_theta": 100000.0,
"scan_checkpoint_group_size": 0,
"torch_dtype": "float32",
"transformers_version": "4.55.0.dev0",
"use_cache": true,
"vocab_size": 151936
}