{ "adapt_base_lr": 1.0, "architectures": [ "TTTMLPForCausalLM" ], "bos_token_id": 151645, "chunk_size": 16, "conv_kernel": 4, "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5504, "max_position_embeddings": 4096, "memory_depth": 2, "mini_batch_eps": 1e-06, "mlp_bias": false, "model_type": "ttt_mlp", "num_attention_heads": 32, "num_hidden_layers": 24, "pad_token_id": 151643, "pretraining_tp": 1, "qkv_conv": false, "rms_norm_eps": 1e-06, "rope_scaling": { "attention_factor": 0.5, "beta_fast": 32.0, "beta_slow": 1.0, "factor": 32.0, "original_max_position_embeddings": 4096, "rope_type": "yarn" }, "rope_theta": 100000.0, "scan_checkpoint_group_size": 0, "torch_dtype": "float32", "transformers_version": "4.55.0.dev0", "use_cache": true, "vocab_size": 151936 }