{
  "model_type": "blt",
  "vocab_size": 260,
  "max_position_embeddings": 4096,
  "patch_in_forward": true,
  "realtime_patching": true,
  "patching_mode": "entropy",
  "patch_size": 4,
  "patching_threshold": 1.335442066192627,
  "patching_threshold_add": null,
  "max_patch_length": null,
  "patching_batch_size": 1,
  "patching_device": "cuda",
  "monotonicity": false,
  "cross_attn_k": 2,
  "encoder_hash_byte_group_size": [
    3,
    4,
    5,
    6,
    7,
    8
  ],
  "encoder_hash_byte_group_vocab": 500002,
  "encoder_hash_byte_group_nb_functions": 1,
  "pm_size": 0,
  "patcher_config": {
    "vocab_size": 260,
    "hidden_size": 768,
    "num_hidden_layers": 14,
    "num_attention_heads": 12,
    "num_key_value_heads": null,
    "max_position_embeddings": 8192,
    "rms_norm_eps": 1e-05,
    "dropout": 0.0,
    "rope_theta": 10000.0,
    "attn_impl": "xformers",
    "attn_bias_type": "local_block_causal",
    "intermediate_size": 2048
  },
  "encoder_config": {
    "vocab_size": 260,
    "cross_attn_all_layers": false,
    "cross_attn_k": 2,
    "hidden_size_global": 2048,
    "pm_size": 0,
    "hidden_size": 1024,
    "num_attention_heads": 16,
    "num_key_value_heads": null,
    "num_hidden_layers": 1,
    "rms_norm_eps": 1e-05,
    "dropout": 0.0,
    "max_position_embeddings": 24576,
    "rope_theta": 500000.0,
    "rope_scaling": {
      "rope_type": "default"
    },
    "hidden_act": "silu",
    "_attn_implementation": "sdpa",
    "intermediate_size": 2816
  },
  "decoder_config": {
    "vocab_size": 260,
    "cross_attn_all_layers": true,
    "cross_attn_k": 2,
    "hidden_size_global": 2048,
    "hidden_size": 1024,
    "num_attention_heads": 16,
    "num_key_value_heads": null,
    "num_hidden_layers": 9,
    "rms_norm_eps": 1e-05,
    "dropout": 0.0,
    "max_position_embeddings": 24576,
    "rope_theta": 500000.0,
    "rope_scaling": {
      "rope_type": "default"
    },
    "hidden_act": "silu",
    "_attn_implementation": "sdpa",
    "intermediate_size": 2816
  },
  "global_config": {
    "hidden_size": 2048,
    "num_attention_heads": 16,
    "num_key_value_heads": null,
    "num_hidden_layers": 25,
    "rms_norm_eps": 1e-05,
    "dropout": 0.0,
    "max_position_embeddings": 4096,
    "rope_theta": 500000.0,
    "rope_scaling": {
      "rope_type": "default"
    },
    "hidden_act": "silu",
    "_attn_implementation": "sdpa",
    "intermediate_size": 5632
  },
  "tie_word_embeddings": false
}