{ "vocab_size": 100278, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "max_position_embeddings": 32768, "head_dim": 128, "hidden_act": "silu", "norm_type": "rmsnorm", "norm_eps": 1e-06, "norm_has_bias": false, "use_pre_attn_norm": false, "use_pre_mlp_norm": false, "use_post_attn_norm": true, "use_post_mlp_norm": true, "mlp_type": "gated", "attention_bias": false, "mlp_bias": false, "position_embedding_type": "rope", "rope_theta": 500000, "rope_scaling": { "attention_factor": 1.2079441541679836, "beta_fast": 32, "beta_slow": 1, "factor": 8.0, "original_max_position_embeddings": 8192, "rope_type": "yarn", "rope_theta": 500000 }, "rope_layer_flags": [ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true ], "no_rope_layer_interval": null, "rope_type_overrides": {}, "layer_types": [ "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention" ], "layer_layouts": [ "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm", "decoder_postnorm" ], "sliding_window": 4096, "linear_num_key_heads": null, "linear_num_value_heads": null, "linear_key_head_dim": null, "linear_value_head_dim": null, "linear_a_log_min": null, "linear_a_log_max": null, "linear_dt_min": null, "linear_dt_max": null, "linear_dt_init_floor": null, "linear_conv_kernel_dim": null, "linear_allow_neg_eigval": null, "use_qk_norm": true, "qk_norm_type": "rmsnorm", "attention_dropout": 0.0, "resid_dropout": 0.0, "embd_dropout": 0.0, "initializer_range": 0.02, "bos_token_id": 100257, "eos_token_id": 100257, "pad_token_id": 100277, "use_cache": true, "tie_word_embeddings": false, "model_type": "lizzy", "architectures": [ "LizzyForCausalLM" ], "auto_map": { "AutoConfig": "configuration_lizzy.LizzyConfig", "AutoModel": "modeling_lizzy.LizzyModel", "AutoModelForCausalLM": "modeling_lizzy.LizzyForCausalLM", "AutoTokenizer": "tokenization_lizzy.LizzyTokenizerFast" }, "tokenizer_class": "LizzyTokenizerFast", "transformers_version": "5.4.0" }