{ "activation_func": "swiglu", "activation_func_fp8_input_store": false, "add_bias_linear": false, "architectures": [ "ZayaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bias_activation_fusion": true, "bos_token_id": 2, "cca": true, "cca_time0": 2, "cca_time1": 2, "clamp_temp": false, "dtype": "bfloat16", "eos_token_id": 106, "ffn_hidden_size": 64, "gated_linear_unit": true, "head_dim": 32, "hidden_size": 32, "kv_channels": 128, "lm_head_bias": false, "mamba_cache_dtype": "float32", "max_position_embeddings": 2048, "model_type": "zaya", "moe_router_topk": 1, "norm_epsilon": 1e-05, "normalization": "RMSNorm", "num_attention_heads": 2, "num_experts": 2, "num_hidden_layers": 4, "num_key_value_heads": 1, "num_query_groups": 1, "pad_token_id": 0, "partial_rotary_factor": 0.5, "residual_in_fp32": true, "rope_parameters": { "factor": 1.0, "partial_rotary_factor": 0.5, "rope_theta": 5000000, "rope_type": "linear" }, "rope_theta": 5000000, "scale_residual_merge": true, "sliding_window": null, "swa_layers": null, "swa_rotary_base": null, "tie_word_embeddings": true, "transformers_version": "5.8.0.dev0", "use_cache": true, "vocab_size": 262272, "zaya_high_prec": true, "zaya_mlp_expansion": 16, "zaya_use_eda": true, "zaya_use_mod": true, "transformers.js_config": { "use_external_data_format": true } }