{
"architectures": [
"Llama4ForCausalLM"
],
"attention_bias": false,
"attention_chunk_size": 32768,
"attention_dropout": 0.0,
"attn_scale": 0.1,
"attn_temperature_tuning": false,
"bos_token_id": 128000,
"eos_token_id": [
128001,
128008,
128009
],
"floor_scale": 8192,
"for_llm_compressor": false,
"head_dim": 64,
"hidden_act": "silu",
"hidden_size": 1536,
"initializer_range": 0.02,
"interleave_moe_layer_step": 0,
"intermediate_size": 8192,
"intermediate_size_mlp": 6144,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 32768,
"model_type": "llama4_text",
"moe_layers": [],
"no_rope_layers": [
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
],
"num_attention_heads": 24,
"num_experts_per_tok": 0,
"num_hidden_layers": 22,
"num_key_value_heads": 6,
"num_local_experts": 0,
"output_router_logits": false,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 8000000.0,
"router_aux_loss_coef": 0.001,
"router_jitter_noise": 0.0,
"tie_word_embeddings": true,
"torch_dtype": "float32",
"transformers_version": "4.55.0",
"use_cache": true,
"use_qk_norm": true,
"vocab_size": 128256,
"quantization": {
"group_size": 64,
"method": "mixed_precision_dynamic",
"per_layer_bits": {
"layers.0.attention.q_proj": 4,
"layers.0.attention.k_proj": 4,
"layers.0.attention.v_proj": 4,
"layers.0.attention.o_proj": 8,
"layers.0.feed_forward.gate_proj": 4,
"layers.0.feed_forward.up_proj": 4,
"layers.0.feed_forward.down_proj": 4,
"layers.1.attention.q_proj": 4,
"layers.1.attention.k_proj": 4,
"layers.1.attention.v_proj": 4,
"layers.1.attention.o_proj": 4,
"layers.1.feed_forward.gate_proj": 4,
"layers.1.feed_forward.up_proj": 4,
"layers.1.feed_forward.down_proj": 4,
"layers.2.attention.q_proj": 4,
"layers.2.attention.k_proj": 4,
"layers.2.attention.v_proj": 4,
"layers.2.attention.o_proj": 4,
"layers.2.feed_forward.gate_proj": 4,
"layers.2.feed_forward.up_proj": 4,
"layers.2.feed_forward.down_proj": 4,
"layers.3.attention.q_proj": 4,
"layers.3.attention.k_proj": 4,
"layers.3.attention.v_proj": 4,
"layers.3.attention.o_proj": 4,
"layers.3.feed_forward.gate_proj": 4,
"layers.3.feed_forward.up_proj": 4,
"layers.3.feed_forward.down_proj": 4,
"layers.4.attention.q_proj": 4,
"layers.4.attention.k_proj": 4,
"layers.4.attention.v_proj": 4,
"layers.4.attention.o_proj": 4,
"layers.4.feed_forward.gate_proj": 4,
"layers.4.feed_forward.up_proj": 4,
"layers.4.feed_forward.down_proj": 4,
"layers.5.attention.q_proj": 4,
"layers.5.attention.k_proj": 4,
"layers.5.attention.v_proj": 4,
"layers.5.attention.o_proj": 4,
"layers.5.feed_forward.gate_proj": 4,
"layers.5.feed_forward.up_proj": 4,
"layers.5.feed_forward.down_proj": 4,
"layers.6.attention.q_proj": 4,
"layers.6.attention.k_proj": 4,
"layers.6.attention.v_proj": 4,
"layers.6.attention.o_proj": 4,
"layers.6.feed_forward.gate_proj": 4,
"layers.6.feed_forward.up_proj": 4,
"layers.6.feed_forward.down_proj": 4,
"layers.7.attention.q_proj": 4,
"layers.7.attention.k_proj": 4,
"layers.7.attention.v_proj": 4,
"layers.7.attention.o_proj": 4,
"layers.7.feed_forward.gate_proj": 4,
"layers.7.feed_forward.up_proj": 4,
"layers.7.feed_forward.down_proj": 4,
"layers.8.attention.q_proj": 4,
"layers.8.attention.k_proj": 4,
"layers.8.attention.v_proj": 4,
"layers.8.attention.o_proj": 4,
"layers.8.feed_forward.gate_proj": 4,
"layers.8.feed_forward.up_proj": 4,
"layers.8.feed_forward.down_proj": 4,
"layers.9.attention.q_proj": 4,
"layers.9.attention.k_proj": 4,
"layers.9.attention.v_proj": 4,
"layers.9.attention.o_proj": 4,
"layers.9.feed_forward.gate_proj": 4,
"layers.9.feed_forward.up_proj": 4,
"layers.9.feed_forward.down_proj": 4,
"layers.10.attention.q_proj": 4,
"layers.10.attention.k_proj": 4,
"layers.10.attention.v_proj": 4,
"layers.10.attention.o_proj": 4,
"layers.10.feed_forward.gate_proj": 4,
"layers.10.feed_forward.up_proj": 4,
"layers.10.feed_forward.down_proj": 4,
"layers.11.attention.q_proj": 4,
"layers.11.attention.k_proj": 4,
"layers.11.attention.v_proj": 4,
"layers.11.attention.o_proj": 4,
"layers.11.feed_forward.gate_proj": 4,
"layers.11.feed_forward.up_proj": 4,
"layers.11.feed_forward.down_proj": 4,
"layers.12.attention.q_proj": 4,
"layers.12.attention.k_proj": 4,
"layers.12.attention.v_proj": 4,
"layers.12.attention.o_proj": 4,
"layers.12.feed_forward.gate_proj": 4,
"layers.12.feed_forward.up_proj": 4,
"layers.12.feed_forward.down_proj": 4,
"layers.13.attention.q_proj": 4,
"layers.13.attention.k_proj": 4,
"layers.13.attention.v_proj": 4,
"layers.13.attention.o_proj": 4,
"layers.13.feed_forward.gate_proj": 4,
"layers.13.feed_forward.up_proj": 4,
"layers.13.feed_forward.down_proj": 4,
"layers.14.attention.q_proj": 4,
"layers.14.attention.k_proj": 4,
"layers.14.attention.v_proj": 4,
"layers.14.attention.o_proj": 4,
"layers.14.feed_forward.gate_proj": 4,
"layers.14.feed_forward.up_proj": 4,
"layers.14.feed_forward.down_proj": 4,
"layers.15.attention.q_proj": 4,
"layers.15.attention.k_proj": 4,
"layers.15.attention.v_proj": 4,
"layers.15.attention.o_proj": 4,
"layers.15.feed_forward.gate_proj": 4,
"layers.15.feed_forward.up_proj": 4,
"layers.15.feed_forward.down_proj": 4,
"layers.16.attention.q_proj": 4,
"layers.16.attention.k_proj": 4,
"layers.16.attention.v_proj": 4,
"layers.16.attention.o_proj": 4,
"layers.16.feed_forward.gate_proj": 4,
"layers.16.feed_forward.up_proj": 4,
"layers.16.feed_forward.down_proj": 4,
"layers.17.attention.q_proj": 4,
"layers.17.attention.k_proj": 4,
"layers.17.attention.v_proj": 4,
"layers.17.attention.o_proj": 4,
"layers.17.feed_forward.gate_proj": 4,
"layers.17.feed_forward.up_proj": 4,
"layers.17.feed_forward.down_proj": 4,
"layers.18.attention.q_proj": 4,
"layers.18.attention.k_proj": 4,
"layers.18.attention.v_proj": 4,
"layers.18.attention.o_proj": 4,
"layers.18.feed_forward.gate_proj": 4,
"layers.18.feed_forward.up_proj": 4,
"layers.18.feed_forward.down_proj": 4,
"layers.19.attention.q_proj": 4,
"layers.19.attention.k_proj": 4,
"layers.19.attention.v_proj": 4,
"layers.19.attention.o_proj": 4,
"layers.19.feed_forward.gate_proj": 4,
"layers.19.feed_forward.up_proj": 4,
"layers.19.feed_forward.down_proj": 4,
"layers.20.attention.q_proj": 4,
"layers.20.attention.k_proj": 4,
"layers.20.attention.v_proj": 4,
"layers.20.attention.o_proj": 4,
"layers.20.feed_forward.gate_proj": 4,
"layers.20.feed_forward.up_proj": 4,
"layers.20.feed_forward.down_proj": 4,
"layers.21.attention.q_proj": 4,
"layers.21.attention.k_proj": 4,
"layers.21.attention.v_proj": 4,
"layers.21.attention.o_proj": 4,
"layers.21.feed_forward.gate_proj": 4,
"layers.21.feed_forward.up_proj": 4,
"layers.21.feed_forward.down_proj": 4
}
}
}