{
  "fp8": {
    "model\\.layers\\.[0-2](?=\\.|$)": [
      "mlp.down_proj",
      "mlp.gate_proj",
      "mlp.up_proj",
      "self_attn.kv_a_proj_with_mqa",
      "self_attn.kv_b_proj",
      "self_attn.o_proj",
      "self_attn.q_a_proj",
      "self_attn.q_b_proj"
    ],
    "model\\.layers\\.(?![0-2](?:[^0-9]|$))\\d+\\.self_attn": [
      "kv_a_proj_with_mqa",
      "kv_b_proj",
      "o_proj",
      "q_a_proj",
      "q_b_proj"
    ],
    "model\\.layers\\.(?![0-2](?:[^0-9]|$))\\d+\\.mlp\\.shared_experts": [
      "down_proj",
      "gate_proj",
      "up_proj"
    ],
    "model\\.layers\\.(?![0-2](?:[^0-9]|$))\\d+\\.mlp\\.experts\\.\\d+": [
      "down_proj",
      "gate_proj",
      "up_proj"
    ]
  },
  "bf16": {
    "model\\.embed_tokens": [],
    "lm_head": []
  }
}