{
  "dfloat11_config": {
    "bytes_per_thread": 8,
    "pattern_dict": {
      "distilled_guidance_layer": [
        "in_proj",
        "layers.0.linear_1",
        "layers.0.linear_2",
        "layers.1.linear_1",
        "layers.1.linear_2",
        "layers.2.linear_1",
        "layers.2.linear_2",
        "layers.3.linear_1",
        "layers.3.linear_2",
        "layers.4.linear_1",
        "layers.4.linear_2",
        "out_proj"
      ],
      "transformer_blocks\\.\\d+": [
        "attn.to_q",
        "attn.to_k",
        "attn.to_v",
        "attn.add_k_proj",
        "attn.add_v_proj",
        "attn.add_q_proj",
        "attn.to_out.0",
        "attn.to_add_out",
        "ff.net.0.proj",
        "ff.net.2",
        "ff_context.net.0.proj",
        "ff_context.net.2"
      ],
      "single_transformer_blocks\\.\\d+": [
        "proj_mlp",
        "proj_out",
        "attn.to_q",
        "attn.to_k",
        "attn.to_v"
      ]
    },
    "threads_per_block": [
      512
    ],
    "version": "0.2.0"
  },
  "model_type": "llama"
}