{
  "dfloat11_config": {
    "bytes_per_thread": 8,
    "pattern_dict": {
      "distilled_guidance_layer": [
        "in_proj",
        "layers.0.linear_1",
        "layers.0.linear_2",
        "layers.1.linear_1",
        "layers.1.linear_2",
        "layers.2.linear_1",
        "layers.2.linear_2",
        "layers.3.linear_1",
        "layers.3.linear_2",
        "layers.4.linear_1",
        "layers.4.linear_2",
        "out_proj"
      ],
      "transformer_blocks\\.\\d+": [
        "attn.to_q",
        "attn.to_k",
        "attn.to_v",
        "attn.add_k_proj",
        "attn.add_v_proj",
        "attn.add_q_proj",
        "attn.to_out.0",
        "attn.to_add_out",
        "ff.net.0.proj",
        "ff.net.2",
        "ff_context.net.0.proj",
        "ff_context.net.2"
      ],
      "single_transformer_blocks\\.\\d+": [
        "proj_mlp",
        "proj_out",
        "attn.to_q",
        "attn.to_k",
        "attn.to_v"
      ]
    },
    "threads_per_block": [
      512
    ],
    "version": "0.2.0"
  },
  "model_type": "llama"
}