ChenHe727 committed on
Commit
862bcab
·
verified ·
1 Parent(s): 8ca4e25

Upload per-layer bit-width assignment metadata

Browse files
Files changed (1) hide show
  1. mp_quant_metadata.json +276 -0
mp_quant_metadata.json ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "distill_step": 10000,
3
+ "distill_loss": 1.0463419775873422,
4
+ "is_ema": true,
5
+ "quantization": {
6
+ "method": "mixed_precision_gptq",
7
+ "group_size": 128,
8
+ "damping": 0.01,
9
+ "assignment": {
10
+ "down_blocks.0.attentions.0.proj_in": "int4",
11
+ "down_blocks.0.attentions.0.proj_out": "int4",
12
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": "int4",
13
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": "int4",
14
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": "int4",
15
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": "int4",
16
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": "int8",
17
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": "int8",
18
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": "int4",
19
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": "int8",
20
+ "down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": "int4",
21
+ "down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": "fp16",
22
+ "down_blocks.0.attentions.1.proj_in": "int4",
23
+ "down_blocks.0.attentions.1.proj_out": "int4",
24
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": "int4",
25
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": "int4",
26
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": "int4",
27
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": "int4",
28
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": "int8",
29
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": "int8",
30
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": "int4",
31
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": "int8",
32
+ "down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": "int4",
33
+ "down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": "int8",
34
+ "down_blocks.1.attentions.0.proj_in": "fp16",
35
+ "down_blocks.1.attentions.0.proj_out": "fp16",
36
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "int8",
37
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "fp16",
38
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "int8",
39
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "fp16",
40
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "int8",
41
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "int8",
42
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "int8",
43
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "int8",
44
+ "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "fp16",
45
+ "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "fp16",
46
+ "down_blocks.1.attentions.1.proj_in": "fp16",
47
+ "down_blocks.1.attentions.1.proj_out": "fp16",
48
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "int8",
49
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "int8",
50
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "int8",
51
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "int8",
52
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "int8",
53
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "int8",
54
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "int8",
55
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "int8",
56
+ "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "int8",
57
+ "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "int8",
58
+ "down_blocks.2.attentions.0.proj_in": "int8",
59
+ "down_blocks.2.attentions.0.proj_out": "int8",
60
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "int8",
61
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "int8",
62
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "int8",
63
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "int8",
64
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "int8",
65
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "int4",
66
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "int8",
67
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "int8",
68
+ "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "fp16",
69
+ "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "int8",
70
+ "down_blocks.2.attentions.1.proj_in": "int8",
71
+ "down_blocks.2.attentions.1.proj_out": "int8",
72
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "int8",
73
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "int8",
74
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "int8",
75
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "int8",
76
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "int8",
77
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "int4",
78
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "int8",
79
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "int8",
80
+ "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "int8",
81
+ "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "int8",
82
+ "mid_block.attentions.0.proj_in": "int8",
83
+ "mid_block.attentions.0.proj_out": "int8",
84
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_k": "int8",
85
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": "int8",
86
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_q": "int8",
87
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_v": "int8",
88
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_k": "int8",
89
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": "int8",
90
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_q": "int8",
91
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_v": "int8",
92
+ "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": "int8",
93
+ "mid_block.attentions.0.transformer_blocks.0.ff.net.2": "int8",
94
+ "up_blocks.1.attentions.0.proj_in": "int8",
95
+ "up_blocks.1.attentions.0.proj_out": "int8",
96
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": "int8",
97
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": "int8",
98
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": "int8",
99
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": "int8",
100
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": "int8",
101
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": "int4",
102
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": "int8",
103
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": "int8",
104
+ "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": "int8",
105
+ "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": "int8",
106
+ "up_blocks.1.attentions.1.proj_in": "int8",
107
+ "up_blocks.1.attentions.1.proj_out": "int8",
108
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": "int8",
109
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": "int8",
110
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": "int8",
111
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": "int8",
112
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": "int8",
113
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": "int4",
114
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": "int8",
115
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": "int8",
116
+ "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": "int8",
117
+ "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": "int8",
118
+ "up_blocks.1.attentions.2.proj_in": "fp16",
119
+ "up_blocks.1.attentions.2.proj_out": "fp16",
120
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": "int8",
121
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": "fp16",
122
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": "int8",
123
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": "fp16",
124
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": "int8",
125
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": "int4",
126
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": "int8",
127
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": "int8",
128
+ "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": "fp16",
129
+ "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": "int8",
130
+ "up_blocks.2.attentions.0.proj_in": "fp16",
131
+ "up_blocks.2.attentions.0.proj_out": "fp16",
132
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": "int8",
133
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": "fp16",
134
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": "int8",
135
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": "int8",
136
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": "int8",
137
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": "int8",
138
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": "int8",
139
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": "int8",
140
+ "up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": "fp16",
141
+ "up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": "fp16",
142
+ "up_blocks.2.attentions.1.proj_in": "fp16",
143
+ "up_blocks.2.attentions.1.proj_out": "fp16",
144
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": "int8",
145
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": "fp16",
146
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": "int8",
147
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": "fp16",
148
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": "int8",
149
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": "int8",
150
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": "int8",
151
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": "fp16",
152
+ "up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": "fp16",
153
+ "up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": "fp16",
154
+ "up_blocks.2.attentions.2.proj_in": "fp16",
155
+ "up_blocks.2.attentions.2.proj_out": "fp16",
156
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k": "int8",
157
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0": "fp16",
158
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q": "int8",
159
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v": "fp16",
160
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k": "int8",
161
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0": "fp16",
162
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q": "int8",
163
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v": "int8",
164
+ "up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj": "fp16",
165
+ "up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2": "fp16",
166
+ "up_blocks.3.attentions.0.proj_in": "int4",
167
+ "up_blocks.3.attentions.0.proj_out": "int4",
168
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k": "int4",
169
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0": "int4",
170
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q": "int4",
171
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v": "int4",
172
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k": "int8",
173
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0": "int8",
174
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q": "int4",
175
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v": "int8",
176
+ "up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj": "int4",
177
+ "up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2": "fp16",
178
+ "up_blocks.3.attentions.1.proj_in": "int4",
179
+ "up_blocks.3.attentions.1.proj_out": "int4",
180
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k": "int4",
181
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0": "int4",
182
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q": "int4",
183
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v": "int4",
184
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k": "int8",
185
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0": "fp16",
186
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q": "int4",
187
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v": "fp16",
188
+ "up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj": "int4",
189
+ "up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2": "fp16",
190
+ "up_blocks.3.attentions.2.proj_in": "int4",
191
+ "up_blocks.3.attentions.2.proj_out": "int4",
192
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k": "int4",
193
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0": "int4",
194
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q": "int4",
195
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v": "int4",
196
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k": "int8",
197
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0": "int8",
198
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q": "int4",
199
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v": "fp16",
200
+ "up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj": "int4",
201
+ "up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2": "fp16"
202
+ },
203
+ "size_reduction": 0.3753969219677904,
204
+ "model_dtype": "bfloat16"
205
+ },
206
+ "model_config": {
207
+ "sample_size": 64,
208
+ "in_channels": 4,
209
+ "out_channels": 4,
210
+ "center_input_sample": false,
211
+ "flip_sin_to_cos": true,
212
+ "freq_shift": 0,
213
+ "down_block_types": [
214
+ "CrossAttnDownBlock2D",
215
+ "CrossAttnDownBlock2D",
216
+ "CrossAttnDownBlock2D",
217
+ "DownBlock2D"
218
+ ],
219
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
220
+ "up_block_types": [
221
+ "UpBlock2D",
222
+ "CrossAttnUpBlock2D",
223
+ "CrossAttnUpBlock2D",
224
+ "CrossAttnUpBlock2D"
225
+ ],
226
+ "only_cross_attention": false,
227
+ "block_out_channels": [
228
+ 320,
229
+ 640,
230
+ 1280,
231
+ 1280
232
+ ],
233
+ "layers_per_block": 2,
234
+ "downsample_padding": 1,
235
+ "mid_block_scale_factor": 1,
236
+ "dropout": 0.0,
237
+ "act_fn": "silu",
238
+ "norm_num_groups": 32,
239
+ "norm_eps": 1e-05,
240
+ "cross_attention_dim": 1024,
241
+ "transformer_layers_per_block": 1,
242
+ "reverse_transformer_layers_per_block": null,
243
+ "encoder_hid_dim": null,
244
+ "encoder_hid_dim_type": null,
245
+ "attention_head_dim": [
246
+ 5,
247
+ 10,
248
+ 20,
249
+ 20
250
+ ],
251
+ "num_attention_heads": null,
252
+ "dual_cross_attention": false,
253
+ "use_linear_projection": true,
254
+ "class_embed_type": null,
255
+ "addition_embed_type": null,
256
+ "addition_time_embed_dim": null,
257
+ "num_class_embeds": null,
258
+ "upcast_attention": null,
259
+ "resnet_time_scale_shift": "default",
260
+ "resnet_skip_time_act": false,
261
+ "resnet_out_scale_factor": 1.0,
262
+ "time_embedding_type": "positional",
263
+ "time_embedding_dim": null,
264
+ "time_embedding_act_fn": null,
265
+ "timestep_post_act": null,
266
+ "time_cond_proj_dim": null,
267
+ "conv_in_kernel": 3,
268
+ "conv_out_kernel": 3,
269
+ "projection_class_embeddings_input_dim": null,
270
+ "attention_type": "default",
271
+ "class_embeddings_concat": false,
272
+ "mid_block_only_cross_attention": null,
273
+ "cross_attention_norm": null,
274
+ "addition_embed_type_num_heads": 64
275
+ }
276
+ }