ChenHe727 commited on
Commit
746fb1b
·
verified ·
1 Parent(s): e213182

Upload pruned_unet.config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. pruned_unet.config.json +1908 -0
pruned_unet.config.json ADDED
@@ -0,0 +1,1908 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "_use_default_values": [
4
+ "resnet_out_scale_factor",
5
+ "use_linear_projection",
6
+ "projection_class_embeddings_input_dim",
7
+ "resnet_time_scale_shift",
8
+ "time_embedding_dim",
9
+ "class_embeddings_concat",
10
+ "encoder_hid_dim_type",
11
+ "num_attention_heads",
12
+ "transformer_layers_per_block",
13
+ "addition_embed_type",
14
+ "encoder_hid_dim",
15
+ "conv_out_kernel",
16
+ "upcast_attention",
17
+ "only_cross_attention",
18
+ "dropout",
19
+ "timestep_post_act",
20
+ "dual_cross_attention",
21
+ "addition_embed_type_num_heads",
22
+ "time_cond_proj_dim",
23
+ "num_class_embeds",
24
+ "time_embedding_act_fn",
25
+ "attention_type",
26
+ "class_embed_type",
27
+ "resnet_skip_time_act",
28
+ "reverse_transformer_layers_per_block",
29
+ "mid_block_only_cross_attention",
30
+ "cross_attention_norm",
31
+ "mid_block_type",
32
+ "time_embedding_type",
33
+ "conv_in_kernel",
34
+ "addition_time_embed_dim"
35
+ ],
36
+ "_class_name": "UNet2DConditionModel",
37
+ "_diffusers_version": "0.6.0",
38
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
39
+ "sample_size": 64,
40
+ "in_channels": 4,
41
+ "out_channels": 4,
42
+ "center_input_sample": false,
43
+ "flip_sin_to_cos": true,
44
+ "freq_shift": 0,
45
+ "down_block_types": [
46
+ "CrossAttnDownBlock2D",
47
+ "CrossAttnDownBlock2D",
48
+ "CrossAttnDownBlock2D",
49
+ "DownBlock2D"
50
+ ],
51
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
52
+ "up_block_types": [
53
+ "UpBlock2D",
54
+ "CrossAttnUpBlock2D",
55
+ "CrossAttnUpBlock2D",
56
+ "CrossAttnUpBlock2D"
57
+ ],
58
+ "only_cross_attention": false,
59
+ "block_out_channels": [
60
+ 320,
61
+ 640,
62
+ 1280,
63
+ 1280
64
+ ],
65
+ "layers_per_block": 2,
66
+ "downsample_padding": 1,
67
+ "mid_block_scale_factor": 1,
68
+ "dropout": 0.0,
69
+ "act_fn": "silu",
70
+ "norm_num_groups": 32,
71
+ "norm_eps": 1e-05,
72
+ "cross_attention_dim": 768,
73
+ "transformer_layers_per_block": 1,
74
+ "reverse_transformer_layers_per_block": null,
75
+ "encoder_hid_dim": null,
76
+ "encoder_hid_dim_type": null,
77
+ "attention_head_dim": 8,
78
+ "num_attention_heads": null,
79
+ "dual_cross_attention": false,
80
+ "use_linear_projection": false,
81
+ "class_embed_type": null,
82
+ "addition_embed_type": null,
83
+ "addition_time_embed_dim": null,
84
+ "num_class_embeds": null,
85
+ "upcast_attention": false,
86
+ "resnet_time_scale_shift": "default",
87
+ "resnet_skip_time_act": false,
88
+ "resnet_out_scale_factor": 1.0,
89
+ "time_embedding_type": "positional",
90
+ "time_embedding_dim": null,
91
+ "time_embedding_act_fn": null,
92
+ "timestep_post_act": null,
93
+ "time_cond_proj_dim": null,
94
+ "conv_in_kernel": 3,
95
+ "conv_out_kernel": 3,
96
+ "projection_class_embeddings_input_dim": null,
97
+ "attention_type": "default",
98
+ "class_embeddings_concat": false,
99
+ "mid_block_only_cross_attention": null,
100
+ "cross_attention_norm": null,
101
+ "addition_embed_type_num_heads": 64
102
+ },
103
+ "module_shapes": {
104
+ "conv_in": {
105
+ "type": "conv2d",
106
+ "out_channels": 320,
107
+ "in_channels": 4,
108
+ "kernel_size": [
109
+ 3,
110
+ 3
111
+ ]
112
+ },
113
+ "time_embedding.linear_1": {
114
+ "type": "linear",
115
+ "out_features": 1280,
116
+ "in_features": 320
117
+ },
118
+ "time_embedding.linear_2": {
119
+ "type": "linear",
120
+ "out_features": 1280,
121
+ "in_features": 1280
122
+ },
123
+ "down_blocks.0.attentions.0.proj_in": {
124
+ "type": "conv2d",
125
+ "out_channels": 320,
126
+ "in_channels": 320,
127
+ "kernel_size": [
128
+ 1,
129
+ 1
130
+ ]
131
+ },
132
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": {
133
+ "type": "linear",
134
+ "out_features": 320,
135
+ "in_features": 320
136
+ },
137
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": {
138
+ "type": "linear",
139
+ "out_features": 320,
140
+ "in_features": 320
141
+ },
142
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": {
143
+ "type": "linear",
144
+ "out_features": 320,
145
+ "in_features": 320
146
+ },
147
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": {
148
+ "type": "linear",
149
+ "out_features": 320,
150
+ "in_features": 320
151
+ },
152
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": {
153
+ "type": "linear",
154
+ "out_features": 200,
155
+ "in_features": 320
156
+ },
157
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": {
158
+ "type": "linear",
159
+ "out_features": 200,
160
+ "in_features": 768
161
+ },
162
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": {
163
+ "type": "linear",
164
+ "out_features": 200,
165
+ "in_features": 768
166
+ },
167
+ "down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": {
168
+ "type": "linear",
169
+ "out_features": 320,
170
+ "in_features": 200
171
+ },
172
+ "down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": {
173
+ "type": "linear",
174
+ "out_features": 2560,
175
+ "in_features": 320
176
+ },
177
+ "down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": {
178
+ "type": "linear",
179
+ "out_features": 320,
180
+ "in_features": 1280
181
+ },
182
+ "down_blocks.0.attentions.0.proj_out": {
183
+ "type": "conv2d",
184
+ "out_channels": 320,
185
+ "in_channels": 320,
186
+ "kernel_size": [
187
+ 1,
188
+ 1
189
+ ]
190
+ },
191
+ "down_blocks.0.attentions.1.proj_in": {
192
+ "type": "conv2d",
193
+ "out_channels": 320,
194
+ "in_channels": 320,
195
+ "kernel_size": [
196
+ 1,
197
+ 1
198
+ ]
199
+ },
200
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": {
201
+ "type": "linear",
202
+ "out_features": 320,
203
+ "in_features": 320
204
+ },
205
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": {
206
+ "type": "linear",
207
+ "out_features": 320,
208
+ "in_features": 320
209
+ },
210
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": {
211
+ "type": "linear",
212
+ "out_features": 320,
213
+ "in_features": 320
214
+ },
215
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": {
216
+ "type": "linear",
217
+ "out_features": 320,
218
+ "in_features": 320
219
+ },
220
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": {
221
+ "type": "linear",
222
+ "out_features": 200,
223
+ "in_features": 320
224
+ },
225
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": {
226
+ "type": "linear",
227
+ "out_features": 200,
228
+ "in_features": 768
229
+ },
230
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": {
231
+ "type": "linear",
232
+ "out_features": 200,
233
+ "in_features": 768
234
+ },
235
+ "down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": {
236
+ "type": "linear",
237
+ "out_features": 320,
238
+ "in_features": 200
239
+ },
240
+ "down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": {
241
+ "type": "linear",
242
+ "out_features": 2560,
243
+ "in_features": 320
244
+ },
245
+ "down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": {
246
+ "type": "linear",
247
+ "out_features": 320,
248
+ "in_features": 1280
249
+ },
250
+ "down_blocks.0.attentions.1.proj_out": {
251
+ "type": "conv2d",
252
+ "out_channels": 320,
253
+ "in_channels": 320,
254
+ "kernel_size": [
255
+ 1,
256
+ 1
257
+ ]
258
+ },
259
+ "down_blocks.0.resnets.0.conv1": {
260
+ "type": "conv2d",
261
+ "out_channels": 320,
262
+ "in_channels": 320,
263
+ "kernel_size": [
264
+ 3,
265
+ 3
266
+ ]
267
+ },
268
+ "down_blocks.0.resnets.0.time_emb_proj": {
269
+ "type": "linear",
270
+ "out_features": 320,
271
+ "in_features": 1280
272
+ },
273
+ "down_blocks.0.resnets.0.conv2": {
274
+ "type": "conv2d",
275
+ "out_channels": 320,
276
+ "in_channels": 320,
277
+ "kernel_size": [
278
+ 3,
279
+ 3
280
+ ]
281
+ },
282
+ "down_blocks.0.resnets.1.conv1": {
283
+ "type": "conv2d",
284
+ "out_channels": 320,
285
+ "in_channels": 320,
286
+ "kernel_size": [
287
+ 3,
288
+ 3
289
+ ]
290
+ },
291
+ "down_blocks.0.resnets.1.time_emb_proj": {
292
+ "type": "linear",
293
+ "out_features": 320,
294
+ "in_features": 1280
295
+ },
296
+ "down_blocks.0.resnets.1.conv2": {
297
+ "type": "conv2d",
298
+ "out_channels": 320,
299
+ "in_channels": 320,
300
+ "kernel_size": [
301
+ 3,
302
+ 3
303
+ ]
304
+ },
305
+ "down_blocks.0.downsamplers.0.conv": {
306
+ "type": "conv2d",
307
+ "out_channels": 320,
308
+ "in_channels": 320,
309
+ "kernel_size": [
310
+ 3,
311
+ 3
312
+ ]
313
+ },
314
+ "down_blocks.1.attentions.0.proj_in": {
315
+ "type": "conv2d",
316
+ "out_channels": 640,
317
+ "in_channels": 640,
318
+ "kernel_size": [
319
+ 1,
320
+ 1
321
+ ]
322
+ },
323
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": {
324
+ "type": "linear",
325
+ "out_features": 640,
326
+ "in_features": 640
327
+ },
328
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": {
329
+ "type": "linear",
330
+ "out_features": 640,
331
+ "in_features": 640
332
+ },
333
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": {
334
+ "type": "linear",
335
+ "out_features": 640,
336
+ "in_features": 640
337
+ },
338
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": {
339
+ "type": "linear",
340
+ "out_features": 640,
341
+ "in_features": 640
342
+ },
343
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": {
344
+ "type": "linear",
345
+ "out_features": 400,
346
+ "in_features": 640
347
+ },
348
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": {
349
+ "type": "linear",
350
+ "out_features": 400,
351
+ "in_features": 768
352
+ },
353
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": {
354
+ "type": "linear",
355
+ "out_features": 400,
356
+ "in_features": 768
357
+ },
358
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": {
359
+ "type": "linear",
360
+ "out_features": 640,
361
+ "in_features": 400
362
+ },
363
+ "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": {
364
+ "type": "linear",
365
+ "out_features": 5120,
366
+ "in_features": 640
367
+ },
368
+ "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": {
369
+ "type": "linear",
370
+ "out_features": 640,
371
+ "in_features": 2560
372
+ },
373
+ "down_blocks.1.attentions.0.proj_out": {
374
+ "type": "conv2d",
375
+ "out_channels": 640,
376
+ "in_channels": 640,
377
+ "kernel_size": [
378
+ 1,
379
+ 1
380
+ ]
381
+ },
382
+ "down_blocks.1.attentions.1.proj_in": {
383
+ "type": "conv2d",
384
+ "out_channels": 640,
385
+ "in_channels": 640,
386
+ "kernel_size": [
387
+ 1,
388
+ 1
389
+ ]
390
+ },
391
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": {
392
+ "type": "linear",
393
+ "out_features": 640,
394
+ "in_features": 640
395
+ },
396
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": {
397
+ "type": "linear",
398
+ "out_features": 640,
399
+ "in_features": 640
400
+ },
401
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": {
402
+ "type": "linear",
403
+ "out_features": 640,
404
+ "in_features": 640
405
+ },
406
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": {
407
+ "type": "linear",
408
+ "out_features": 640,
409
+ "in_features": 640
410
+ },
411
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": {
412
+ "type": "linear",
413
+ "out_features": 400,
414
+ "in_features": 640
415
+ },
416
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": {
417
+ "type": "linear",
418
+ "out_features": 400,
419
+ "in_features": 768
420
+ },
421
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": {
422
+ "type": "linear",
423
+ "out_features": 400,
424
+ "in_features": 768
425
+ },
426
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": {
427
+ "type": "linear",
428
+ "out_features": 640,
429
+ "in_features": 400
430
+ },
431
+ "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": {
432
+ "type": "linear",
433
+ "out_features": 5120,
434
+ "in_features": 640
435
+ },
436
+ "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": {
437
+ "type": "linear",
438
+ "out_features": 640,
439
+ "in_features": 2560
440
+ },
441
+ "down_blocks.1.attentions.1.proj_out": {
442
+ "type": "conv2d",
443
+ "out_channels": 640,
444
+ "in_channels": 640,
445
+ "kernel_size": [
446
+ 1,
447
+ 1
448
+ ]
449
+ },
450
+ "down_blocks.1.resnets.0.conv1": {
451
+ "type": "conv2d",
452
+ "out_channels": 640,
453
+ "in_channels": 320,
454
+ "kernel_size": [
455
+ 3,
456
+ 3
457
+ ]
458
+ },
459
+ "down_blocks.1.resnets.0.time_emb_proj": {
460
+ "type": "linear",
461
+ "out_features": 640,
462
+ "in_features": 1280
463
+ },
464
+ "down_blocks.1.resnets.0.conv2": {
465
+ "type": "conv2d",
466
+ "out_channels": 640,
467
+ "in_channels": 640,
468
+ "kernel_size": [
469
+ 3,
470
+ 3
471
+ ]
472
+ },
473
+ "down_blocks.1.resnets.0.conv_shortcut": {
474
+ "type": "conv2d",
475
+ "out_channels": 640,
476
+ "in_channels": 320,
477
+ "kernel_size": [
478
+ 1,
479
+ 1
480
+ ]
481
+ },
482
+ "down_blocks.1.resnets.1.conv1": {
483
+ "type": "conv2d",
484
+ "out_channels": 640,
485
+ "in_channels": 640,
486
+ "kernel_size": [
487
+ 3,
488
+ 3
489
+ ]
490
+ },
491
+ "down_blocks.1.resnets.1.time_emb_proj": {
492
+ "type": "linear",
493
+ "out_features": 640,
494
+ "in_features": 1280
495
+ },
496
+ "down_blocks.1.resnets.1.conv2": {
497
+ "type": "conv2d",
498
+ "out_channels": 640,
499
+ "in_channels": 640,
500
+ "kernel_size": [
501
+ 3,
502
+ 3
503
+ ]
504
+ },
505
+ "down_blocks.1.downsamplers.0.conv": {
506
+ "type": "conv2d",
507
+ "out_channels": 640,
508
+ "in_channels": 640,
509
+ "kernel_size": [
510
+ 3,
511
+ 3
512
+ ]
513
+ },
514
+ "down_blocks.2.attentions.0.proj_in": {
515
+ "type": "conv2d",
516
+ "out_channels": 1280,
517
+ "in_channels": 1280,
518
+ "kernel_size": [
519
+ 1,
520
+ 1
521
+ ]
522
+ },
523
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": {
524
+ "type": "linear",
525
+ "out_features": 800,
526
+ "in_features": 1280
527
+ },
528
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": {
529
+ "type": "linear",
530
+ "out_features": 800,
531
+ "in_features": 1280
532
+ },
533
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": {
534
+ "type": "linear",
535
+ "out_features": 800,
536
+ "in_features": 1280
537
+ },
538
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": {
539
+ "type": "linear",
540
+ "out_features": 1280,
541
+ "in_features": 800
542
+ },
543
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": {
544
+ "type": "linear",
545
+ "out_features": 480,
546
+ "in_features": 1280
547
+ },
548
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": {
549
+ "type": "linear",
550
+ "out_features": 480,
551
+ "in_features": 768
552
+ },
553
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": {
554
+ "type": "linear",
555
+ "out_features": 480,
556
+ "in_features": 768
557
+ },
558
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": {
559
+ "type": "linear",
560
+ "out_features": 1280,
561
+ "in_features": 480
562
+ },
563
+ "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": {
564
+ "type": "linear",
565
+ "out_features": 10240,
566
+ "in_features": 1280
567
+ },
568
+ "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": {
569
+ "type": "linear",
570
+ "out_features": 1280,
571
+ "in_features": 5120
572
+ },
573
+ "down_blocks.2.attentions.0.proj_out": {
574
+ "type": "conv2d",
575
+ "out_channels": 1280,
576
+ "in_channels": 1280,
577
+ "kernel_size": [
578
+ 1,
579
+ 1
580
+ ]
581
+ },
582
+ "down_blocks.2.attentions.1.proj_in": {
583
+ "type": "conv2d",
584
+ "out_channels": 1280,
585
+ "in_channels": 1280,
586
+ "kernel_size": [
587
+ 1,
588
+ 1
589
+ ]
590
+ },
591
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": {
592
+ "type": "linear",
593
+ "out_features": 800,
594
+ "in_features": 1280
595
+ },
596
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": {
597
+ "type": "linear",
598
+ "out_features": 800,
599
+ "in_features": 1280
600
+ },
601
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": {
602
+ "type": "linear",
603
+ "out_features": 800,
604
+ "in_features": 1280
605
+ },
606
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": {
607
+ "type": "linear",
608
+ "out_features": 1280,
609
+ "in_features": 800
610
+ },
611
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": {
612
+ "type": "linear",
613
+ "out_features": 480,
614
+ "in_features": 1280
615
+ },
616
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": {
617
+ "type": "linear",
618
+ "out_features": 480,
619
+ "in_features": 768
620
+ },
621
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": {
622
+ "type": "linear",
623
+ "out_features": 480,
624
+ "in_features": 768
625
+ },
626
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": {
627
+ "type": "linear",
628
+ "out_features": 1280,
629
+ "in_features": 480
630
+ },
631
+ "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": {
632
+ "type": "linear",
633
+ "out_features": 10240,
634
+ "in_features": 1280
635
+ },
636
+ "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": {
637
+ "type": "linear",
638
+ "out_features": 1280,
639
+ "in_features": 5120
640
+ },
641
+ "down_blocks.2.attentions.1.proj_out": {
642
+ "type": "conv2d",
643
+ "out_channels": 1280,
644
+ "in_channels": 1280,
645
+ "kernel_size": [
646
+ 1,
647
+ 1
648
+ ]
649
+ },
650
+ "down_blocks.2.resnets.0.conv1": {
651
+ "type": "conv2d",
652
+ "out_channels": 1280,
653
+ "in_channels": 640,
654
+ "kernel_size": [
655
+ 3,
656
+ 3
657
+ ]
658
+ },
659
+ "down_blocks.2.resnets.0.time_emb_proj": {
660
+ "type": "linear",
661
+ "out_features": 1280,
662
+ "in_features": 1280
663
+ },
664
+ "down_blocks.2.resnets.0.conv2": {
665
+ "type": "conv2d",
666
+ "out_channels": 1280,
667
+ "in_channels": 1280,
668
+ "kernel_size": [
669
+ 3,
670
+ 3
671
+ ]
672
+ },
673
+ "down_blocks.2.resnets.0.conv_shortcut": {
674
+ "type": "conv2d",
675
+ "out_channels": 1280,
676
+ "in_channels": 640,
677
+ "kernel_size": [
678
+ 1,
679
+ 1
680
+ ]
681
+ },
682
+ "down_blocks.2.resnets.1.conv1": {
683
+ "type": "conv2d",
684
+ "out_channels": 1280,
685
+ "in_channels": 1280,
686
+ "kernel_size": [
687
+ 3,
688
+ 3
689
+ ]
690
+ },
691
+ "down_blocks.2.resnets.1.time_emb_proj": {
692
+ "type": "linear",
693
+ "out_features": 1280,
694
+ "in_features": 1280
695
+ },
696
+ "down_blocks.2.resnets.1.conv2": {
697
+ "type": "conv2d",
698
+ "out_channels": 1280,
699
+ "in_channels": 1280,
700
+ "kernel_size": [
701
+ 3,
702
+ 3
703
+ ]
704
+ },
705
+ "down_blocks.2.downsamplers.0.conv": {
706
+ "type": "conv2d",
707
+ "out_channels": 1280,
708
+ "in_channels": 1280,
709
+ "kernel_size": [
710
+ 3,
711
+ 3
712
+ ]
713
+ },
714
+ "down_blocks.3.resnets.0.conv1": {
715
+ "type": "conv2d",
716
+ "out_channels": 480,
717
+ "in_channels": 1280,
718
+ "kernel_size": [
719
+ 3,
720
+ 3
721
+ ]
722
+ },
723
+ "down_blocks.3.resnets.0.time_emb_proj": {
724
+ "type": "linear",
725
+ "out_features": 480,
726
+ "in_features": 1280
727
+ },
728
+ "down_blocks.3.resnets.0.conv2": {
729
+ "type": "conv2d",
730
+ "out_channels": 1280,
731
+ "in_channels": 480,
732
+ "kernel_size": [
733
+ 3,
734
+ 3
735
+ ]
736
+ },
737
+ "down_blocks.3.resnets.1.conv1": {
738
+ "type": "conv2d",
739
+ "out_channels": 480,
740
+ "in_channels": 1280,
741
+ "kernel_size": [
742
+ 3,
743
+ 3
744
+ ]
745
+ },
746
+ "down_blocks.3.resnets.1.time_emb_proj": {
747
+ "type": "linear",
748
+ "out_features": 480,
749
+ "in_features": 1280
750
+ },
751
+ "down_blocks.3.resnets.1.conv2": {
752
+ "type": "conv2d",
753
+ "out_channels": 1280,
754
+ "in_channels": 480,
755
+ "kernel_size": [
756
+ 3,
757
+ 3
758
+ ]
759
+ },
760
+ "up_blocks.0.resnets.0.conv1": {
761
+ "type": "conv2d",
762
+ "out_channels": 960,
763
+ "in_channels": 2560,
764
+ "kernel_size": [
765
+ 3,
766
+ 3
767
+ ]
768
+ },
769
+ "up_blocks.0.resnets.0.time_emb_proj": {
770
+ "type": "linear",
771
+ "out_features": 960,
772
+ "in_features": 1280
773
+ },
774
+ "up_blocks.0.resnets.0.conv2": {
775
+ "type": "conv2d",
776
+ "out_channels": 1280,
777
+ "in_channels": 960,
778
+ "kernel_size": [
779
+ 3,
780
+ 3
781
+ ]
782
+ },
783
+ "up_blocks.0.resnets.0.conv_shortcut": {
784
+ "type": "conv2d",
785
+ "out_channels": 1280,
786
+ "in_channels": 2560,
787
+ "kernel_size": [
788
+ 1,
789
+ 1
790
+ ]
791
+ },
792
+ "up_blocks.0.resnets.1.conv1": {
793
+ "type": "conv2d",
794
+ "out_channels": 960,
795
+ "in_channels": 2560,
796
+ "kernel_size": [
797
+ 3,
798
+ 3
799
+ ]
800
+ },
801
+ "up_blocks.0.resnets.1.time_emb_proj": {
802
+ "type": "linear",
803
+ "out_features": 960,
804
+ "in_features": 1280
805
+ },
806
+ "up_blocks.0.resnets.1.conv2": {
807
+ "type": "conv2d",
808
+ "out_channels": 1280,
809
+ "in_channels": 960,
810
+ "kernel_size": [
811
+ 3,
812
+ 3
813
+ ]
814
+ },
815
+ "up_blocks.0.resnets.1.conv_shortcut": {
816
+ "type": "conv2d",
817
+ "out_channels": 1280,
818
+ "in_channels": 2560,
819
+ "kernel_size": [
820
+ 1,
821
+ 1
822
+ ]
823
+ },
824
+ "up_blocks.0.resnets.2.conv1": {
825
+ "type": "conv2d",
826
+ "out_channels": 960,
827
+ "in_channels": 2560,
828
+ "kernel_size": [
829
+ 3,
830
+ 3
831
+ ]
832
+ },
833
+ "up_blocks.0.resnets.2.time_emb_proj": {
834
+ "type": "linear",
835
+ "out_features": 960,
836
+ "in_features": 1280
837
+ },
838
+ "up_blocks.0.resnets.2.conv2": {
839
+ "type": "conv2d",
840
+ "out_channels": 1280,
841
+ "in_channels": 960,
842
+ "kernel_size": [
843
+ 3,
844
+ 3
845
+ ]
846
+ },
847
+ "up_blocks.0.resnets.2.conv_shortcut": {
848
+ "type": "conv2d",
849
+ "out_channels": 1280,
850
+ "in_channels": 2560,
851
+ "kernel_size": [
852
+ 1,
853
+ 1
854
+ ]
855
+ },
856
+ "up_blocks.0.upsamplers.0.conv": {
857
+ "type": "conv2d",
858
+ "out_channels": 1280,
859
+ "in_channels": 1280,
860
+ "kernel_size": [
861
+ 3,
862
+ 3
863
+ ]
864
+ },
865
+ "up_blocks.1.attentions.0.proj_in": {
866
+ "type": "conv2d",
867
+ "out_channels": 1280,
868
+ "in_channels": 1280,
869
+ "kernel_size": [
870
+ 1,
871
+ 1
872
+ ]
873
+ },
874
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": {
875
+ "type": "linear",
876
+ "out_features": 800,
877
+ "in_features": 1280
878
+ },
879
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": {
880
+ "type": "linear",
881
+ "out_features": 800,
882
+ "in_features": 1280
883
+ },
884
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": {
885
+ "type": "linear",
886
+ "out_features": 800,
887
+ "in_features": 1280
888
+ },
889
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": {
890
+ "type": "linear",
891
+ "out_features": 1280,
892
+ "in_features": 800
893
+ },
894
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": {
895
+ "type": "linear",
896
+ "out_features": 480,
897
+ "in_features": 1280
898
+ },
899
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": {
900
+ "type": "linear",
901
+ "out_features": 480,
902
+ "in_features": 768
903
+ },
904
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": {
905
+ "type": "linear",
906
+ "out_features": 480,
907
+ "in_features": 768
908
+ },
909
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": {
910
+ "type": "linear",
911
+ "out_features": 1280,
912
+ "in_features": 480
913
+ },
914
+ "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": {
915
+ "type": "linear",
916
+ "out_features": 10240,
917
+ "in_features": 1280
918
+ },
919
+ "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": {
920
+ "type": "linear",
921
+ "out_features": 1280,
922
+ "in_features": 5120
923
+ },
924
+ "up_blocks.1.attentions.0.proj_out": {
925
+ "type": "conv2d",
926
+ "out_channels": 1280,
927
+ "in_channels": 1280,
928
+ "kernel_size": [
929
+ 1,
930
+ 1
931
+ ]
932
+ },
933
+ "up_blocks.1.attentions.1.proj_in": {
934
+ "type": "conv2d",
935
+ "out_channels": 1280,
936
+ "in_channels": 1280,
937
+ "kernel_size": [
938
+ 1,
939
+ 1
940
+ ]
941
+ },
942
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": {
943
+ "type": "linear",
944
+ "out_features": 800,
945
+ "in_features": 1280
946
+ },
947
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": {
948
+ "type": "linear",
949
+ "out_features": 800,
950
+ "in_features": 1280
951
+ },
952
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": {
953
+ "type": "linear",
954
+ "out_features": 800,
955
+ "in_features": 1280
956
+ },
957
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": {
958
+ "type": "linear",
959
+ "out_features": 1280,
960
+ "in_features": 800
961
+ },
962
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": {
963
+ "type": "linear",
964
+ "out_features": 480,
965
+ "in_features": 1280
966
+ },
967
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": {
968
+ "type": "linear",
969
+ "out_features": 480,
970
+ "in_features": 768
971
+ },
972
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": {
973
+ "type": "linear",
974
+ "out_features": 480,
975
+ "in_features": 768
976
+ },
977
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": {
978
+ "type": "linear",
979
+ "out_features": 1280,
980
+ "in_features": 480
981
+ },
982
+ "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": {
983
+ "type": "linear",
984
+ "out_features": 10240,
985
+ "in_features": 1280
986
+ },
987
+ "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": {
988
+ "type": "linear",
989
+ "out_features": 1280,
990
+ "in_features": 5120
991
+ },
992
+ "up_blocks.1.attentions.1.proj_out": {
993
+ "type": "conv2d",
994
+ "out_channels": 1280,
995
+ "in_channels": 1280,
996
+ "kernel_size": [
997
+ 1,
998
+ 1
999
+ ]
1000
+ },
1001
+ "up_blocks.1.attentions.2.proj_in": {
1002
+ "type": "conv2d",
1003
+ "out_channels": 1280,
1004
+ "in_channels": 1280,
1005
+ "kernel_size": [
1006
+ 1,
1007
+ 1
1008
+ ]
1009
+ },
1010
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": {
1011
+ "type": "linear",
1012
+ "out_features": 800,
1013
+ "in_features": 1280
1014
+ },
1015
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": {
1016
+ "type": "linear",
1017
+ "out_features": 800,
1018
+ "in_features": 1280
1019
+ },
1020
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": {
1021
+ "type": "linear",
1022
+ "out_features": 800,
1023
+ "in_features": 1280
1024
+ },
1025
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": {
1026
+ "type": "linear",
1027
+ "out_features": 1280,
1028
+ "in_features": 800
1029
+ },
1030
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": {
1031
+ "type": "linear",
1032
+ "out_features": 480,
1033
+ "in_features": 1280
1034
+ },
1035
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": {
1036
+ "type": "linear",
1037
+ "out_features": 480,
1038
+ "in_features": 768
1039
+ },
1040
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": {
1041
+ "type": "linear",
1042
+ "out_features": 480,
1043
+ "in_features": 768
1044
+ },
1045
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": {
1046
+ "type": "linear",
1047
+ "out_features": 1280,
1048
+ "in_features": 480
1049
+ },
1050
+ "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": {
1051
+ "type": "linear",
1052
+ "out_features": 10240,
1053
+ "in_features": 1280
1054
+ },
1055
+ "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": {
1056
+ "type": "linear",
1057
+ "out_features": 1280,
1058
+ "in_features": 5120
1059
+ },
1060
+ "up_blocks.1.attentions.2.proj_out": {
1061
+ "type": "conv2d",
1062
+ "out_channels": 1280,
1063
+ "in_channels": 1280,
1064
+ "kernel_size": [
1065
+ 1,
1066
+ 1
1067
+ ]
1068
+ },
1069
+ "up_blocks.1.resnets.0.conv1": {
1070
+ "type": "conv2d",
1071
+ "out_channels": 1280,
1072
+ "in_channels": 2560,
1073
+ "kernel_size": [
1074
+ 3,
1075
+ 3
1076
+ ]
1077
+ },
1078
+ "up_blocks.1.resnets.0.time_emb_proj": {
1079
+ "type": "linear",
1080
+ "out_features": 1280,
1081
+ "in_features": 1280
1082
+ },
1083
+ "up_blocks.1.resnets.0.conv2": {
1084
+ "type": "conv2d",
1085
+ "out_channels": 1280,
1086
+ "in_channels": 1280,
1087
+ "kernel_size": [
1088
+ 3,
1089
+ 3
1090
+ ]
1091
+ },
1092
+ "up_blocks.1.resnets.0.conv_shortcut": {
1093
+ "type": "conv2d",
1094
+ "out_channels": 1280,
1095
+ "in_channels": 2560,
1096
+ "kernel_size": [
1097
+ 1,
1098
+ 1
1099
+ ]
1100
+ },
1101
+ "up_blocks.1.resnets.1.conv1": {
1102
+ "type": "conv2d",
1103
+ "out_channels": 1280,
1104
+ "in_channels": 2560,
1105
+ "kernel_size": [
1106
+ 3,
1107
+ 3
1108
+ ]
1109
+ },
1110
+ "up_blocks.1.resnets.1.time_emb_proj": {
1111
+ "type": "linear",
1112
+ "out_features": 1280,
1113
+ "in_features": 1280
1114
+ },
1115
+ "up_blocks.1.resnets.1.conv2": {
1116
+ "type": "conv2d",
1117
+ "out_channels": 1280,
1118
+ "in_channels": 1280,
1119
+ "kernel_size": [
1120
+ 3,
1121
+ 3
1122
+ ]
1123
+ },
1124
+ "up_blocks.1.resnets.1.conv_shortcut": {
1125
+ "type": "conv2d",
1126
+ "out_channels": 1280,
1127
+ "in_channels": 2560,
1128
+ "kernel_size": [
1129
+ 1,
1130
+ 1
1131
+ ]
1132
+ },
1133
+ "up_blocks.1.resnets.2.conv1": {
1134
+ "type": "conv2d",
1135
+ "out_channels": 1280,
1136
+ "in_channels": 1920,
1137
+ "kernel_size": [
1138
+ 3,
1139
+ 3
1140
+ ]
1141
+ },
1142
+ "up_blocks.1.resnets.2.time_emb_proj": {
1143
+ "type": "linear",
1144
+ "out_features": 1280,
1145
+ "in_features": 1280
1146
+ },
1147
+ "up_blocks.1.resnets.2.conv2": {
1148
+ "type": "conv2d",
1149
+ "out_channels": 1280,
1150
+ "in_channels": 1280,
1151
+ "kernel_size": [
1152
+ 3,
1153
+ 3
1154
+ ]
1155
+ },
1156
+ "up_blocks.1.resnets.2.conv_shortcut": {
1157
+ "type": "conv2d",
1158
+ "out_channels": 1280,
1159
+ "in_channels": 1920,
1160
+ "kernel_size": [
1161
+ 1,
1162
+ 1
1163
+ ]
1164
+ },
1165
+ "up_blocks.1.upsamplers.0.conv": {
1166
+ "type": "conv2d",
1167
+ "out_channels": 1280,
1168
+ "in_channels": 1280,
1169
+ "kernel_size": [
1170
+ 3,
1171
+ 3
1172
+ ]
1173
+ },
1174
+ "up_blocks.2.attentions.0.proj_in": {
1175
+ "type": "conv2d",
1176
+ "out_channels": 640,
1177
+ "in_channels": 640,
1178
+ "kernel_size": [
1179
+ 1,
1180
+ 1
1181
+ ]
1182
+ },
1183
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": {
1184
+ "type": "linear",
1185
+ "out_features": 640,
1186
+ "in_features": 640
1187
+ },
1188
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": {
1189
+ "type": "linear",
1190
+ "out_features": 640,
1191
+ "in_features": 640
1192
+ },
1193
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": {
1194
+ "type": "linear",
1195
+ "out_features": 640,
1196
+ "in_features": 640
1197
+ },
1198
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": {
1199
+ "type": "linear",
1200
+ "out_features": 640,
1201
+ "in_features": 640
1202
+ },
1203
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": {
1204
+ "type": "linear",
1205
+ "out_features": 400,
1206
+ "in_features": 640
1207
+ },
1208
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": {
1209
+ "type": "linear",
1210
+ "out_features": 400,
1211
+ "in_features": 768
1212
+ },
1213
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": {
1214
+ "type": "linear",
1215
+ "out_features": 400,
1216
+ "in_features": 768
1217
+ },
1218
+ "up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": {
1219
+ "type": "linear",
1220
+ "out_features": 640,
1221
+ "in_features": 400
1222
+ },
1223
+ "up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": {
1224
+ "type": "linear",
1225
+ "out_features": 5120,
1226
+ "in_features": 640
1227
+ },
1228
+ "up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": {
1229
+ "type": "linear",
1230
+ "out_features": 640,
1231
+ "in_features": 2560
1232
+ },
1233
+ "up_blocks.2.attentions.0.proj_out": {
1234
+ "type": "conv2d",
1235
+ "out_channels": 640,
1236
+ "in_channels": 640,
1237
+ "kernel_size": [
1238
+ 1,
1239
+ 1
1240
+ ]
1241
+ },
1242
+ "up_blocks.2.attentions.1.proj_in": {
1243
+ "type": "conv2d",
1244
+ "out_channels": 640,
1245
+ "in_channels": 640,
1246
+ "kernel_size": [
1247
+ 1,
1248
+ 1
1249
+ ]
1250
+ },
1251
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": {
1252
+ "type": "linear",
1253
+ "out_features": 640,
1254
+ "in_features": 640
1255
+ },
1256
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": {
1257
+ "type": "linear",
1258
+ "out_features": 640,
1259
+ "in_features": 640
1260
+ },
1261
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": {
1262
+ "type": "linear",
1263
+ "out_features": 640,
1264
+ "in_features": 640
1265
+ },
1266
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": {
1267
+ "type": "linear",
1268
+ "out_features": 640,
1269
+ "in_features": 640
1270
+ },
1271
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": {
1272
+ "type": "linear",
1273
+ "out_features": 400,
1274
+ "in_features": 640
1275
+ },
1276
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": {
1277
+ "type": "linear",
1278
+ "out_features": 400,
1279
+ "in_features": 768
1280
+ },
1281
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": {
1282
+ "type": "linear",
1283
+ "out_features": 400,
1284
+ "in_features": 768
1285
+ },
1286
+ "up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": {
1287
+ "type": "linear",
1288
+ "out_features": 640,
1289
+ "in_features": 400
1290
+ },
1291
+ "up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": {
1292
+ "type": "linear",
1293
+ "out_features": 5120,
1294
+ "in_features": 640
1295
+ },
1296
+ "up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": {
1297
+ "type": "linear",
1298
+ "out_features": 640,
1299
+ "in_features": 2560
1300
+ },
1301
+ "up_blocks.2.attentions.1.proj_out": {
1302
+ "type": "conv2d",
1303
+ "out_channels": 640,
1304
+ "in_channels": 640,
1305
+ "kernel_size": [
1306
+ 1,
1307
+ 1
1308
+ ]
1309
+ },
1310
+ "up_blocks.2.attentions.2.proj_in": {
1311
+ "type": "conv2d",
1312
+ "out_channels": 640,
1313
+ "in_channels": 640,
1314
+ "kernel_size": [
1315
+ 1,
1316
+ 1
1317
+ ]
1318
+ },
1319
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q": {
1320
+ "type": "linear",
1321
+ "out_features": 640,
1322
+ "in_features": 640
1323
+ },
1324
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k": {
1325
+ "type": "linear",
1326
+ "out_features": 640,
1327
+ "in_features": 640
1328
+ },
1329
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v": {
1330
+ "type": "linear",
1331
+ "out_features": 640,
1332
+ "in_features": 640
1333
+ },
1334
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0": {
1335
+ "type": "linear",
1336
+ "out_features": 640,
1337
+ "in_features": 640
1338
+ },
1339
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q": {
1340
+ "type": "linear",
1341
+ "out_features": 400,
1342
+ "in_features": 640
1343
+ },
1344
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k": {
1345
+ "type": "linear",
1346
+ "out_features": 400,
1347
+ "in_features": 768
1348
+ },
1349
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v": {
1350
+ "type": "linear",
1351
+ "out_features": 400,
1352
+ "in_features": 768
1353
+ },
1354
+ "up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0": {
1355
+ "type": "linear",
1356
+ "out_features": 640,
1357
+ "in_features": 400
1358
+ },
1359
+ "up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj": {
1360
+ "type": "linear",
1361
+ "out_features": 5120,
1362
+ "in_features": 640
1363
+ },
1364
+ "up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2": {
1365
+ "type": "linear",
1366
+ "out_features": 640,
1367
+ "in_features": 2560
1368
+ },
1369
+ "up_blocks.2.attentions.2.proj_out": {
1370
+ "type": "conv2d",
1371
+ "out_channels": 640,
1372
+ "in_channels": 640,
1373
+ "kernel_size": [
1374
+ 1,
1375
+ 1
1376
+ ]
1377
+ },
1378
+ "up_blocks.2.resnets.0.conv1": {
1379
+ "type": "conv2d",
1380
+ "out_channels": 640,
1381
+ "in_channels": 1920,
1382
+ "kernel_size": [
1383
+ 3,
1384
+ 3
1385
+ ]
1386
+ },
1387
+ "up_blocks.2.resnets.0.time_emb_proj": {
1388
+ "type": "linear",
1389
+ "out_features": 640,
1390
+ "in_features": 1280
1391
+ },
1392
+ "up_blocks.2.resnets.0.conv2": {
1393
+ "type": "conv2d",
1394
+ "out_channels": 640,
1395
+ "in_channels": 640,
1396
+ "kernel_size": [
1397
+ 3,
1398
+ 3
1399
+ ]
1400
+ },
1401
+ "up_blocks.2.resnets.0.conv_shortcut": {
1402
+ "type": "conv2d",
1403
+ "out_channels": 640,
1404
+ "in_channels": 1920,
1405
+ "kernel_size": [
1406
+ 1,
1407
+ 1
1408
+ ]
1409
+ },
1410
+ "up_blocks.2.resnets.1.conv1": {
1411
+ "type": "conv2d",
1412
+ "out_channels": 640,
1413
+ "in_channels": 1280,
1414
+ "kernel_size": [
1415
+ 3,
1416
+ 3
1417
+ ]
1418
+ },
1419
+ "up_blocks.2.resnets.1.time_emb_proj": {
1420
+ "type": "linear",
1421
+ "out_features": 640,
1422
+ "in_features": 1280
1423
+ },
1424
+ "up_blocks.2.resnets.1.conv2": {
1425
+ "type": "conv2d",
1426
+ "out_channels": 640,
1427
+ "in_channels": 640,
1428
+ "kernel_size": [
1429
+ 3,
1430
+ 3
1431
+ ]
1432
+ },
1433
+ "up_blocks.2.resnets.1.conv_shortcut": {
1434
+ "type": "conv2d",
1435
+ "out_channels": 640,
1436
+ "in_channels": 1280,
1437
+ "kernel_size": [
1438
+ 1,
1439
+ 1
1440
+ ]
1441
+ },
1442
+ "up_blocks.2.resnets.2.conv1": {
1443
+ "type": "conv2d",
1444
+ "out_channels": 640,
1445
+ "in_channels": 960,
1446
+ "kernel_size": [
1447
+ 3,
1448
+ 3
1449
+ ]
1450
+ },
1451
+ "up_blocks.2.resnets.2.time_emb_proj": {
1452
+ "type": "linear",
1453
+ "out_features": 640,
1454
+ "in_features": 1280
1455
+ },
1456
+ "up_blocks.2.resnets.2.conv2": {
1457
+ "type": "conv2d",
1458
+ "out_channels": 640,
1459
+ "in_channels": 640,
1460
+ "kernel_size": [
1461
+ 3,
1462
+ 3
1463
+ ]
1464
+ },
1465
+ "up_blocks.2.resnets.2.conv_shortcut": {
1466
+ "type": "conv2d",
1467
+ "out_channels": 640,
1468
+ "in_channels": 960,
1469
+ "kernel_size": [
1470
+ 1,
1471
+ 1
1472
+ ]
1473
+ },
1474
+ "up_blocks.2.upsamplers.0.conv": {
1475
+ "type": "conv2d",
1476
+ "out_channels": 640,
1477
+ "in_channels": 640,
1478
+ "kernel_size": [
1479
+ 3,
1480
+ 3
1481
+ ]
1482
+ },
1483
+ "up_blocks.3.attentions.0.proj_in": {
1484
+ "type": "conv2d",
1485
+ "out_channels": 320,
1486
+ "in_channels": 320,
1487
+ "kernel_size": [
1488
+ 1,
1489
+ 1
1490
+ ]
1491
+ },
1492
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q": {
1493
+ "type": "linear",
1494
+ "out_features": 320,
1495
+ "in_features": 320
1496
+ },
1497
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k": {
1498
+ "type": "linear",
1499
+ "out_features": 320,
1500
+ "in_features": 320
1501
+ },
1502
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v": {
1503
+ "type": "linear",
1504
+ "out_features": 320,
1505
+ "in_features": 320
1506
+ },
1507
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0": {
1508
+ "type": "linear",
1509
+ "out_features": 320,
1510
+ "in_features": 320
1511
+ },
1512
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q": {
1513
+ "type": "linear",
1514
+ "out_features": 200,
1515
+ "in_features": 320
1516
+ },
1517
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k": {
1518
+ "type": "linear",
1519
+ "out_features": 200,
1520
+ "in_features": 768
1521
+ },
1522
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v": {
1523
+ "type": "linear",
1524
+ "out_features": 200,
1525
+ "in_features": 768
1526
+ },
1527
+ "up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0": {
1528
+ "type": "linear",
1529
+ "out_features": 320,
1530
+ "in_features": 200
1531
+ },
1532
+ "up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj": {
1533
+ "type": "linear",
1534
+ "out_features": 2560,
1535
+ "in_features": 320
1536
+ },
1537
+ "up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2": {
1538
+ "type": "linear",
1539
+ "out_features": 320,
1540
+ "in_features": 1280
1541
+ },
1542
+ "up_blocks.3.attentions.0.proj_out": {
1543
+ "type": "conv2d",
1544
+ "out_channels": 320,
1545
+ "in_channels": 320,
1546
+ "kernel_size": [
1547
+ 1,
1548
+ 1
1549
+ ]
1550
+ },
1551
+ "up_blocks.3.attentions.1.proj_in": {
1552
+ "type": "conv2d",
1553
+ "out_channels": 320,
1554
+ "in_channels": 320,
1555
+ "kernel_size": [
1556
+ 1,
1557
+ 1
1558
+ ]
1559
+ },
1560
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q": {
1561
+ "type": "linear",
1562
+ "out_features": 320,
1563
+ "in_features": 320
1564
+ },
1565
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k": {
1566
+ "type": "linear",
1567
+ "out_features": 320,
1568
+ "in_features": 320
1569
+ },
1570
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v": {
1571
+ "type": "linear",
1572
+ "out_features": 320,
1573
+ "in_features": 320
1574
+ },
1575
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0": {
1576
+ "type": "linear",
1577
+ "out_features": 320,
1578
+ "in_features": 320
1579
+ },
1580
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q": {
1581
+ "type": "linear",
1582
+ "out_features": 200,
1583
+ "in_features": 320
1584
+ },
1585
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k": {
1586
+ "type": "linear",
1587
+ "out_features": 200,
1588
+ "in_features": 768
1589
+ },
1590
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v": {
1591
+ "type": "linear",
1592
+ "out_features": 200,
1593
+ "in_features": 768
1594
+ },
1595
+ "up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0": {
1596
+ "type": "linear",
1597
+ "out_features": 320,
1598
+ "in_features": 200
1599
+ },
1600
+ "up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj": {
1601
+ "type": "linear",
1602
+ "out_features": 2560,
1603
+ "in_features": 320
1604
+ },
1605
+ "up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2": {
1606
+ "type": "linear",
1607
+ "out_features": 320,
1608
+ "in_features": 1280
1609
+ },
1610
+ "up_blocks.3.attentions.1.proj_out": {
1611
+ "type": "conv2d",
1612
+ "out_channels": 320,
1613
+ "in_channels": 320,
1614
+ "kernel_size": [
1615
+ 1,
1616
+ 1
1617
+ ]
1618
+ },
1619
+ "up_blocks.3.attentions.2.proj_in": {
1620
+ "type": "conv2d",
1621
+ "out_channels": 320,
1622
+ "in_channels": 320,
1623
+ "kernel_size": [
1624
+ 1,
1625
+ 1
1626
+ ]
1627
+ },
1628
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q": {
1629
+ "type": "linear",
1630
+ "out_features": 320,
1631
+ "in_features": 320
1632
+ },
1633
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k": {
1634
+ "type": "linear",
1635
+ "out_features": 320,
1636
+ "in_features": 320
1637
+ },
1638
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v": {
1639
+ "type": "linear",
1640
+ "out_features": 320,
1641
+ "in_features": 320
1642
+ },
1643
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0": {
1644
+ "type": "linear",
1645
+ "out_features": 320,
1646
+ "in_features": 320
1647
+ },
1648
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q": {
1649
+ "type": "linear",
1650
+ "out_features": 200,
1651
+ "in_features": 320
1652
+ },
1653
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k": {
1654
+ "type": "linear",
1655
+ "out_features": 200,
1656
+ "in_features": 768
1657
+ },
1658
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v": {
1659
+ "type": "linear",
1660
+ "out_features": 200,
1661
+ "in_features": 768
1662
+ },
1663
+ "up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0": {
1664
+ "type": "linear",
1665
+ "out_features": 320,
1666
+ "in_features": 200
1667
+ },
1668
+ "up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj": {
1669
+ "type": "linear",
1670
+ "out_features": 2560,
1671
+ "in_features": 320
1672
+ },
1673
+ "up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2": {
1674
+ "type": "linear",
1675
+ "out_features": 320,
1676
+ "in_features": 1280
1677
+ },
1678
+ "up_blocks.3.attentions.2.proj_out": {
1679
+ "type": "conv2d",
1680
+ "out_channels": 320,
1681
+ "in_channels": 320,
1682
+ "kernel_size": [
1683
+ 1,
1684
+ 1
1685
+ ]
1686
+ },
1687
+ "up_blocks.3.resnets.0.conv1": {
1688
+ "type": "conv2d",
1689
+ "out_channels": 320,
1690
+ "in_channels": 960,
1691
+ "kernel_size": [
1692
+ 3,
1693
+ 3
1694
+ ]
1695
+ },
1696
+ "up_blocks.3.resnets.0.time_emb_proj": {
1697
+ "type": "linear",
1698
+ "out_features": 320,
1699
+ "in_features": 1280
1700
+ },
1701
+ "up_blocks.3.resnets.0.conv2": {
1702
+ "type": "conv2d",
1703
+ "out_channels": 320,
1704
+ "in_channels": 320,
1705
+ "kernel_size": [
1706
+ 3,
1707
+ 3
1708
+ ]
1709
+ },
1710
+ "up_blocks.3.resnets.0.conv_shortcut": {
1711
+ "type": "conv2d",
1712
+ "out_channels": 320,
1713
+ "in_channels": 960,
1714
+ "kernel_size": [
1715
+ 1,
1716
+ 1
1717
+ ]
1718
+ },
1719
+ "up_blocks.3.resnets.1.conv1": {
1720
+ "type": "conv2d",
1721
+ "out_channels": 320,
1722
+ "in_channels": 640,
1723
+ "kernel_size": [
1724
+ 3,
1725
+ 3
1726
+ ]
1727
+ },
1728
+ "up_blocks.3.resnets.1.time_emb_proj": {
1729
+ "type": "linear",
1730
+ "out_features": 320,
1731
+ "in_features": 1280
1732
+ },
1733
+ "up_blocks.3.resnets.1.conv2": {
1734
+ "type": "conv2d",
1735
+ "out_channels": 320,
1736
+ "in_channels": 320,
1737
+ "kernel_size": [
1738
+ 3,
1739
+ 3
1740
+ ]
1741
+ },
1742
+ "up_blocks.3.resnets.1.conv_shortcut": {
1743
+ "type": "conv2d",
1744
+ "out_channels": 320,
1745
+ "in_channels": 640,
1746
+ "kernel_size": [
1747
+ 1,
1748
+ 1
1749
+ ]
1750
+ },
1751
+ "up_blocks.3.resnets.2.conv1": {
1752
+ "type": "conv2d",
1753
+ "out_channels": 320,
1754
+ "in_channels": 640,
1755
+ "kernel_size": [
1756
+ 3,
1757
+ 3
1758
+ ]
1759
+ },
1760
+ "up_blocks.3.resnets.2.time_emb_proj": {
1761
+ "type": "linear",
1762
+ "out_features": 320,
1763
+ "in_features": 1280
1764
+ },
1765
+ "up_blocks.3.resnets.2.conv2": {
1766
+ "type": "conv2d",
1767
+ "out_channels": 320,
1768
+ "in_channels": 320,
1769
+ "kernel_size": [
1770
+ 3,
1771
+ 3
1772
+ ]
1773
+ },
1774
+ "up_blocks.3.resnets.2.conv_shortcut": {
1775
+ "type": "conv2d",
1776
+ "out_channels": 320,
1777
+ "in_channels": 640,
1778
+ "kernel_size": [
1779
+ 1,
1780
+ 1
1781
+ ]
1782
+ },
1783
+ "mid_block.attentions.0.proj_in": {
1784
+ "type": "conv2d",
1785
+ "out_channels": 1280,
1786
+ "in_channels": 1280,
1787
+ "kernel_size": [
1788
+ 1,
1789
+ 1
1790
+ ]
1791
+ },
1792
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_q": {
1793
+ "type": "linear",
1794
+ "out_features": 480,
1795
+ "in_features": 1280
1796
+ },
1797
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_k": {
1798
+ "type": "linear",
1799
+ "out_features": 480,
1800
+ "in_features": 1280
1801
+ },
1802
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_v": {
1803
+ "type": "linear",
1804
+ "out_features": 480,
1805
+ "in_features": 1280
1806
+ },
1807
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": {
1808
+ "type": "linear",
1809
+ "out_features": 1280,
1810
+ "in_features": 480
1811
+ },
1812
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_q": {
1813
+ "type": "linear",
1814
+ "out_features": 480,
1815
+ "in_features": 1280
1816
+ },
1817
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_k": {
1818
+ "type": "linear",
1819
+ "out_features": 480,
1820
+ "in_features": 768
1821
+ },
1822
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_v": {
1823
+ "type": "linear",
1824
+ "out_features": 480,
1825
+ "in_features": 768
1826
+ },
1827
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": {
1828
+ "type": "linear",
1829
+ "out_features": 1280,
1830
+ "in_features": 480
1831
+ },
1832
+ "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": {
1833
+ "type": "linear",
1834
+ "out_features": 1024,
1835
+ "in_features": 1280
1836
+ },
1837
+ "mid_block.attentions.0.transformer_blocks.0.ff.net.2": {
1838
+ "type": "linear",
1839
+ "out_features": 1280,
1840
+ "in_features": 512
1841
+ },
1842
+ "mid_block.attentions.0.proj_out": {
1843
+ "type": "conv2d",
1844
+ "out_channels": 1280,
1845
+ "in_channels": 1280,
1846
+ "kernel_size": [
1847
+ 1,
1848
+ 1
1849
+ ]
1850
+ },
1851
+ "mid_block.resnets.0.conv1": {
1852
+ "type": "conv2d",
1853
+ "out_channels": 608,
1854
+ "in_channels": 1280,
1855
+ "kernel_size": [
1856
+ 3,
1857
+ 3
1858
+ ]
1859
+ },
1860
+ "mid_block.resnets.0.time_emb_proj": {
1861
+ "type": "linear",
1862
+ "out_features": 608,
1863
+ "in_features": 1280
1864
+ },
1865
+ "mid_block.resnets.0.conv2": {
1866
+ "type": "conv2d",
1867
+ "out_channels": 1280,
1868
+ "in_channels": 608,
1869
+ "kernel_size": [
1870
+ 3,
1871
+ 3
1872
+ ]
1873
+ },
1874
+ "mid_block.resnets.1.conv1": {
1875
+ "type": "conv2d",
1876
+ "out_channels": 608,
1877
+ "in_channels": 1280,
1878
+ "kernel_size": [
1879
+ 3,
1880
+ 3
1881
+ ]
1882
+ },
1883
+ "mid_block.resnets.1.time_emb_proj": {
1884
+ "type": "linear",
1885
+ "out_features": 608,
1886
+ "in_features": 1280
1887
+ },
1888
+ "mid_block.resnets.1.conv2": {
1889
+ "type": "conv2d",
1890
+ "out_channels": 1280,
1891
+ "in_channels": 608,
1892
+ "kernel_size": [
1893
+ 3,
1894
+ 3
1895
+ ]
1896
+ },
1897
+ "conv_out": {
1898
+ "type": "conv2d",
1899
+ "out_channels": 4,
1900
+ "in_channels": 320,
1901
+ "kernel_size": [
1902
+ 3,
1903
+ 3
1904
+ ]
1905
+ }
1906
+ },
1907
+ "torch_version": "2.9.1+cu130"
1908
+ }