| { | |
| "decoder.conv_in.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.conv_in.weight": { | |
| "shape": [ | |
| 512, | |
| 16, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.conv_out.bias": { | |
| "shape": [ | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.conv_out.weight": { | |
| "shape": [ | |
| 3, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.k.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.k.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.norm.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.norm.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.proj_out.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.proj_out.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.q.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.q.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.v.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.attn_1.v.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_1.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.mid.block_2.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.norm_out.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.norm_out.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.conv1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.conv1.weight": { | |
| "shape": [ | |
| 128, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.conv2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.conv2.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.nin_shortcut.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.nin_shortcut.weight": { | |
| "shape": [ | |
| 128, | |
| 256, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.norm1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.norm1.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.norm2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.0.norm2.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.conv1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.conv1.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.conv2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.conv2.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.norm1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.norm1.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.norm2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.1.norm2.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.conv1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.conv1.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.conv2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.conv2.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.norm1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.norm1.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.norm2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.0.block.2.norm2.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.conv1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.conv1.weight": { | |
| "shape": [ | |
| 256, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.conv2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.conv2.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.nin_shortcut.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.nin_shortcut.weight": { | |
| "shape": [ | |
| 256, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.norm2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.0.norm2.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.conv1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.conv1.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.conv2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.conv2.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.norm1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.norm1.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.norm2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.1.norm2.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.conv1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.conv1.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.conv2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.conv2.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.norm1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.norm1.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.norm2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.block.2.norm2.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.upsample.conv.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.1.upsample.conv.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.0.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.1.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.block.2.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.upsample.conv.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.2.upsample.conv.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.0.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.1.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.block.2.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.upsample.conv.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "decoder.up.3.upsample.conv.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.conv_in.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.conv_in.weight": { | |
| "shape": [ | |
| 128, | |
| 3, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.conv_out.bias": { | |
| "shape": [ | |
| 32 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.conv_out.weight": { | |
| "shape": [ | |
| 32, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.conv1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.conv1.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.conv2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.conv2.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.norm1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.norm1.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.norm2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.0.norm2.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.conv1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.conv1.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.conv2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.conv2.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.norm1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.norm1.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.norm2.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.block.1.norm2.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.downsample.conv.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.0.downsample.conv.weight": { | |
| "shape": [ | |
| 128, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.conv1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.conv1.weight": { | |
| "shape": [ | |
| 256, | |
| 128, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.conv2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.conv2.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.nin_shortcut.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.nin_shortcut.weight": { | |
| "shape": [ | |
| 256, | |
| 128, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.norm1.bias": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.norm1.weight": { | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.norm2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.0.norm2.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.conv1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.conv1.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.conv2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.conv2.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.norm1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.norm1.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.norm2.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.block.1.norm2.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.downsample.conv.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.1.downsample.conv.weight": { | |
| "shape": [ | |
| 256, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 256, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.nin_shortcut.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.nin_shortcut.weight": { | |
| "shape": [ | |
| 512, | |
| 256, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.norm1.bias": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.norm1.weight": { | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.0.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.block.1.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.downsample.conv.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.2.downsample.conv.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.0.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.down.3.block.1.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.k.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.k.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.norm.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.norm.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.proj_out.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.proj_out.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.q.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.q.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.v.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.attn_1.v.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 1, | |
| 1 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_1.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.conv1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.conv1.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.conv2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.conv2.weight": { | |
| "shape": [ | |
| 512, | |
| 512, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.norm1.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.norm1.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.norm2.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.mid.block_2.norm2.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.norm_out.bias": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| "encoder.norm_out.weight": { | |
| "shape": [ | |
| 512 | |
| ], | |
| "dtype": "torch.float32" | |
| } | |
| } |