| { | |
| "_class_name": "Encoder", | |
| "_diffusers_version": "0.36.0", | |
| "convolution_dimensions": 3, | |
| "encoder_blocks": [ | |
| [ | |
| "res_x", | |
| { | |
| "num_layers": 4 | |
| } | |
| ], | |
| [ | |
| "compress_space_res", | |
| { | |
| "multiplier": 2 | |
| } | |
| ], | |
| [ | |
| "res_x", | |
| { | |
| "num_layers": 6 | |
| } | |
| ], | |
| [ | |
| "compress_time_res", | |
| { | |
| "multiplier": 2 | |
| } | |
| ], | |
| [ | |
| "res_x", | |
| { | |
| "num_layers": 6 | |
| } | |
| ], | |
| [ | |
| "compress_all_res", | |
| { | |
| "multiplier": 2 | |
| } | |
| ], | |
| [ | |
| "res_x", | |
| { | |
| "num_layers": 2 | |
| } | |
| ], | |
| [ | |
| "compress_all_res", | |
| { | |
| "multiplier": 2 | |
| } | |
| ], | |
| [ | |
| "res_x", | |
| { | |
| "num_layers": 2 | |
| } | |
| ] | |
| ], | |
| "encoder_spatial_padding_mode": "zeros", | |
| "in_channels": 3, | |
| "latent_log_var": "uniform", | |
| "norm_layer": "pixel_norm", | |
| "out_channels": 128, | |
| "patch_size": 4 | |
| } | |