{
  "_class_name": "CVVAEModel",
  "_diffusers_version": "0.26.3",
  "attn_resolutions": [],
  "causal_decoder": false,
  "causal_encoder": true,
  "ch": 128,
  "ch_mult": [
    1,
    2,
    4,
    4
  ],
  "decoder_attn_type": "spatial-temporal-xformer",
  "double_z": true,
  "dropout": 0.0,
  "en_de_n_frames_a_time": 16,
  "encoder_attn_type": "vanilla-xformers",
  "half_3d": true,
  "in_channels": 3,
  "num_res_blocks": 2,
  "num_video_frames": null,
  "out_ch": 3,
  "scaling_factor": 0.18215,
  "force_upcast": true,
  "reshape_x_dim_to_4": false,
  "reshape_z_dim_to_4": false,
  "spatial_n_compress": 8,
  "tile_overlap_ratio": 0.2222,
  "tile_spatial_size": 576,
  "time_n_compress": 4,
  "use_3d_conv": true,
  "z_channels": 4
}