# Video/image VAE configuration.
#
# NOTE(review): the original text had every line wrapped in "| ... |" and all
# indentation removed, which is not loadable as YAML. The nesting below is
# reconstructed from key order (lossconfig -> params, ddconfig -> strides) and
# must be confirmed against the code that loads this file.

embed_dim: 8

# Loss selection and its weights.
lossconfig:
  type: LPIPS
  params:
    kl_weight: 1.0e-06
    perceptual_weight: 1.0

# Encoder/decoder settings.
ddconfig:
  double_z: true
  # Same value as embed_dim above.
  z_channels: 8
  t_frames: 17
  in_channels: 3
  out_ch: 3
  ch: 128
  # ch_mult, spatial_stride and temporal_stride each list four entries;
  # presumably one entry per stage - TODO confirm.
  ch_mult:
    - 1
    - 2
    - 4
    - 4
  spatial_stride:
    - 2
    - 2
    - 2
    - 1
  temporal_stride:
    - 2
    - 2
    - 1
    - 1
  num_res_blocks: 4
  attn_resolutions: []
  dropout: 0.0
  # Spelling kept as-is: presumably matched by name in the consuming code.
  attn_type: causual_spatial_temporal
  revise_norm_bug: true

fps_ds: 8
resolution_video: 256
resolution_image: 256
# Spelling kept as-is: presumably looked up by name in the consuming code.
model_name: CausualVAEVideo
precision: bf16
# Deliberately left as the quoted string 'False' (not a boolean), as in the
# original; verify what type the consumer expects before changing it.
keep_aspect_ratio: 'False'

scaling_factor_video: 0.22970

scaling_factor_image: 0.26691