| { | |
| "adain": true, | |
| "bottleneck_type": "rq", | |
| "checkpointing": false, | |
| "code_shape": [ | |
| 32, | |
| 32, | |
| 1 | |
| ], | |
| "connect_list": [ | |
| "32", | |
| "64", | |
| "128", | |
| "256" | |
| ], | |
| "ddconfig": { | |
| "attn_resolutions": [ | |
| 32, | |
| 64, | |
| 128 | |
| ], | |
| "ch": 64, | |
| "ch_mult": [ | |
| 1, | |
| 2, | |
| 4, | |
| 4, | |
| 8 | |
| ], | |
| "depths": [ | |
| 2, | |
| 2, | |
| 2, | |
| 2, | |
| 2 | |
| ], | |
| "double_z": false, | |
| "dropout": 0.0, | |
| "in_channels": 3, | |
| "num_frames": 3, | |
| "num_head": 8, | |
| "num_heads": [ | |
| 8, | |
| 8, | |
| 8, | |
| 8, | |
| 8 | |
| ], | |
| "num_res_blocks": 1, | |
| "out_ch": 3, | |
| "resolution": 512, | |
| "stages_atten": 4, | |
| "window_size": [ | |
| 5, | |
| 5, | |
| 5 | |
| ], | |
| "window_sizes": [ | |
| [ | |
| 4, | |
| 4 | |
| ], | |
| [ | |
| 4, | |
| 4 | |
| ], | |
| [ | |
| 4, | |
| 4 | |
| ], | |
| [ | |
| 4, | |
| 4 | |
| ], | |
| [ | |
| 4, | |
| 4 | |
| ] | |
| ], | |
| "z_channels": 256 | |
| }, | |
| "decay": 0.99, | |
| "detach_16": true, | |
| "dim_embd": 512, | |
| "droprate": 0.0, | |
| "embed_dim": 512, | |
| "fix_modules": [ | |
| "quantizer", | |
| "decoder", | |
| "conditionnet" | |
| ], | |
| "latent_loss_weight": 0.25, | |
| "latent_shape": [ | |
| 32, | |
| 32, | |
| 512 | |
| ], | |
| "loss_type": "mse", | |
| "n_embed": 1024, | |
| "n_head": 8, | |
| "n_layers": 9, | |
| "restart_unused_codes": true, | |
| "shared_codebook": true, | |
| "tf": 3, | |
| "type": "PGTFormer", | |
| "w": 1 | |
| } |