{
  "image_size": 256,
  "slot_num": 32,
  "codebook_embed_dim": 32,
  "n_embed": 8192,
  "num_quantizers": 4,
  "blocks_layers": 4,
  "blocks_image_layers": 4,
  "use_blocks_image": true,
  "bypass_codebook": false,
  "use_causal": true,
  "use_slot": true,
  "slot_config": {
    "T": 1,
    "num_iterations": 3,
    "use_half_slot": false
  },
  "checkpoint_path": {
    "model_path": null
  },
  "stage1": {
    "dino_model_name": "dinov2_vitl14",
    "unfreeze_unet": true,
    "unfreeze_resnet": false,
    "image_size": 256,
    "loss_weight": {
      "loss_itc": 0.5,
      "loss_lm": 0.5,
      "loss_diffusion": 1,
      "loss_mse": 0.5
    },
    "use_causal": true,
    "use_slot": true,
    "slot_config": {
      "T": 1,
      "num_iterations": 3,
      "use_half_slot": false
    }
  },
  "stage2": {
    "loss_weight": {
      "loss_codebook": 1,
      "loss_recon": 1,
      "loss_diffusion": 0.1,
      "loss_mse": 0.1
    },
    "unfreeze_unet": false,
    "unfreeze_linear": false,
    "blocks_layers": 4,
    "blocks_image_layers": 4,
    "use_blocks_image": true,
    "unclip": false,
    "vq": {
      "vq_type": "residual_vq",
      "num_quantizers": 4,
      "codebook_embed_dim": 32,
      "n_embed": 8192
    },
    "bypass_codebook": false
  }
}