{
  "codebook_embed_dim": 32,
  "codebook_l2_norm": true,
  "codebook_show_usage": true,
  "codebook_size": 8192,
  "commit_loss_beta": 0.25,
  "dec_drop_path_rate": 0.0,
  "dec_patch_size": 16,
  "dec_pretrained": false,
  "dec_tuning_method": "full",
  "dec_type": "vit",
  "decoder_ch_mult": [
    1,
    1,
    2,
    2,
    4
  ],
  "decoder_model": "vit_base_patch14_dinov2.lvd142m",
  "dropout_p": 0.0,
  "enc_drop_path_rate": 0.0,
  "enc_patch_size": 16,
  "enc_pretrained": true,
  "enc_tuning_method": "full",
  "enc_type": "vit",
  "encoder_ch_mult": [
    1,
    1,
    2,
    2,
    4
  ],
  "encoder_model": "vit_base_patch14_dinov2.lvd142m",
  "entropy_loss_ratio": 0.0,
  "image_size": 256,
  "kl_loss_weight": 1e-06,
  "num_codebooks": 4,
  "num_latent_tokens": 64,
  "repa": false,
  "repa_align": "global",
  "repa_layer_indices": [
    8
  ],
  "repa_loss_weight": 0.1,
  "repa_model": "vit_base_patch16_224",
  "repa_patch_size": 16,
  "repa_proj_dim": 2048,
  "tau": 0.07,
  "vq_loss_ratio": 1.0,
  "vq_mean": -0.003192751,
  "vq_std": 0.07235257,
  "z_channels": 256
}