Slot_Q-Former / config.json
zheedong's picture
Slot Q-Former init
34ddb03 verified
{
"image_size": 256,
"slot_num": 32,
"codebook_embed_dim": 32,
"n_embed": 8192,
"num_quantizers": 4,
"blocks_layers": 4,
"blocks_image_layers": 4,
"use_blocks_image": true,
"bypass_codebook": false,
"use_causal": true,
"use_slot": true,
"slot_config": {
"T": 1,
"num_iterations": 3,
"use_half_slot": false
},
"checkpoint_path": {
"model_path": null
},
"stage1": {
"dino_model_name": "dinov2_vitl14",
"unfreeze_unet": true,
"unfreeze_resnet": false,
"image_size": 256,
"loss_weight": {
"loss_itc": 0.5,
"loss_lm": 0.5,
"loss_diffusion": 1,
"loss_mse": 0.5
},
"use_causal": true,
"use_slot": true,
"slot_config": {
"T": 1,
"num_iterations": 3,
"use_half_slot": false
}
},
"stage2": {
"loss_weight": {
"loss_codebook": 1,
"loss_recon": 1,
"loss_diffusion": 0.1,
"loss_mse": 0.1
},
"unfreeze_unet": false,
"unfreeze_linear": false,
"blocks_layers": 4,
"blocks_image_layers": 4,
"use_blocks_image": true,
"unclip": false,
"vq": {
"vq_type": "residual_vq",
"num_quantizers": 4,
"codebook_embed_dim": 32,
"n_embed": 8192
},
"bypass_codebook": false
}
}