Diffusers
PyTorch
custom_code
dualvitok / config.json
huangrh9's picture
Upload folder using huggingface_hub
5ca5652 verified
{
"auto_map": {
"AutoConfig": "configuration_dualvitok.DualViTokConfig",
"AutoModel": "modeling_dualvitok.DualViTok"
},
"architectures": [
"DualViTok"
],
"semantic_encoder": {
"pretrained_semantic_encoder": "Emova-ollm/qwen2vit600m",
"z_channels": 32,
"num_blocks": 4,
"out_layer": "linear",
"embed_dim": 1280,
"target_mlp": "norm"
},
"semantic_decoder": {
"z_channels": 32,
"num_blocks": 4,
"embed_dim": 1280,
"out_layer": "linear_norm",
"out_channels": 3584
},
"semantic_quantizer_type": "simvq",
"pixel_quantizer_type": "simvq",
"semantic_quantizer_codebook_size": 32768,
"pixel_quantizer_codebook_size": 98304,
"attn_implementation": "eager",
"pixel_encoder": {
"codebook_size": 98304,
"embed_dim": 32,
"z_channels": 32,
"double_z": false,
"in_channels": 3,
"out_channels": 3,
"ch": 128,
"ch_mult": [
1,
1,
2,
2,
4
],
"num_res_blocks": 2,
"attn_resolutions": [
4
],
"dropout": 0.0,
"use_dc_up_down_blocks": true
},
"pixel_decoder": {
"codebook_size": 98304,
"embed_dim": 64,
"z_channels": 64,
"double_z": false,
"in_channels": 3,
"out_channels": 3,
"ch": 384,
"ch_mult": [
1,
1,
2,
2,
4
],
"num_res_blocks": 2,
"attn_resolutions": [
4
],
"dropout": 0.0,
"use_dc_up_down_blocks": true
},
"torch_dtype": "float16",
"transformers_version": "4.44.2"
}