tencent
/

HunyuanImage-3.0

@@ -1,4 +1,5 @@
 {
     "add_classification_head": false,
     "anyres_pooling_size": 2,
     "anyres_vit_max_image_size": null,
@@ -6,14 +7,14 @@
     "architectures": [
         "HunyuanImage3ForCausalMM"
     ],
     "attention_bias": false,
     "attention_dropout": 0.0,
     "attention_head_dim": 128,
-    "auto_map": {
-        "AutoConfig": "configuration_hunyuan.HunyuanImage3Config",
-        "AutoModel": "hunyuan.HunyuanImage3Model",
-        "AutoModelForCausalLM": "hunyuan.HunyuanImage3ForCausalMM"
-    },
     "bos_token_id": 127958,
     "cla_share_factor": 2,
     "class_num": 0,
@@ -34,7 +35,7 @@
     "intermediate_size": 3072,
     "kv_lora_rank": null,
     "mask_init_id": 12,
-    "max_position_embeddings": 12800,
     "mlp_bias": false,
     "model_type": "hunyuan_image_3_moe",
     "moe_drop_tokens": false,
@@ -194,15 +195,25 @@
     "vit_path": null,
     "vit_remove_prenorm": false,
     "vit_token": 64,
-    "vit_type": null,
     "vit_used_rms_norm": false,
     "vocab_size": 133120,
     "xdrope_section": null,
     "head_dim": 128,
     "vae_downsample_factor": [
         16,
         16
     ],
     "vae": {
         "_class_name": "AutoencoderKLConv3D",
         "block_out_channels": [
@@ -247,15 +258,15 @@
         "do_rescale": true,
         "do_resize": true,
         "image_mean": [
-        0.5,
-        0.5,
-        0.5
         ],
         "image_processor_type": "Siglip2ImageProcessorFast",
         "image_std": [
-        0.5,
-        0.5,
-        0.5
         ],
         "max_num_patches": 1024,
         "patch_size": 16,
@@ -270,4 +281,4 @@
         "depth": 2,
         "torch_dtype": "float32"
     }
-}

 {
+    "model_version": "HunyuanImage-3.0",
     "add_classification_head": false,
     "anyres_pooling_size": 2,
     "anyres_vit_max_image_size": null,
     "architectures": [
         "HunyuanImage3ForCausalMM"
     ],
+    "auto_map": {
+        "AutoConfig": "configuration_hunyuan_image_3.HunyuanImage3Config",
+        "AutoModel": "modeling_hunyuan_image_3.HunyuanImage3Model",
+        "AutoModelForCausalLM": "modeling_hunyuan_image_3.HunyuanImage3ForCausalMM"
+    },
     "attention_bias": false,
     "attention_dropout": 0.0,
     "attention_head_dim": 128,
     "bos_token_id": 127958,
     "cla_share_factor": 2,
     "class_num": 0,
     "intermediate_size": 3072,
     "kv_lora_rank": null,
     "mask_init_id": 12,
+    "max_position_embeddings": 22800,
     "mlp_bias": false,
     "model_type": "hunyuan_image_3_moe",
     "moe_drop_tokens": false,
     "vit_path": null,
     "vit_remove_prenorm": false,
     "vit_token": 64,
+    "vit_type": "siglip2-so400m-patch16-naflex",
     "vit_used_rms_norm": false,
     "vocab_size": 133120,
     "xdrope_section": null,
     "head_dim": 128,
+    "rope_type": "2d",
     "vae_downsample_factor": [
         16,
         16
     ],
+    "vit_downsample_factor": [
+        16,
+        16
+    ],
+    "cond_token_attn_type": "joint_full",
+    "cond_image_type": "vae_vit",
+    "vae_type": "hunyuan-image-vae-v1",
+    "vae_dtype": "float32",
+    "vae_autocast_dtype": "float16",
     "vae": {
         "_class_name": "AutoencoderKLConv3D",
         "block_out_channels": [
         "do_rescale": true,
         "do_resize": true,
         "image_mean": [
+            0.5,
+            0.5,
+            0.5
         ],
         "image_processor_type": "Siglip2ImageProcessorFast",
         "image_std": [
+            0.5,
+            0.5,
+            0.5
         ],
         "max_num_patches": 1024,
         "patch_size": 16,
         "depth": 2,
         "torch_dtype": "float32"
     }
+}