{ "action_dim": 32, "action_head_cfg": { "_convert_": "object", "_target_": "groot.vla.model.dreamzero.action_head.wan_flow_matching_action_tf.WANPolicyHead", "config": { "_recursive_": false, "_target_": "groot.vla.model.dreamzero.action_head.wan_flow_matching_action_tf.WANPolicyHeadConfig", "action_dim": 32, "action_horizon": 24, "action_loss_embodiment_ids": [ 26, 17, 32 ], "add_pos_embed": true, "backbone_embedding_dim": 0, "backbone_features_projector_cfg": null, "decouple_video_action_noise": false, "defer_lora_injection": true, "diffusion_model_cfg": { "_convert_": "object", "_target_": "groot.vla.model.dreamzero.modules.wan_video_dit_action_casual_chunk.CausalWanModel", "diffusion_model_pretrained_path": "/mnt/nas3/yunze/research/wam/dreamzero/checkpoints/Wan2.1-I2V-14B-480P", "dim": 5120, "eps": 1e-06, "ffn_dim": 13824, "frame_seqlen": 880, "freq_dim": 256, "in_dim": 36, "max_chunk_size": 4, "model_type": "i2v", "num_action_per_block": 24, "num_frame_per_block": 2, "num_heads": 40, "num_layers": 40, "num_state_per_block": 1, "out_dim": 16 }, "expand_batch": null, "freeze_decode_layer": false, "hidden_size": 64, "image_encoder_cfg": { "_convert_": "object", "_target_": "groot.vla.model.dreamzero.modules.wan_video_image_encoder.WanImageEncoder", "image_encoder_pretrained_path": "/mnt/nas3/yunze/research/wam/dreamzero/checkpoints/Wan2.1-I2V-14B-480P/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth" }, "init_lora_weights": "kaiming", "input_embedding_dim": 1536, "load_pretrained_det_decode_layer_path": null, "lora_alpha": 16, "lora_rank": 16, "lora_target_modules": "q,k,v,o,ffn.0,ffn.2", "max_action_dim": 32, "max_state_dim": 64, "model_dtype": "float32", "noise_beta_alpha": 1.5, "noise_beta_beta": 1.0, "noise_s": 0.999, "num_frame_per_block": 2, "num_frames": 33, "num_inference_timesteps": 4, "num_timestep_buckets": 1000, "repa_coeff": 1.0, "repa_layer": 8, "skip_component_loading": true, "text_encoder_cfg": { "_convert_": "object", "_target_": "groot.vla.model.dreamzero.modules.wan_video_text_encoder.WanTextEncoder", "text_encoder_pretrained_path": "/mnt/nas3/yunze/research/wam/dreamzero/checkpoints/Wan2.1-I2V-14B-480P/models_t5_umt5-xxl-enc-bf16.pth" }, "tile_size_height": 34, "tile_size_width": 34, "tile_stride_height": 18, "tile_stride_width": 16, "tiled": false, "train_architecture": "lora", "tune_diffusion_model": true, "tune_projector": true, "use_gradient_checkpointing": true, "use_vlln": true, "vae_cfg": { "_convert_": "object", "_target_": "groot.vla.model.dreamzero.modules.wan_video_vae.WanVideoVAE", "vae_pretrained_path": "/mnt/nas3/yunze/research/wam/dreamzero/checkpoints/Wan2.1-I2V-14B-480P/Wan2.1_VAE.pth" }, "video_noise_beta_alpha": 3.0, "video_noise_beta_beta": 1.0, "vl_self_attention_cfg": { "_target_": "groot.vla.model.n1_5.modules.cross_attention_dit.SelfAttentionTransformer", "attention_head_dim": 64, "dropout": 0.2, "final_dropout": true, "num_attention_heads": 24, "num_layers": 4, "positional_embeddings": null } } }, "action_horizon": 24, "architectures": [ "VLA" ], "backbone_cfg": { "_target_": "groot.vla.model.dreamzero.backbone.identity.IdentityBackbone" }, "hidden_size": 0, "model_dtype": "float32", "model_type": "vla", "resume_path": "/mnt/nas3/yunze/research/wam/robodojo/XPolicyLab/policy/DreamZero/checkpoints/RoboDojo-cotrain-arx_x5-3500-joint-42", "torch_dtype": "bfloat16", "transformers_version": "4.51.3" }