Vizuara
/

dreamzero-so101-lora

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "_convert_": "object",
+    "_target_": "groot.vla.model.dreamzero.action_head.wan_flow_matching_action_tf.WANPolicyHead",
+    "config": {
+      "_recursive_": false,
+      "_target_": "groot.vla.model.dreamzero.action_head.wan_flow_matching_action_tf.WANPolicyHeadConfig",
+      "action_dim": 32,
+      "action_horizon": 24,
+      "action_loss_embodiment_ids": [
+        26,
+        17,
+        32
+      ],
+      "add_pos_embed": true,
+      "backbone_embedding_dim": 0,
+      "backbone_features_projector_cfg": null,
+      "decouple_video_action_noise": false,
+      "diffusion_model_cfg": {
+        "_convert_": "object",
+        "_target_": "groot.vla.model.dreamzero.modules.wan_video_dit_action_casual_chunk.CausalWanModel",
+        "diffusion_model_pretrained_path": "/workspace/checkpoints/Wan2.1-I2V-14B-480P",
+        "dim": 5120,
+        "eps": 1e-06,
+        "ffn_dim": 13824,
+        "frame_seqlen": 880,
+        "freq_dim": 256,
+        "in_dim": 36,
+        "max_chunk_size": 4,
+        "model_type": "i2v",
+        "num_action_per_block": 24,
+        "num_frame_per_block": 2,
+        "num_heads": 40,
+        "num_layers": 40,
+        "num_state_per_block": 1,
+        "out_dim": 16
+      },
+      "expand_batch": null,
+      "freeze_decode_layer": false,
+      "hidden_size": 64,
+      "image_encoder_cfg": {
+        "_convert_": "object",
+        "_target_": "groot.vla.model.dreamzero.modules.wan_video_image_encoder.WanImageEncoder",
+        "image_encoder_pretrained_path": "/workspace/checkpoints/Wan2.1-I2V-14B-480P/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
+      },
+      "init_lora_weights": "kaiming",
+      "input_embedding_dim": 1536,
+      "load_pretrained_det_decode_layer_path": null,
+      "lora_alpha": 4,
+      "lora_rank": 4,
+      "lora_target_modules": "q,k,v,o,ffn.0,ffn.2",
+      "max_action_dim": 32,
+      "max_state_dim": 64,
+      "model_dtype": "float32",
+      "noise_beta_alpha": 1.5,
+      "noise_beta_beta": 1.0,
+      "noise_s": 0.999,
+      "num_frame_per_block": 2,
+      "num_frames": 33,
+      "num_inference_timesteps": 4,
+      "num_timestep_buckets": 1000,
+      "repa_coeff": 1.0,
+      "repa_layer": 8,
+      "text_encoder_cfg": {
+        "_convert_": "object",
+        "_target_": "groot.vla.model.dreamzero.modules.wan_video_text_encoder.WanTextEncoder",
+        "text_encoder_pretrained_path": "/workspace/checkpoints/Wan2.1-I2V-14B-480P/models_t5_umt5-xxl-enc-bf16.pth"
+      },
+      "tile_size_height": 34,
+      "tile_size_width": 34,
+      "tile_stride_height": 18,
+      "tile_stride_width": 16,
+      "tiled": false,
+      "train_architecture": "lora",
+      "tune_diffusion_model": true,
+      "tune_projector": true,
+      "use_gradient_checkpointing": true,
+      "use_vlln": true,
+      "vae_cfg": {
+        "_convert_": "object",
+        "_target_": "groot.vla.model.dreamzero.modules.wan_video_vae.WanVideoVAE",
+        "vae_pretrained_path": "/workspace/checkpoints/Wan2.1-I2V-14B-480P/Wan2.1_VAE.pth"
+      },
+      "video_noise_beta_alpha": 3.0,
+      "video_noise_beta_beta": 1.0,
+      "vl_self_attention_cfg": {
+        "_target_": "groot.vla.model.n1_5.modules.cross_attention_dit.SelfAttentionTransformer",
+        "attention_head_dim": 64,
+        "dropout": 0.2,
+        "final_dropout": true,
+        "num_attention_heads": 24,
+        "num_layers": 4,
+        "positional_embeddings": null
+      }
+    }
+  },
+  "action_horizon": 24,
+  "architectures": [
+    "VLA"
+  ],
+  "backbone_cfg": {
+    "_target_": "groot.vla.model.dreamzero.backbone.identity.IdentityBackbone"
+  },
+  "hidden_size": 0,
+  "model_dtype": "float32",
+  "model_type": "vla",
+  "resume_path": "/workspace/checkpoints/dreamzero-so101-lora",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}