Add dreamzero_libero_10_full_finetune_bs64 checkpoints

Browse files

Files changed (5) hide show

dreamzero_libero_10_full_finetune_bs64/checkpoints/step-019032-epoch-12-loss=0.0578.safetensors +3 -0
dreamzero_libero_10_full_finetune_bs64/config.json +280 -0
dreamzero_libero_10_full_finetune_bs64/config.yaml +209 -0
dreamzero_libero_10_full_finetune_bs64/dataset_statistics.json +104 -0
dreamzero_libero_10_full_finetune_bs64/dreamzero_libero_10_full_finetune_2026_04_19_03_59_36.jsonl +0 -0

dreamzero_libero_10_full_finetune_bs64/checkpoints/step-019032-epoch-12-loss=0.0578.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8062b3ff6d4d47a85d1d0aa9c62b58690de64357861a7416900517ae4b30d7f4
+size 91696959464

dreamzero_libero_10_full_finetune_bs64/config.json ADDED Viewed

	@@ -0,0 +1,280 @@

+{
+  "_ckpt_root": "./checkpoints",
+  "_frame_window_size": 9,
+  "_tokenizer": "./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl",
+  "eval": {
+    "dataset": {
+      "transforms": [
+        {
+          "img_keys": [
+            "agentview_image",
+            "robot0_eye_in_hand_image"
+          ],
+          "type": "ProcessLiberoEvalInputs"
+        },
+        {
+          "image_resize_strategy": "resize-naive",
+          "input_sizes": [
+            [
+              3,
+              128,
+              128
+            ],
+            [
+              3,
+              128,
+              128
+            ]
+          ],
+          "means": [
+            [
+              127.5,
+              127.5,
+              127.5
+            ],
+            [
+              127.5,
+              127.5,
+              127.5
+            ]
+          ],
+          "stds": [
+            [
+              127.5,
+              127.5,
+              127.5
+            ],
+            [
+              127.5,
+              127.5,
+              127.5
+            ]
+          ],
+          "type": "TransformImage"
+        },
+        {
+          "gripper_key": "robot0_gripper_qpos",
+          "norm_type": "mean_std",
+          "out_key": "states",
+          "pos_key": "robot0_eef_pos",
+          "quat_key": "robot0_eef_quat",
+          "state_dim": 32,
+          "type": "LiberoProprioFromInputs"
+        },
+        {
+          "max_len": 512,
+          "tokenizer": {
+            "model_path": "./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl",
+            "type": "PretrainedTokenizer"
+          },
+          "type": "LiberoPromptFromInputs",
+          "use_conversation": false
+        },
+        {
+          "frame_window_size": 1,
+          "num_views": 2,
+          "type": "PrepareVideo"
+        }
+      ],
+      "type": "LiberoParquetEvalDataset"
+    },
+    "denormalize_action": {
+      "action_dim": 7,
+      "norm_type": "mean_std",
+      "type": "DenormalizeLiberoAction"
+    },
+    "enable_mixed_precision_training": true,
+    "eval_chunk_size": 10,
+    "mixed_precision_dtype": "bf16",
+    "model_family": "dreamzero",
+    "num_steps_wait": 10,
+    "num_trials_per_task": 50,
+    "resize_size": 128,
+    "seed": 7,
+    "task_suite_name": "libero_10",
+    "type": "LiberoEvalRunner"
+  },
+  "model": {
+    "frame_window_size": 9,
+    "name_mapping": {
+      "vla_head.model": "action_head.model",
+      "vlm_backbone.image_encoder": "action_head.image_encoder",
+      "vlm_backbone.text_encoder": "action_head.text_encoder",
+      "vlm_backbone.vae": "action_head.vae"
+    },
+    "num_views": 2,
+    "pretrained_name_or_path": "./checkpoints/DreamZero-AgiBot",
+    "type": "DreamZeroVLA",
+    "vla_head": {
+      "action_dim": 7,
+      "action_horizon": 10,
+      "dit_dim": 5120,
+      "dit_ffn_dim": 13824,
+      "dit_freq_dim": 256,
+      "dit_in_dim": 36,
+      "dit_num_heads": 40,
+      "dit_num_layers": 40,
+      "dit_out_dim": 16,
+      "frame_seqlen": 128,
+      "hidden_size": 1024,
+      "input_embedding_dim": 1536,
+      "max_action_dim": 32,
+      "max_num_embodiments": 32,
+      "max_state_dim": 64,
+      "noise_beta_alpha": 1.5,
+      "noise_beta_beta": 1.0,
+      "noise_s": 0.999,
+      "num_action_per_block": 10,
+      "num_frame_per_block": 2,
+      "num_frames": 9,
+      "num_inference_steps": 16,
+      "num_state_per_block": 1,
+      "type": "DreamZeroHead",
+      "use_gradient_checkpointing": true
+    },
+    "vlm_backbone": {
+      "image_encoder_path": null,
+      "text_encoder_path": null,
+      "tiled": false,
+      "type": "WanBackbone",
+      "vae_path": null
+    }
+  },
+  "per_device_num_workers": 4,
+  "runner": {
+    "change_key_name": false,
+    "collator": {
+      "keys": [
+        "states",
+        "images",
+        "img_masks",
+        "actions",
+        "action_masks",
+        "embodiment_ids",
+        "frame_masks",
+        "lang_tokens",
+        "lang_masks"
+      ],
+      "meta_keys": [
+        "task_description",
+        "prompt",
+        "info",
+        "stats",
+        "timestamp"
+      ],
+      "type": "DictCollator"
+    },
+    "enable_gradient_checkpointing": true,
+    "enable_mixed_precision_training": true,
+    "learning_rate": 1e-05,
+    "lr_scheduler_type": "linear-warmup+cosine-decay",
+    "max_epochs": 12,
+    "max_grad_norm": 1.0,
+    "metric": {
+      "active_trackers": [
+        "jsonl",
+        "wandb"
+      ],
+      "grad_accumulation_steps": 1,
+      "run_dir": "work_dirs",
+      "type": "VLAMetric",
+      "window_size": 1
+    },
+    "mixed_precision_dtype": "bf16",
+    "sampler": null,
+    "sharding_strategy": "full-shard",
+    "type": "FSDPTrainRunner",
+    "warmup_ratio": 0.05,
+    "weight_decay": 1e-05
+  },
+  "train_dataloader": {
+    "dataset": {
+      "datasets": {
+        "action_key": "action",
+        "action_window_size": 10,
+        "data_root_path": "./datasets/libero_10_no_noops_lerobotv2.1",
+        "frame_window_size": 9,
+        "statistic_name": "libero_10_no_noops",
+        "transforms": [
+          {
+            "embodiment_id": 0,
+            "name_mappings": {
+              "actions": [
+                "actions"
+              ],
+              "observation.state": [
+                "states"
+              ]
+            },
+            "parquet_keys": [
+              "observation.state",
+              "timestamp",
+              "actions",
+              "info",
+              "stats",
+              "action_masks"
+            ],
+            "type": "ProcessParquetInputs",
+            "video_keys": [
+              "observation.images.image",
+              "observation.images.wrist_image"
+            ]
+          },
+          {
+            "type": "ParquetPrompter",
+            "use_conversation": false
+          },
+          {
+            "max_len": 512,
+            "tokenizer": {
+              "model_path": "./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl",
+              "type": "PretrainedTokenizer"
+            },
+            "type": "ProcessPrompts"
+          },
+          {
+            "height": 128,
+            "type": "ResizeImages",
+            "width": 128
+          },
+          {
+            "type": "SimpleNormalizeImages"
+          },
+          {
+            "action_dim": 32,
+            "action_key": "action",
+            "norm_type": "mean_std",
+            "state_dim": 32,
+            "state_key": "proprio",
+            "type": "NormalizeStatesAndActions"
+          },
+          {
+            "frame_window_size": 9,
+            "num_views": 2,
+            "type": "PrepareVideo"
+          }
+        ],
+        "type": "ParquetDataset",
+        "use_delta": false,
+        "window_start_idx": 0
+      },
+      "name_mappings": {
+        "action": [
+          "action"
+        ],
+        "observation.state": [
+          "proprio"
+        ]
+      },
+      "statistic_keys": [
+        "observation.state",
+        "timestamp",
+        "action"
+      ],
+      "statistic_name": "libero_10_no_noops",
+      "type": "DistributedRepeatingDataset"
+    },
+    "per_device_batch_size": 4,
+    "per_device_num_workers": 4
+  }
+}

dreamzero_libero_10_full_finetune_bs64/config.yaml ADDED Viewed

	@@ -0,0 +1,209 @@

+_ckpt_root: ./checkpoints
+_frame_window_size: 9
+_tokenizer: ./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl
+eval:
+  dataset:
+    transforms:
+    - img_keys:
+      - agentview_image
+      - robot0_eye_in_hand_image
+      type: ProcessLiberoEvalInputs
+    - image_resize_strategy: resize-naive
+      input_sizes:
+      - - 3
+        - 128
+        - 128
+      - - 3
+        - 128
+        - 128
+      means:
+      - - 127.5
+        - 127.5
+        - 127.5
+      - - 127.5
+        - 127.5
+        - 127.5
+      stds:
+      - - 127.5
+        - 127.5
+        - 127.5
+      - - 127.5
+        - 127.5
+        - 127.5
+      type: TransformImage
+    - gripper_key: robot0_gripper_qpos
+      norm_type: mean_std
+      out_key: states
+      pos_key: robot0_eef_pos
+      quat_key: robot0_eef_quat
+      state_dim: 32
+      type: LiberoProprioFromInputs
+    - max_len: 512
+      tokenizer:
+        model_path: ./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl
+        type: PretrainedTokenizer
+      type: LiberoPromptFromInputs
+      use_conversation: false
+    - frame_window_size: 1
+      num_views: 2
+      type: PrepareVideo
+    type: LiberoParquetEvalDataset
+  denormalize_action:
+    action_dim: 7
+    norm_type: mean_std
+    type: DenormalizeLiberoAction
+  enable_mixed_precision_training: true
+  eval_chunk_size: 10
+  mixed_precision_dtype: bf16
+  model_family: dreamzero
+  num_steps_wait: 10
+  num_trials_per_task: 50
+  resize_size: 128
+  seed: 7
+  task_suite_name: libero_10
+  type: LiberoEvalRunner
+model:
+  frame_window_size: 9
+  name_mapping:
+    vla_head.model: action_head.model
+    vlm_backbone.image_encoder: action_head.image_encoder
+    vlm_backbone.text_encoder: action_head.text_encoder
+    vlm_backbone.vae: action_head.vae
+  num_views: 2
+  pretrained_name_or_path: ./checkpoints/DreamZero-AgiBot
+  type: DreamZeroVLA
+  vla_head:
+    action_dim: 7
+    action_horizon: 10
+    dit_dim: 5120
+    dit_ffn_dim: 13824
+    dit_freq_dim: 256
+    dit_in_dim: 36
+    dit_num_heads: 40
+    dit_num_layers: 40
+    dit_out_dim: 16
+    frame_seqlen: 128
+    hidden_size: 1024
+    input_embedding_dim: 1536
+    max_action_dim: 32
+    max_num_embodiments: 32
+    max_state_dim: 64
+    noise_beta_alpha: 1.5
+    noise_beta_beta: 1.0
+    noise_s: 0.999
+    num_action_per_block: 10
+    num_frame_per_block: 2
+    num_frames: 9
+    num_inference_steps: 16
+    num_state_per_block: 1
+    type: DreamZeroHead
+    use_gradient_checkpointing: true
+  vlm_backbone:
+    image_encoder_path: null
+    text_encoder_path: null
+    tiled: false
+    type: WanBackbone
+    vae_path: null
+per_device_num_workers: 4
+runner:
+  change_key_name: false
+  collator:
+    keys:
+    - states
+    - images
+    - img_masks
+    - actions
+    - action_masks
+    - embodiment_ids
+    - frame_masks
+    - lang_tokens
+    - lang_masks
+    meta_keys:
+    - task_description
+    - prompt
+    - info
+    - stats
+    - timestamp
+    type: DictCollator
+  enable_gradient_checkpointing: true
+  enable_mixed_precision_training: true
+  learning_rate: 1.0e-05
+  lr_scheduler_type: linear-warmup+cosine-decay
+  max_epochs: 12
+  max_grad_norm: 1.0
+  metric:
+    active_trackers:
+    - jsonl
+    - wandb
+    grad_accumulation_steps: 1
+    run_dir: work_dirs
+    type: VLAMetric
+    window_size: 1
+  mixed_precision_dtype: bf16
+  sampler: null
+  sharding_strategy: full-shard
+  type: FSDPTrainRunner
+  warmup_ratio: 0.05
+  weight_decay: 1.0e-05
+train_dataloader:
+  dataset:
+    datasets:
+      action_key: action
+      action_window_size: 10
+      data_root_path: ./datasets/libero_10_no_noops_lerobotv2.1
+      frame_window_size: 9
+      statistic_name: libero_10_no_noops
+      transforms:
+      - embodiment_id: 0
+        name_mappings:
+          actions:
+          - actions
+          observation.state:
+          - states
+        parquet_keys:
+        - observation.state
+        - timestamp
+        - actions
+        - info
+        - stats
+        - action_masks
+        type: ProcessParquetInputs
+        video_keys:
+        - observation.images.image
+        - observation.images.wrist_image
+      - type: ParquetPrompter
+        use_conversation: false
+      - max_len: 512
+        tokenizer:
+          model_path: ./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl
+          type: PretrainedTokenizer
+        type: ProcessPrompts
+      - height: 128
+        type: ResizeImages
+        width: 128
+      - type: SimpleNormalizeImages
+      - action_dim: 32
+        action_key: action
+        norm_type: mean_std
+        state_dim: 32
+        state_key: proprio
+        type: NormalizeStatesAndActions
+      - frame_window_size: 9
+        num_views: 2
+        type: PrepareVideo
+      type: ParquetDataset
+      use_delta: false
+      window_start_idx: 0
+    name_mappings:
+      action:
+      - action
+      observation.state:
+      - proprio
+    statistic_keys:
+    - observation.state
+    - timestamp
+    - action
+    statistic_name: libero_10_no_noops
+    type: DistributedRepeatingDataset
+  per_device_batch_size: 4
+  per_device_num_workers: 4

dreamzero_libero_10_full_finetune_bs64/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+  "libero_10_no_noops": {
+    "proprio": {
+      "mean": [
+        -0.0419064655693921,
+        0.0353943785769225,
+        0.8257066448085474,
+        2.908315654671235,
+        -0.5562158603122547,
+        -0.1664910329554594,
+        0.02831534785236664,
+        -0.028561558922556265
+      ],
+      "std": [
+        0.037983810285504724,
+        0.05099922690402999,
+        0.09094586143443492,
+        0.12167118781966886,
+        0.43643697181350727,
+        0.12656789603066015,
+        0.004705366661198258,
+        0.004657921514447958
+      ],
+      "min": [
+        -0.4828203022480011,
+        -0.3255046010017395,
+        0.445506751537323,
+        1.1321442127227783,
+        -3.641430377960205,
+        -1.842738389968872,
+        -0.0010040868073701859,
+        -0.04111652821302414
+      ],
+      "max": [
+        0.21031762659549713,
+        0.39128610491752625,
+        1.3332009315490723,
+        3.6714255809783936,
+        3.560650587081909,
+        1.386339545249939,
+        0.04160946607589722,
+        0.0013633022317662835
+      ],
+      "q01": null,
+      "q99": null
+    },
+    "timestamp": {
+      "mean": [
+        6.968810671239492
+      ],
+      "std": [
+        4.4205853432820845
+      ],
+      "min": [
+        0.0
+      ],
+      "max": [
+        25.2
+      ],
+      "q01": null,
+      "q99": null
+    },
+    "action": {
+      "mean": [
+        0.018203219580245917,
+        0.05858386677049721,
+        -0.05592356325431262,
+        0.004626933903665416,
+        0.0028960781014207345,
+        -0.0076731359981381505,
+        0.5457824565452817
+      ],
+      "std": [
+        0.10678436772960577,
+        0.13569355116695744,
+        0.1388675428804427,
+        0.014251597889066525,
+        0.020520837090261576,
+        0.03297657922665584,
+        0.1881883528070125
+      ],
+      "min": [
+        -0.9375,
+        -0.9375,
+        -0.9375,
+        -0.23642857372760773,
+        -0.3053571283817291,
+        -0.3675000071525574,
+        0.0
+      ],
+      "max": [
+        0.9375,
+        0.9375,
+        0.9375,
+        0.30000001192092896,
+        0.29357144236564636,
+        0.375,
+        1.0
+      ],
+      "q01": null,
+      "q99": null
+    }
+  }
+}

dreamzero_libero_10_full_finetune_bs64/dreamzero_libero_10_full_finetune_2026_04_19_03_59_36.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff