Henryeahhh committed on Oct 15, 2025

Commit

90c97df

verified ·

1 Parent(s): 4e9de94

Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

all_flow_matching/config.yaml +322 -0
all_flow_matching/upload.sh +5 -0
all_l1/config.yaml +322 -0
cleandesk50_flow_matching/config.yaml +322 -0
cleandesk50_l1_regression/config.yaml +322 -0
cleandesk_flow_matching/config.yaml +322 -0
cleandesk_l1_regression/config.yaml +322 -0
config.yaml +322 -0
eraser_flow_matching/config.yaml +322 -0
eraser_l1_regression/config.yaml +322 -0
glue/config.yaml +322 -0
glue_flow_matching/config.yaml +322 -0
glue_l1_regression/config.yaml +322 -0
pen_flow_matching/config.yaml +322 -0
wandb/wandb/run-20251002_150921-kqbx0cjv/files/output.log +390 -0
wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-core.log +6 -0
wandb/wandb/run-20251002_154526-bw81vbs0/files/wandb-metadata.json +204 -0
wandb/wandb/run-20251002_154526-bw81vbs0/run-bw81vbs0.wandb +0 -0
wandb/wandb/run-20251002_155015-xojint20/files/wandb-metadata.json +204 -0
wandb/wandb/run-20251002_155441-70dhy5dq/files/requirements.txt +286 -0
wandb/wandb/run-20251002_155441-70dhy5dq/files/wandb-metadata.json +204 -0
wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-core.log +6 -0
wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug.log +0 -0
wandb/wandb/run-20251002_155442-6v8q0jgn/files/output.log +314 -0
wandb/wandb/run-20251002_155442-6v8q0jgn/files/wandb-metadata.json +204 -0
wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-core.log +6 -0
wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-internal.log +6 -0
wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug.log +0 -0
wipe/config.yaml +322 -0
wipe/wandb/wandb/debug-internal.log +6 -0
wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/wandb-metadata.json +204 -0
wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-internal.log +6 -0
wipe/wandb/wandb/run-20251002_163436-itiyfljc/run-itiyfljc.wandb +0 -0
wipe_flow_matching/config.yaml +322 -0
wipe_flow_matching/step12000-unsharded/config.yaml +322 -0
wipe_flow_matching/step12000/config.yaml +322 -0
wipe_flow_matching/wandb/wandb/debug-internal.log +10 -0
wipe_flow_matching/wandb/wandb/debug.log +0 -0
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/wandb-metadata.json +204 -0
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-core.log +6 -0
wipe_l1_regression/config.yaml +322 -0
wipe_l1_regression/step12000-unsharded/config.yaml +322 -0
wipe_l1_regression/step12000/config.yaml +322 -0
wipe_l1_regression/wandb/wandb/debug-internal.log +10 -0
wipe_l1_regression/wandb/wandb/debug.log +0 -0
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/requirements.txt +286 -0
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/wandb-metadata.json +204 -0
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-core.log +6 -0
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-internal.log +10 -0
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug.log +0 -0

all_flow_matching/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: all_20251002_164508
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: all_20251002_164508
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

all_flow_matching/upload.sh ADDED Viewed

	@@ -0,0 +1,5 @@

+hf auth login
+huggingface-cli upload-large-folder spatialtemporal-ai/Lerobot_Glue_best ./glue_best --repo-type=model
+hf auth logout

all_l1/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: realworld_20250930_131219
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: 0
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: realworld_20250930_131219
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

cleandesk50_flow_matching/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: cleandesk50_20251008_163755
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: cleandesk50_20251008_163755
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

cleandesk50_l1_regression/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: cleandesk50_20251008_163748
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: cleandesk50_20251008_163748
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

cleandesk_flow_matching/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: cleandesk_20251005_163721
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: cleandesk_20251005_163721
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

cleandesk_l1_regression/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: cleandesk_20251008_163754
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: cleandesk_20251008_163754
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: glue_20251002_155411
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20251002_155411
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

eraser_flow_matching/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: eraser_20251011_163756
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: eraser_20251011_163756
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

eraser_l1_regression/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: eraser_20251011_163803
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: eraser_20251011_163803
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

glue/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: glue_20251002_162813
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20251002_162813
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

glue_flow_matching/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: glue_20251002_163658
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20251002_163658
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

glue_l1_regression/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: glue_20251002_163658
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20251002_163658
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

pen_flow_matching/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: pen_20251011_163803
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: pen_20251011_163803
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wandb/wandb/run-20251002_150921-kqbx0cjv/files/output.log ADDED Viewed

	@@ -0,0 +1,390 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/02 [15:09:23] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/02 [15:09:30] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+                 INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:435
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
+****** length of the dataset: 72641
+****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
+****** Expect one of: []
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7faa4f997a30>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+10/02 [15:09:39] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
+unexpected keys: []
+************************* Initialize model successful!
+************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
+************************* Before add lora to model
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+************* Before get lora params
+************* After get lora params successfully
+10/02 [15:11:05] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=35,614
+10/02 [15:11:06] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+[step=1/500000]
+    train/ActionL1Loss=0.5663
+    throughput/total_tokens=192,000
+    System/Peak GPU Memory (MB)=40,144
+[step=2/500000]
+    train/ActionL1Loss=0.5589
+    throughput/total_tokens=384,000
+    throughput/device/tokens_per_second=1,185
+    throughput/device/batches_per_second=0.0494
+    System/Peak GPU Memory (MB)=46,917
+[step=3/500000]
+    train/ActionL1Loss=0.5206
+    throughput/total_tokens=576,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=4/500000]
+    train/ActionL1Loss=0.4852
+    throughput/total_tokens=768,000
+    throughput/device/tokens_per_second=1,173
+    throughput/device/batches_per_second=0.0489
+[step=5/500000]
+    train/ActionL1Loss=0.4320
+    throughput/total_tokens=960,000
+    throughput/device/tokens_per_second=1,173
+    throughput/device/batches_per_second=0.0489
+[step=6/500000]
+    train/ActionL1Loss=0.4672
+    throughput/total_tokens=1,152,000
+    throughput/device/tokens_per_second=1,173
+    throughput/device/batches_per_second=0.0489
+[step=7/500000]
+    train/ActionL1Loss=0.3926
+    throughput/total_tokens=1,344,000
+    throughput/device/tokens_per_second=1,173
+    throughput/device/batches_per_second=0.0489
+[step=8/500000]
+    train/ActionL1Loss=0.4395
+    throughput/total_tokens=1,536,000
+    throughput/device/tokens_per_second=1,173
+    throughput/device/batches_per_second=0.0489
+[step=9/500000]
+    train/ActionL1Loss=0.4654
+    throughput/total_tokens=1,728,000
+    throughput/device/tokens_per_second=1,173
+    throughput/device/batches_per_second=0.0489
+[step=10/500000]
+    train/ActionL1Loss=0.3900
+    throughput/total_tokens=1,920,000
+    throughput/device/tokens_per_second=1,173
+    throughput/device/batches_per_second=0.0489
+    System/Peak GPU Memory (MB)=46,917
+[step=11/500000]
+    train/ActionL1Loss=0.3751
+    throughput/total_tokens=2,112,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0489
+[step=12/500000]
+    train/ActionL1Loss=0.3996
+    throughput/total_tokens=2,304,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=13/500000]
+    train/ActionL1Loss=0.3628
+    throughput/total_tokens=2,496,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+[step=14/500000]
+    train/ActionL1Loss=0.3743
+    throughput/total_tokens=2,688,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=15/500000]
+    train/ActionL1Loss=0.3542
+    throughput/total_tokens=2,880,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=16/500000]
+    train/ActionL1Loss=0.3885
+    throughput/total_tokens=3,072,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=17/500000]
+    train/ActionL1Loss=0.3967
+    throughput/total_tokens=3,264,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=18/500000]
+    train/ActionL1Loss=0.4508
+    throughput/total_tokens=3,456,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+[step=19/500000]
+    train/ActionL1Loss=0.4414
+    throughput/total_tokens=3,648,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+[step=20/500000]
+    optim/total_grad_norm=31.97
+    train/ActionL1Loss=0.3768
+    throughput/total_tokens=3,840,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+    System/Peak GPU Memory (MB)=46,917
+[step=21/500000]
+    train/ActionL1Loss=0.3586
+    throughput/total_tokens=4,032,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+[step=22/500000]
+    train/ActionL1Loss=0.3712
+    throughput/total_tokens=4,224,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=23/500000]
+    train/ActionL1Loss=0.3941
+    throughput/total_tokens=4,416,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=24/500000]
+    train/ActionL1Loss=0.4223
+    throughput/total_tokens=4,608,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=25/500000]
+    train/ActionL1Loss=0.4184
+    throughput/total_tokens=4,800,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=26/500000]
+    train/ActionL1Loss=0.3437
+    throughput/total_tokens=4,992,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=27/500000]
+    train/ActionL1Loss=0.3695
+    throughput/total_tokens=5,184,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=28/500000]
+    train/ActionL1Loss=0.3300
+    throughput/total_tokens=5,376,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=29/500000]
+    train/ActionL1Loss=0.4344
+    throughput/total_tokens=5,568,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=30/500000]
+    train/ActionL1Loss=0.4002
+    throughput/total_tokens=5,760,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+    System/Peak GPU Memory (MB)=46,917
+[step=31/500000]
+    train/ActionL1Loss=0.3070
+    throughput/total_tokens=5,952,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+[step=32/500000]
+    train/ActionL1Loss=0.3657
+    throughput/total_tokens=6,144,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0489
+[step=33/500000]
+    train/ActionL1Loss=0.3855
+    throughput/total_tokens=6,336,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=34/500000]
+    train/ActionL1Loss=0.4027
+    throughput/total_tokens=6,528,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+[step=35/500000]
+    train/ActionL1Loss=0.2975
+    throughput/total_tokens=6,720,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+[step=36/500000]
+    train/ActionL1Loss=0.4002
+    throughput/total_tokens=6,912,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+[step=37/500000]
+    train/ActionL1Loss=0.3601
+    throughput/total_tokens=7,104,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+[step=38/500000]
+    train/ActionL1Loss=0.4267
+    throughput/total_tokens=7,296,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+[step=39/500000]
+    train/ActionL1Loss=0.3714
+    throughput/total_tokens=7,488,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+[step=40/500000]
+    optim/total_grad_norm=20.27
+    train/ActionL1Loss=0.3428
+    throughput/total_tokens=7,680,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+    System/Peak GPU Memory (MB)=46,917
+[step=41/500000]
+    train/ActionL1Loss=0.4135
+    throughput/total_tokens=7,872,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0490
+[step=42/500000]
+    train/ActionL1Loss=0.3713
+    throughput/total_tokens=8,064,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=43/500000]
+    train/ActionL1Loss=0.3708
+    throughput/total_tokens=8,256,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=44/500000]
+    train/ActionL1Loss=0.4028
+    throughput/total_tokens=8,448,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=45/500000]
+    train/ActionL1Loss=0.3508
+    throughput/total_tokens=8,640,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=46/500000]
+    train/ActionL1Loss=0.3318
+    throughput/total_tokens=8,832,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=47/500000]
+    train/ActionL1Loss=0.3590
+    throughput/total_tokens=9,024,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=48/500000]
+    train/ActionL1Loss=0.3704
+    throughput/total_tokens=9,216,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=49/500000]
+    train/ActionL1Loss=0.3401
+    throughput/total_tokens=9,408,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+[step=50/500000]
+    train/ActionL1Loss=0.4467
+    throughput/total_tokens=9,600,000
+    throughput/device/tokens_per_second=1,174
+    throughput/device/batches_per_second=0.0489
+    System/Peak GPU Memory (MB)=46,917
+[step=51/500000]
+    train/ActionL1Loss=0.4312
+    throughput/total_tokens=9,792,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+[step=52/500000]
+    train/ActionL1Loss=0.3493
+    throughput/total_tokens=9,984,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=53/500000]
+    train/ActionL1Loss=0.4043
+    throughput/total_tokens=10,176,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+[step=54/500000]
+    train/ActionL1Loss=0.4185
+    throughput/total_tokens=10,368,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+[step=55/500000]
+    train/ActionL1Loss=0.4030
+    throughput/total_tokens=10,560,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=56/500000]
+    train/ActionL1Loss=0.4105
+    throughput/total_tokens=10,752,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=57/500000]
+    train/ActionL1Loss=0.3801
+    throughput/total_tokens=10,944,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+[step=58/500000]
+    train/ActionL1Loss=0.3240
+    throughput/total_tokens=11,136,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=59/500000]
+    train/ActionL1Loss=0.4480
+    throughput/total_tokens=11,328,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=60/500000]
+    optim/total_grad_norm=22.23
+    train/ActionL1Loss=0.2945
+    throughput/total_tokens=11,520,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+    System/Peak GPU Memory (MB)=46,917
+[step=61/500000]
+    train/ActionL1Loss=0.4101
+    throughput/total_tokens=11,712,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+[step=62/500000]
+    train/ActionL1Loss=0.4025
+    throughput/total_tokens=11,904,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0488
+[step=63/500000]
+    train/ActionL1Loss=0.4508
+    throughput/total_tokens=12,096,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0489
+[step=64/500000]
+    train/ActionL1Loss=0.3416
+    throughput/total_tokens=12,288,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0489
+[step=65/500000]
+    train/ActionL1Loss=0.3825
+    throughput/total_tokens=12,480,000
+    throughput/device/tokens_per_second=1,172
+    throughput/device/batches_per_second=0.0489

wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:09:21.488298147Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpdb_ho7_w/port-1805179.txt","pid":1805179,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-02T15:09:21.489895431Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1805179}
+{"time":"2025-10-02T15:09:21.490851516Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1805179-1805352-669910572/socket","Net":"unix"}}
+{"time":"2025-10-02T15:09:21.492979899Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-02T15:09:21.505095966Z","level":"INFO","msg":"handleInformInit: received","streamId":"kqbx0cjv","id":"1(@)"}
+{"time":"2025-10-02T15:09:22.667598354Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"kqbx0cjv","id":"1(@)"}

wandb/wandb/run-20251002_154526-bw81vbs0/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-02T15:45:26.956450Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "l1_regression",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "wipe",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_wipe.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-260",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "56241807360"
+    }
+  },
+  "memory":  {
+    "total":  "2434606956544"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "2",
+      "uniqueId":  "0x9815965a899d8053",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x702e8efb76b00c21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xd7a6e11358a6574d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xe35cdba2e3fafd21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x4493708eee1ee737",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x4213cc9eeeefc98d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0x2d75dae36f0dc353",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0xd79d4a081e34548d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759679082",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2231",
+    "job_name":  "mh_wipe",
+    "job_nodelist":  "auh7-1b-gpu-260",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759419882",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2231",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-260",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2561154",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-260",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "objruxls4ndcc2m3d5i0bpx0ttt9cswe"
+}

wandb/wandb/run-20251002_154526-bw81vbs0/run-bw81vbs0.wandb ADDED Viewed

Binary file (32.8 kB). View file

wandb/wandb/run-20251002_155015-xojint20/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-02T15:50:15.612316Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "wipe",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_wipe.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-260",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "56242147328"
+    }
+  },
+  "memory":  {
+    "total":  "2434606956544"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "5",
+      "uniqueId":  "0xd79d4a081e34548d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x4493708eee1ee737",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xe35cdba2e3fafd21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0x2d75dae36f0dc353",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x4213cc9eeeefc98d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0x9815965a899d8053",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xd7a6e11358a6574d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x702e8efb76b00c21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759679370",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2232",
+    "job_name":  "mh_wipe_flow_matching",
+    "job_nodelist":  "auh7-1b-gpu-260",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759420170",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2232",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-260",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2563631",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-260",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "dta64te2cmxj20iztgvki6h4mul24fyy"
+}

wandb/wandb/run-20251002_155441-70dhy5dq/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/wandb/run-20251002_155441-70dhy5dq/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-02T15:54:41.904163Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "l1_regression",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "glue",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_glue.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-293",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50659602432"
+    }
+  },
+  "memory":  {
+    "total":  "2434606936064"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "2",
+      "uniqueId":  "0xd3246a860ff61784",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xa307dde62eec0d7d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xd8fa68fa19711efd",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0xba4e7044cb7e770",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0x36cd9caedcbd1661",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x5ad6d84cdd116aca",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xbd5d0be0d2a8e2aa",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x8c18f9eeeea22bf2",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759679637",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2235",
+    "job_name":  "mh_glue_l1_regression",
+    "job_nodelist":  "auh7-1b-gpu-293",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759420437",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2235",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-293",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "1811465",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-293",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "79ch7p9c1j6zdjyu7l2owvuh1v64fp2u"
+}

wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:54:41.960056364Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpbo30i4ac/port-1811654.txt","pid":1811654,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-02T15:54:41.960588222Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1811654}
+{"time":"2025-10-02T15:54:41.960569302Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1811654-1811823-2274215804/socket","Net":"unix"}}
+{"time":"2025-10-02T15:54:42.143689402Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-02T15:54:42.151758187Z","level":"INFO","msg":"handleInformInit: received","streamId":"70dhy5dq","id":"1(@)"}
+{"time":"2025-10-02T15:54:43.180651706Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"70dhy5dq","id":"1(@)"}

wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug.log ADDED Viewed

File without changes

wandb/wandb/run-20251002_155442-6v8q0jgn/files/output.log ADDED Viewed

	@@ -0,0 +1,314 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/02 [15:54:43] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/02 [15:54:49] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+10/02 [15:54:50] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:435
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
+****** length of the dataset: 10316
+****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
+****** Expect one of: []
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f71bbbb8100>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+10/02 [15:54:56] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.state_proj.weight', 'action_head.state_proj.bias', 'action_head.action_in_proj.weight', 'action_head.action_in_proj.bias', 'action_head.action_time_in.weight', 'action_head.action_time_in.bias', 'action_head.action_time_out.weight', 'action_head.action_time_out.bias', 'action_head.memory_proj.weight', 'action_head.memory_proj.bias', 'action_head.gemma.model.layers.0.self_attn.q_proj.weight', 'action_head.gemma.model.layers.0.self_attn.k_proj.weight', 'action_head.gemma.model.layers.0.self_attn.v_proj.weight', 'action_head.gemma.model.layers.0.self_attn.o_proj.weight', 'action_head.gemma.model.layers.0.mlp.gate_proj.weight', 'action_head.gemma.model.layers.0.mlp.up_proj.weight', 'action_head.gemma.model.layers.0.mlp.down_proj.weight', 'action_head.gemma.model.layers.0.input_layernorm.weight', 'action_head.gemma.model.layers.0.post_attention_layernorm.weight', 'action_head.gemma.model.layers.1.self_attn.q_proj.weight', 'action_head.gemma.model.layers.1.self_attn.k_proj.weight', 'action_head.gemma.model.layers.1.self_attn.v_proj.weight', 'action_head.gemma.model.layers.1.self_attn.o_proj.weight', 'action_head.gemma.model.layers.1.mlp.gate_proj.weight', 'action_head.gemma.model.layers.1.mlp.up_proj.weight', 'action_head.gemma.model.layers.1.mlp.down_proj.weight', 'action_head.gemma.model.layers.1.input_layernorm.weight', 'action_head.gemma.model.layers.1.post_attention_layernorm.weight', 'action_head.gemma.model.layers.2.self_attn.q_proj.weight', 'action_head.gemma.model.layers.2.self_attn.k_proj.weight', 'action_head.gemma.model.layers.2.self_attn.v_proj.weight', 'action_head.gemma.model.layers.2.self_attn.o_proj.weight', 'action_head.gemma.model.layers.2.mlp.gate_proj.weight', 'action_head.gemma.model.layers.2.mlp.up_proj.weight', 'action_head.gemma.model.layers.2.mlp.down_proj.weight', 'action_head.gemma.model.layers.2.input_layernorm.weight', 'action_head.gemma.model.layers.2.post_attention_layernorm.weight', 
'action_head.gemma.model.layers.3.self_attn.q_proj.weight', 'action_head.gemma.model.layers.3.self_attn.k_proj.weight', 'action_head.gemma.model.layers.3.self_attn.v_proj.weight', 'action_head.gemma.model.layers.3.self_attn.o_proj.weight', 'action_head.gemma.model.layers.3.mlp.gate_proj.weight', 'action_head.gemma.model.layers.3.mlp.up_proj.weight', 'action_head.gemma.model.layers.3.mlp.down_proj.weight', 'action_head.gemma.model.layers.3.input_layernorm.weight', 'action_head.gemma.model.layers.3.post_attention_layernorm.weight', 'action_head.gemma.model.layers.4.self_attn.q_proj.weight', 'action_head.gemma.model.layers.4.self_attn.k_proj.weight', 'action_head.gemma.model.layers.4.self_attn.v_proj.weight', 'action_head.gemma.model.layers.4.self_attn.o_proj.weight', 'action_head.gemma.model.layers.4.mlp.gate_proj.weight', 'action_head.gemma.model.layers.4.mlp.up_proj.weight', 'action_head.gemma.model.layers.4.mlp.down_proj.weight', 'action_head.gemma.model.layers.4.input_layernorm.weight', 'action_head.gemma.model.layers.4.post_attention_layernorm.weight', 'action_head.gemma.model.layers.5.self_attn.q_proj.weight', 'action_head.gemma.model.layers.5.self_attn.k_proj.weight', 'action_head.gemma.model.layers.5.self_attn.v_proj.weight', 'action_head.gemma.model.layers.5.self_attn.o_proj.weight', 'action_head.gemma.model.layers.5.mlp.gate_proj.weight', 'action_head.gemma.model.layers.5.mlp.up_proj.weight', 'action_head.gemma.model.layers.5.mlp.down_proj.weight', 'action_head.gemma.model.layers.5.input_layernorm.weight', 'action_head.gemma.model.layers.5.post_attention_layernorm.weight', 'action_head.gemma.model.layers.6.self_attn.q_proj.weight', 'action_head.gemma.model.layers.6.self_attn.k_proj.weight', 'action_head.gemma.model.layers.6.self_attn.v_proj.weight', 'action_head.gemma.model.layers.6.self_attn.o_proj.weight', 'action_head.gemma.model.layers.6.mlp.gate_proj.weight', 'action_head.gemma.model.layers.6.mlp.up_proj.weight', 
'action_head.gemma.model.layers.6.mlp.down_proj.weight', 'action_head.gemma.model.layers.6.input_layernorm.weight', 'action_head.gemma.model.layers.6.post_attention_
+unexpected keys: []
+************************* Initialize model successful!
+************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
+************************* Before add lora to model
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+************* Before get lora params
+************* After get lora params successfully
+10/02 [15:56:15] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 36856
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=36,856
+10/02 [15:56:16] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+10/02 [15:56:23] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py:967: UserWarning: The .grad attribute  warnings.py:109
+                          of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed
+                          want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor
+                          by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered
+                          internally at /pytorch/build/aten/src/ATen/core/TensorBody.h:489.)
+                            param_grad = param.grad
+[step=1/500000]
+    train/ActionNoiseL2Loss=1.834
+    throughput/total_tokens=192,000
+    System/Peak GPU Memory (MB)=39,644
+[step=2/500000]
+    train/ActionNoiseL2Loss=1.807
+    throughput/total_tokens=384,000
+    throughput/device/tokens_per_second=1,196
+    throughput/device/batches_per_second=0.0499
+    System/Peak GPU Memory (MB)=46,466
+[step=3/500000]
+    train/ActionNoiseL2Loss=1.699
+    throughput/total_tokens=576,000
+    throughput/device/tokens_per_second=1,175
+    throughput/device/batches_per_second=0.0490
+[step=4/500000]
+    train/ActionNoiseL2Loss=1.790
+    throughput/total_tokens=768,000
+    throughput/device/tokens_per_second=1,167
+    throughput/device/batches_per_second=0.0487
+[step=5/500000]
+    train/ActionNoiseL2Loss=1.693
+    throughput/total_tokens=960,000
+    throughput/device/tokens_per_second=1,164
+    throughput/device/batches_per_second=0.0485
+[step=6/500000]
+    train/ActionNoiseL2Loss=1.679
+    throughput/total_tokens=1,152,000
+    throughput/device/tokens_per_second=1,161
+    throughput/device/batches_per_second=0.0484
+[step=7/500000]
+    train/ActionNoiseL2Loss=1.560
+    throughput/total_tokens=1,344,000
+    throughput/device/tokens_per_second=1,159
+    throughput/device/batches_per_second=0.0483
+[step=8/500000]
+    train/ActionNoiseL2Loss=1.603
+    throughput/total_tokens=1,536,000
+    throughput/device/tokens_per_second=1,158
+    throughput/device/batches_per_second=0.0483
+[step=9/500000]
+    train/ActionNoiseL2Loss=1.556
+    throughput/total_tokens=1,728,000
+    throughput/device/tokens_per_second=1,158
+    throughput/device/batches_per_second=0.0483
+[step=10/500000]
+    train/ActionNoiseL2Loss=1.506
+    throughput/total_tokens=1,920,000
+    throughput/device/tokens_per_second=1,157
+    throughput/device/batches_per_second=0.0482
+    System/Peak GPU Memory (MB)=46,466
+[step=11/500000]
+    train/ActionNoiseL2Loss=1.537
+    throughput/total_tokens=2,112,000
+    throughput/device/tokens_per_second=1,156
+    throughput/device/batches_per_second=0.0482
+[step=12/500000]
+    train/ActionNoiseL2Loss=1.279
+    throughput/total_tokens=2,304,000
+    throughput/device/tokens_per_second=1,155
+    throughput/device/batches_per_second=0.0482
+[step=13/500000]
+    train/ActionNoiseL2Loss=1.477
+    throughput/total_tokens=2,496,000
+    throughput/device/tokens_per_second=1,154
+    throughput/device/batches_per_second=0.0481
+[step=14/500000]
+    train/ActionNoiseL2Loss=1.432
+    throughput/total_tokens=2,688,000
+    throughput/device/tokens_per_second=1,153
+    throughput/device/batches_per_second=0.0481
+[step=15/500000]
+    train/ActionNoiseL2Loss=1.288
+    throughput/total_tokens=2,880,000
+    throughput/device/tokens_per_second=1,153
+    throughput/device/batches_per_second=0.0480
+[step=16/500000]
+    train/ActionNoiseL2Loss=1.371
+    throughput/total_tokens=3,072,000
+    throughput/device/tokens_per_second=1,152
+    throughput/device/batches_per_second=0.0480
+[step=17/500000]
+    train/ActionNoiseL2Loss=1.352
+    throughput/total_tokens=3,264,000
+    throughput/device/tokens_per_second=1,151
+    throughput/device/batches_per_second=0.0480
+[step=18/500000]
+    train/ActionNoiseL2Loss=1.520
+    throughput/total_tokens=3,456,000
+    throughput/device/tokens_per_second=1,151
+    throughput/device/batches_per_second=0.0480
+[step=19/500000]
+    train/ActionNoiseL2Loss=1.305
+    throughput/total_tokens=3,648,000
+    throughput/device/tokens_per_second=1,151
+    throughput/device/batches_per_second=0.0480
+[step=20/500000]
+    optim/total_grad_norm=16.52
+    train/ActionNoiseL2Loss=1.322
+    throughput/total_tokens=3,840,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+    System/Peak GPU Memory (MB)=46,466
+[step=21/500000]
+    train/ActionNoiseL2Loss=1.404
+    throughput/total_tokens=4,032,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=22/500000]
+    train/ActionNoiseL2Loss=1.266
+    throughput/total_tokens=4,224,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=23/500000]
+    train/ActionNoiseL2Loss=1.394
+    throughput/total_tokens=4,416,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=24/500000]
+    train/ActionNoiseL2Loss=1.259
+    throughput/total_tokens=4,608,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0479
+[step=25/500000]
+    train/ActionNoiseL2Loss=1.191
+    throughput/total_tokens=4,800,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0479
+[step=26/500000]
+    train/ActionNoiseL2Loss=1.317
+    throughput/total_tokens=4,992,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0479
+[step=27/500000]
+    train/ActionNoiseL2Loss=1.215
+    throughput/total_tokens=5,184,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0479
+[step=28/500000]
+    train/ActionNoiseL2Loss=1.260
+    throughput/total_tokens=5,376,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0478
+[step=29/500000]
+    train/ActionNoiseL2Loss=1.132
+    throughput/total_tokens=5,568,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0478
+[step=30/500000]
+    train/ActionNoiseL2Loss=1.241
+    throughput/total_tokens=5,760,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0478
+    System/Peak GPU Memory (MB)=46,466
+[step=31/500000]
+    train/ActionNoiseL2Loss=1.084
+    throughput/total_tokens=5,952,000
+    throughput/device/tokens_per_second=1,147
+    throughput/device/batches_per_second=0.0478
+[step=32/500000]
+    train/ActionNoiseL2Loss=1.049
+    throughput/total_tokens=6,144,000
+    throughput/device/tokens_per_second=1,147
+    throughput/device/batches_per_second=0.0478
+[step=33/500000]
+    train/ActionNoiseL2Loss=1.266
+    throughput/total_tokens=6,336,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0478
+[step=34/500000]
+    train/ActionNoiseL2Loss=1.018
+    throughput/total_tokens=6,528,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0479
+[step=35/500000]
+    train/ActionNoiseL2Loss=1.012
+    throughput/total_tokens=6,720,000
+    throughput/device/tokens_per_second=1,148
+    throughput/device/batches_per_second=0.0479
+[step=36/500000]
+    train/ActionNoiseL2Loss=1.101
+    throughput/total_tokens=6,912,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=37/500000]
+    train/ActionNoiseL2Loss=1.093
+    throughput/total_tokens=7,104,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=38/500000]
+    train/ActionNoiseL2Loss=1.153
+    throughput/total_tokens=7,296,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=39/500000]
+    train/ActionNoiseL2Loss=0.9454
+    throughput/total_tokens=7,488,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=40/500000]
+    optim/total_grad_norm=63.39
+    train/ActionNoiseL2Loss=1.099
+    throughput/total_tokens=7,680,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+    System/Peak GPU Memory (MB)=46,466
+[step=41/500000]
+    train/ActionNoiseL2Loss=0.9066
+    throughput/total_tokens=7,872,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=42/500000]
+    train/ActionNoiseL2Loss=1.033
+    throughput/total_tokens=8,064,000
+    throughput/device/tokens_per_second=1,150
+    throughput/device/batches_per_second=0.0479
+[step=43/500000]
+    train/ActionNoiseL2Loss=0.9956
+    throughput/total_tokens=8,256,000
+    throughput/device/tokens_per_second=1,150
+    throughput/device/batches_per_second=0.0479
+[step=44/500000]
+    train/ActionNoiseL2Loss=1.186
+    throughput/total_tokens=8,448,000
+    throughput/device/tokens_per_second=1,150
+    throughput/device/batches_per_second=0.0479
+[step=45/500000]
+    train/ActionNoiseL2Loss=1.020
+    throughput/total_tokens=8,640,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=46/500000]
+    train/ActionNoiseL2Loss=0.9211
+    throughput/total_tokens=8,832,000
+    throughput/device/tokens_per_second=1,150
+    throughput/device/batches_per_second=0.0479
+[step=47/500000]
+    train/ActionNoiseL2Loss=0.9811
+    throughput/total_tokens=9,024,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=48/500000]
+    train/ActionNoiseL2Loss=0.9845
+    throughput/total_tokens=9,216,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479
+[step=49/500000]
+    train/ActionNoiseL2Loss=0.9234
+    throughput/total_tokens=9,408,000
+    throughput/device/tokens_per_second=1,149
+    throughput/device/batches_per_second=0.0479

wandb/wandb/run-20251002_155442-6v8q0jgn/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-02T15:54:42.003061Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "glue",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_glue.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-260",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "56242470912"
+    }
+  },
+  "memory":  {
+    "total":  "2434606956544"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "6",
+      "uniqueId":  "0x2d75dae36f0dc353",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xd7a6e11358a6574d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x4213cc9eeeefc98d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xe35cdba2e3fafd21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x702e8efb76b00c21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x4493708eee1ee737",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0xd79d4a081e34548d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0x9815965a899d8053",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759679637",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2234",
+    "job_name":  "mh_glue_flow_matching",
+    "job_nodelist":  "auh7-1b-gpu-260",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759420437",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2234",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-260",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2565886",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-260",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "1x8epr6rdu28pcmllq7snrfdls3nek8y"
+}

wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:54:42.055940338Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpwa4j5m09/port-2566075.txt","pid":2566075,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-02T15:54:42.056549445Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2566075}
+{"time":"2025-10-02T15:54:42.056519645Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2566075-2566240-670263594/socket","Net":"unix"}}
+{"time":"2025-10-02T15:54:42.238288115Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-02T15:54:42.245089407Z","level":"INFO","msg":"handleInformInit: received","streamId":"6v8q0jgn","id":"1(@)"}
+{"time":"2025-10-02T15:54:43.370000731Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"6v8q0jgn","id":"1(@)"}

wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:54:42.24698282Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T15:54:43.36994658Z","level":"INFO","msg":"stream: created new stream","id":"6v8q0jgn"}
+{"time":"2025-10-02T15:54:43.369995201Z","level":"INFO","msg":"stream: started","id":"6v8q0jgn"}
+{"time":"2025-10-02T15:54:43.370003131Z","level":"INFO","msg":"writer: started","stream_id":"6v8q0jgn"}
+{"time":"2025-10-02T15:54:43.370010001Z","level":"INFO","msg":"handler: started","stream_id":"6v8q0jgn"}
+{"time":"2025-10-02T15:54:43.370045482Z","level":"INFO","msg":"sender: started","stream_id":"6v8q0jgn"}

wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug.log ADDED Viewed

File without changes

wipe/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: wipe_20251002_163406
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: wipe_20251002_163406
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wipe/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T16:34:36.620221893Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T16:34:37.749739233Z","level":"INFO","msg":"stream: created new stream","id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749792274Z","level":"INFO","msg":"stream: started","id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749802594Z","level":"INFO","msg":"writer: started","stream_id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749817664Z","level":"INFO","msg":"handler: started","stream_id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749829744Z","level":"INFO","msg":"sender: started","stream_id":"itiyfljc"}

wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-02T16:34:36.375177Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "wipe",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_wipe.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe/wandb",
+  "host":  "auh7-1b-gpu-293",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50660397056"
+    }
+  },
+  "memory":  {
+    "total":  "2434606936064"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "3",
+      "uniqueId":  "0x36cd9caedcbd1661",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x5ad6d84cdd116aca",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xa307dde62eec0d7d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xbd5d0be0d2a8e2aa",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0xba4e7044cb7e770",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0xd3246a860ff61784",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xd8fa68fa19711efd",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x8c18f9eeeea22bf2",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759682032",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2261",
+    "job_name":  "mh_wipe_flow_matching",
+    "job_nodelist":  "auh7-1b-gpu-293",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759422832",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2261",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-293",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "1816946",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-293",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "e61jhvldaqba9uvqusim29dt4x4fm38h"
+}

wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T16:34:36.620221893Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T16:34:37.749739233Z","level":"INFO","msg":"stream: created new stream","id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749792274Z","level":"INFO","msg":"stream: started","id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749802594Z","level":"INFO","msg":"writer: started","stream_id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749817664Z","level":"INFO","msg":"handler: started","stream_id":"itiyfljc"}
+{"time":"2025-10-02T16:34:37.749829744Z","level":"INFO","msg":"sender: started","stream_id":"itiyfljc"}

wipe/wandb/wandb/run-20251002_163436-itiyfljc/run-itiyfljc.wandb ADDED Viewed

File without changes

wipe_flow_matching/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: wipe_20251005_163733
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: wipe_20251005_163733
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wipe_flow_matching/step12000-unsharded/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: wipe_20251005_163733
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: wipe_20251005_163733
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wipe_flow_matching/step12000/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: wipe_20251005_163733
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: wipe_20251005_163733
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wipe_flow_matching/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,10 @@

+{"time":"2025-10-05T16:38:13.19911913Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-05T16:38:14.385618537Z","level":"INFO","msg":"stream: created new stream","id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385648767Z","level":"INFO","msg":"stream: started","id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385660457Z","level":"INFO","msg":"handler: started","stream_id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385655167Z","level":"INFO","msg":"writer: started","stream_id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385680798Z","level":"INFO","msg":"sender: started","stream_id":"0cfqmuqw"}
+{"time":"2025-10-06T16:34:15.587824169Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-10-06T18:35:03.703248769Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-10-06T20:02:36.97363154Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-07T05:02:26.79910172Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}

wipe_flow_matching/wandb/wandb/debug.log ADDED Viewed

File without changes

wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-05T16:38:12.937946Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "wipe",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_wipe.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching/wandb",
+  "host":  "auh7-1b-gpu-268",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50524278784"
+    }
+  },
+  "memory":  {
+    "total":  "2434606911488"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "2",
+      "uniqueId":  "0x4e23787acbcc959c",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x21097ed02658304",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x5222e4ce7a335651",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0x6ea319284113b182",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0x3974e08aaf22dd9e",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x88a800d44035c135",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x9c1461c3fb78979f",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0x47f0c4894158743b",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759941420",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2279",
+    "job_name":  "mh_wipe_flow_matching",
+    "job_nodelist":  "auh7-1b-gpu-268",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759682220",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2279",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-268",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "1295134",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-268",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "z62bo323zzwiyws743dfdlddov1q0sqt"
+}

wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-05T16:38:13.182675593Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9aw022y7/port-1295327.txt","pid":1295327,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-05T16:38:13.18385037Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1295327}
+{"time":"2025-10-05T16:38:13.185026547Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1295327-1295508-4047313830/socket","Net":"unix"}}
+{"time":"2025-10-05T16:38:13.18868387Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-05T16:38:13.197275504Z","level":"INFO","msg":"handleInformInit: received","streamId":"0cfqmuqw","id":"1(@)"}
+{"time":"2025-10-05T16:38:14.385653777Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"0cfqmuqw","id":"1(@)"}

wipe_l1_regression/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: wipe_20251005_163714
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: wipe_20251005_163714
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wipe_l1_regression/step12000-unsharded/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: wipe_20251005_163714
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: wipe_20251005_163714
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wipe_l1_regression/step12000/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: wipe_20251005_163714
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: wipe_20251005_163714
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

wipe_l1_regression/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,10 @@

+{"time":"2025-10-05T16:37:44.133320669Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-05T16:37:45.160495159Z","level":"INFO","msg":"stream: created new stream","id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.160546189Z","level":"INFO","msg":"stream: started","id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.16056551Z","level":"INFO","msg":"handler: started","stream_id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.1605953Z","level":"INFO","msg":"sender: started","stream_id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.16057409Z","level":"INFO","msg":"writer: started","stream_id":"a1znetn8"}
+{"time":"2025-10-06T15:55:45.597714896Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-06T18:21:33.322202546Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a1znetn8/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-10-07T13:03:00.740491875Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-08T00:38:26.283561572Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}

wipe_l1_regression/wandb/wandb/debug.log ADDED Viewed

File without changes

wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-05T16:37:43.884420Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "l1_regression",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "wipe",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_wipe.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression/wandb",
+  "host":  "auh7-1b-gpu-306",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50268852224"
+    }
+  },
+  "memory":  {
+    "total":  "2434611519488"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "3",
+      "uniqueId":  "0x95be8fdc770fcfd7",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0x27087f06439a527d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x413935505e32b8da",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0xa0442ab3bdd405c1",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0x12140cd9e24f12e9",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x82728d7f9bd937e4",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0xaabcddaa244a3d6e",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x24ee801b7c402006",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759941420",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2280",
+    "job_name":  "mh_wipe_l1_regression",
+    "job_nodelist":  "auh7-1b-gpu-306",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759682220",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2280",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-306",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "1826176",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-306",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "bdwjpywyhycxf8g9ov01yp1e5him8k31"
+}

wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-05T16:37:43.93852383Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_xq7pohw/port-1826365.txt","pid":1826365,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-05T16:37:43.939021639Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1826365}
+{"time":"2025-10-05T16:37:43.939000279Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1826365-1826537-524537524/socket","Net":"unix"}}
+{"time":"2025-10-05T16:37:44.124047526Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-05T16:37:44.131326394Z","level":"INFO","msg":"handleInformInit: received","streamId":"a1znetn8","id":"1(@)"}
+{"time":"2025-10-05T16:37:45.16055242Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"a1znetn8","id":"1(@)"}

wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,10 @@

+{"time":"2025-10-05T16:37:44.133320669Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-05T16:37:45.160495159Z","level":"INFO","msg":"stream: created new stream","id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.160546189Z","level":"INFO","msg":"stream: started","id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.16056551Z","level":"INFO","msg":"handler: started","stream_id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.1605953Z","level":"INFO","msg":"sender: started","stream_id":"a1znetn8"}
+{"time":"2025-10-05T16:37:45.16057409Z","level":"INFO","msg":"writer: started","stream_id":"a1znetn8"}
+{"time":"2025-10-06T15:55:45.597714896Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-06T18:21:33.322202546Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a1znetn8/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-10-07T13:03:00.740491875Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-08T00:38:26.283561572Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}

wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug.log ADDED Viewed

File without changes