Henryeahhh committed on Oct 15, 2025

Commit

4919132

verified ·

1 Parent(s): 90c97df

Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

glue_l1_regression/step12000/config.yaml +322 -0
pen_flow_matching/step11500-action-head/metadata.pt +3 -0
pen_flow_matching/step12000-action-head/metadata.pt +3 -0
pen_flow_matching/step12000-unsharded/config.yaml +322 -0
pen_flow_matching/step12000/config.yaml +322 -0
pen_flow_matching/wandb/wandb/debug-internal.log +8 -0
pen_flow_matching/wandb/wandb/debug.log +0 -0
pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug.log +0 -0
wandb/wandb/debug-internal.log +6 -0
wandb/wandb/run-20251002_150921-kqbx0cjv/files/requirements.txt +286 -0
wandb/wandb/run-20251002_150921-kqbx0cjv/files/wandb-metadata.json +204 -0
wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-internal.log +6 -0
wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug.log +0 -0
wandb/wandb/run-20251002_151047-gal9lnsm/files/output.log +365 -0
wandb/wandb/run-20251002_151047-gal9lnsm/files/requirements.txt +286 -0
wandb/wandb/run-20251002_151047-gal9lnsm/files/wandb-metadata.json +203 -0
wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-core.log +6 -0
wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-internal.log +6 -0
wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug.log +0 -0
wandb/wandb/run-20251002_154526-bw81vbs0/files/output.log +81 -0
wandb/wandb/run-20251002_154526-bw81vbs0/files/requirements.txt +286 -0
wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-core.log +6 -0
wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-internal.log +6 -0
wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug.log +0 -0
wandb/wandb/run-20251002_155015-xojint20/files/output.log +88 -0
wandb/wandb/run-20251002_155015-xojint20/files/requirements.txt +286 -0
wandb/wandb/run-20251002_155015-xojint20/logs/debug-core.log +6 -0
wandb/wandb/run-20251002_155015-xojint20/logs/debug-internal.log +6 -0
wandb/wandb/run-20251002_155015-xojint20/run-xojint20.wandb +0 -0
wandb/wandb/run-20251002_155441-70dhy5dq/files/output.log +318 -0
wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-internal.log +6 -0
wandb/wandb/run-20251002_155442-6v8q0jgn/files/requirements.txt +286 -0
wipe/wandb/wandb/debug.log +0 -0
wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/output.log +15 -0
wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/requirements.txt +286 -0
wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-core.log +6 -0
wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug.log +0 -0
wipe_flow_matching/step11500-action-head/metadata.pt +3 -0
wipe_flow_matching/step12000-action-head/metadata.pt +3 -0
wipe_flow_matching/step12000-unsharded/lora.pt +3 -0
wipe_flow_matching/step12000-unsharded/train.pt +3 -0
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/output.log +0 -0
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/requirements.txt +286 -0
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-internal.log +10 -0
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug.log +0 -0
wipe_l1_regression/step11500-action-head/metadata.pt +3 -0
wipe_l1_regression/step12000-action-head/metadata.pt +3 -0
wipe_l1_regression/step12000-unsharded/lora.pt +3 -0
wipe_l1_regression/step12000-unsharded/train.pt +3 -0
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/output.log +0 -0

glue_l1_regression/step12000/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: glue_20251002_163658
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_l1_regression
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20251002_163658
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

pen_flow_matching/step11500-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb72b6306ce04d1beb20bb289509f00c39a40845ff7c4b36bf4deb4e83fe82a
+size 1331

pen_flow_matching/step12000-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:995307502120af3866f237cd0bc484fc848a652539d28e53cbea882abc16ba6b
+size 1331

pen_flow_matching/step12000-unsharded/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: pen_20251011_163803
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: a1_real_world
+  rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: pen_20251011_163803
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

pen_flow_matching/step12000/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: pen_20251011_163803
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: a1_real_world
+  rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: pen_20251011_163803
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

pen_flow_matching/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,8 @@

+{"time":"2025-10-11T16:38:45.301569164Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-11T16:38:46.459201786Z","level":"INFO","msg":"stream: created new stream","id":"a381qnn9"}
+{"time":"2025-10-11T16:38:46.459237957Z","level":"INFO","msg":"stream: started","id":"a381qnn9"}
+{"time":"2025-10-11T16:38:46.459266458Z","level":"INFO","msg":"handler: started","stream_id":"a381qnn9"}
+{"time":"2025-10-11T16:38:46.459291898Z","level":"INFO","msg":"sender: started","stream_id":"a381qnn9"}
+{"time":"2025-10-11T16:38:46.459287598Z","level":"INFO","msg":"writer: started","stream_id":"a381qnn9"}
+{"time":"2025-10-12T06:42:47.897888022Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a381qnn9/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-10-12T14:34:32.120286068Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a381qnn9/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}

pen_flow_matching/wandb/wandb/debug.log ADDED Viewed

File without changes

pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug.log ADDED Viewed

File without changes

wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:54:42.154138214Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T15:54:43.180595015Z","level":"INFO","msg":"stream: created new stream","id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180644946Z","level":"INFO","msg":"stream: started","id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180663737Z","level":"INFO","msg":"sender: started","stream_id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180659826Z","level":"INFO","msg":"writer: started","stream_id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180682767Z","level":"INFO","msg":"handler: started","stream_id":"70dhy5dq"}

wandb/wandb/run-20251002_150921-kqbx0cjv/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/wandb/run-20251002_150921-kqbx0cjv/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-02T15:09:21.237465Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "l1_regression",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "realworld",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_cleandesk.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-293",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50658734080"
+    }
+  },
+  "memory":  {
+    "total":  "2434606936064"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "6",
+      "uniqueId":  "0xa307dde62eec0d7d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xd8fa68fa19711efd",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0x36cd9caedcbd1661",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0xba4e7044cb7e770",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xbd5d0be0d2a8e2aa",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x5ad6d84cdd116aca",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0xd3246a860ff61784",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x8c18f9eeeea22bf2",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759676881",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2221",
+    "job_name":  "mh_cleandesk",
+    "job_nodelist":  "auh7-1b-gpu-293",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759417681",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2221",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-293",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "1804994",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-293",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "z2ddwaxyl0hxhxvll2z1wkcfd4ygtgyd"
+}

wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:09:21.507261489Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T15:09:22.667529923Z","level":"INFO","msg":"stream: created new stream","id":"kqbx0cjv"}
+{"time":"2025-10-02T15:09:22.667591843Z","level":"INFO","msg":"stream: started","id":"kqbx0cjv"}
+{"time":"2025-10-02T15:09:22.667623244Z","level":"INFO","msg":"writer: started","stream_id":"kqbx0cjv"}
+{"time":"2025-10-02T15:09:22.667639754Z","level":"INFO","msg":"handler: started","stream_id":"kqbx0cjv"}
+{"time":"2025-10-02T15:09:22.667666975Z","level":"INFO","msg":"sender: started","stream_id":"kqbx0cjv"}

wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug.log ADDED Viewed

File without changes

wandb/wandb/run-20251002_151047-gal9lnsm/files/output.log ADDED Viewed

	@@ -0,0 +1,365 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/02 [15:10:54] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/02 [15:11:06] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+                 INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:435
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50
+****** length of the dataset: 27906
+****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
+****** Expect one of: []
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f11812bff40>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+10/02 [15:11:14] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
+unexpected keys: []
+************************* Initialize model successful!
+************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
+************************* Before add lora to model
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+************* Before get lora params
+************* After get lora params successfully
+10/02 [15:12:41] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=35,614
+10/02 [15:12:42] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+[step=1/500000]
+    train/ActionL1Loss=0.6062
+    throughput/total_tokens=192,000
+    System/Peak GPU Memory (MB)=40,144
+[step=2/500000]
+    train/ActionL1Loss=0.6043
+    throughput/total_tokens=384,000
+    throughput/device/tokens_per_second=1,201
+    throughput/device/batches_per_second=0.0501
+    System/Peak GPU Memory (MB)=46,917
+[step=3/500000]
+    train/ActionL1Loss=0.5778
+    throughput/total_tokens=576,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+[step=4/500000]
+    train/ActionL1Loss=0.5434
+    throughput/total_tokens=768,000
+    throughput/device/tokens_per_second=1,155
+    throughput/device/batches_per_second=0.0481
+[step=5/500000]
+    train/ActionL1Loss=0.5383
+    throughput/total_tokens=960,000
+    throughput/device/tokens_per_second=1,153
+    throughput/device/batches_per_second=0.0481
+[step=6/500000]
+    train/ActionL1Loss=0.5146
+    throughput/total_tokens=1,152,000
+    throughput/device/tokens_per_second=1,152
+    throughput/device/batches_per_second=0.0480
+[step=7/500000]
+    train/ActionL1Loss=0.4823
+    throughput/total_tokens=1,344,000
+    throughput/device/tokens_per_second=1,152
+    throughput/device/batches_per_second=0.0480
+[step=8/500000]
+    train/ActionL1Loss=0.4415
+    throughput/total_tokens=1,536,000
+    throughput/device/tokens_per_second=1,153
+    throughput/device/batches_per_second=0.0481
+[step=9/500000]
+    train/ActionL1Loss=0.4776
+    throughput/total_tokens=1,728,000
+    throughput/device/tokens_per_second=1,155
+    throughput/device/batches_per_second=0.0481
+[step=10/500000]
+    train/ActionL1Loss=0.4819
+    throughput/total_tokens=1,920,000
+    throughput/device/tokens_per_second=1,156
+    throughput/device/batches_per_second=0.0482
+    System/Peak GPU Memory (MB)=46,917
+[step=11/500000]
+    train/ActionL1Loss=0.4335
+    throughput/total_tokens=2,112,000
+    throughput/device/tokens_per_second=1,157
+    throughput/device/batches_per_second=0.0482
+[step=12/500000]
+    train/ActionL1Loss=0.4216
+    throughput/total_tokens=2,304,000
+    throughput/device/tokens_per_second=1,158
+    throughput/device/batches_per_second=0.0483
+[step=13/500000]
+    train/ActionL1Loss=0.3851
+    throughput/total_tokens=2,496,000
+    throughput/device/tokens_per_second=1,159
+    throughput/device/batches_per_second=0.0483
+[step=14/500000]
+    train/ActionL1Loss=0.3569
+    throughput/total_tokens=2,688,000
+    throughput/device/tokens_per_second=1,160
+    throughput/device/batches_per_second=0.0483
+[step=15/500000]
+    train/ActionL1Loss=0.4119
+    throughput/total_tokens=2,880,000
+    throughput/device/tokens_per_second=1,160
+    throughput/device/batches_per_second=0.0484
+[step=16/500000]
+    train/ActionL1Loss=0.4318
+    throughput/total_tokens=3,072,000
+    throughput/device/tokens_per_second=1,161
+    throughput/device/batches_per_second=0.0484
+[step=17/500000]
+    train/ActionL1Loss=0.3860
+    throughput/total_tokens=3,264,000
+    throughput/device/tokens_per_second=1,161
+    throughput/device/batches_per_second=0.0484
+[step=18/500000]
+    train/ActionL1Loss=0.3929
+    throughput/total_tokens=3,456,000
+    throughput/device/tokens_per_second=1,162
+    throughput/device/batches_per_second=0.0484
+[step=19/500000]
+    train/ActionL1Loss=0.3802
+    throughput/total_tokens=3,648,000
+    throughput/device/tokens_per_second=1,162
+    throughput/device/batches_per_second=0.0484
+[step=20/500000]
+    optim/total_grad_norm=29.43
+    train/ActionL1Loss=0.3528
+    throughput/total_tokens=3,840,000
+    throughput/device/tokens_per_second=1,162
+    throughput/device/batches_per_second=0.0484
+    System/Peak GPU Memory (MB)=46,917
+[step=21/500000]
+    train/ActionL1Loss=0.3761
+    throughput/total_tokens=4,032,000
+    throughput/device/tokens_per_second=1,162
+    throughput/device/batches_per_second=0.0484
+[step=22/500000]
+    train/ActionL1Loss=0.3916
+    throughput/total_tokens=4,224,000
+    throughput/device/tokens_per_second=1,162
+    throughput/device/batches_per_second=0.0484
+[step=23/500000]
+    train/ActionL1Loss=0.3271
+    throughput/total_tokens=4,416,000
+    throughput/device/tokens_per_second=1,164
+    throughput/device/batches_per_second=0.0485
+[step=24/500000]
+    train/ActionL1Loss=0.3833
+    throughput/total_tokens=4,608,000
+    throughput/device/tokens_per_second=1,166
+    throughput/device/batches_per_second=0.0486
+[step=25/500000]
+    train/ActionL1Loss=0.3419
+    throughput/total_tokens=4,800,000
+    throughput/device/tokens_per_second=1,167
+    throughput/device/batches_per_second=0.0487
+[step=26/500000]
+    train/ActionL1Loss=0.3660
+    throughput/total_tokens=4,992,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=27/500000]
+    train/ActionL1Loss=0.3771
+    throughput/total_tokens=5,184,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+[step=28/500000]
+    train/ActionL1Loss=0.3350
+    throughput/total_tokens=5,376,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=29/500000]
+    train/ActionL1Loss=0.4330
+    throughput/total_tokens=5,568,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=30/500000]
+    train/ActionL1Loss=0.3133
+    throughput/total_tokens=5,760,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+    System/Peak GPU Memory (MB)=46,917
+[step=31/500000]
+    train/ActionL1Loss=0.3785
+    throughput/total_tokens=5,952,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=32/500000]
+    train/ActionL1Loss=0.3183
+    throughput/total_tokens=6,144,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=33/500000]
+    train/ActionL1Loss=0.3700
+    throughput/total_tokens=6,336,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=34/500000]
+    train/ActionL1Loss=0.3268
+    throughput/total_tokens=6,528,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=35/500000]
+    train/ActionL1Loss=0.3539
+    throughput/total_tokens=6,720,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=36/500000]
+    train/ActionL1Loss=0.3596
+    throughput/total_tokens=6,912,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=37/500000]
+    train/ActionL1Loss=0.3529
+    throughput/total_tokens=7,104,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=38/500000]
+    train/ActionL1Loss=0.3620
+    throughput/total_tokens=7,296,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=39/500000]
+    train/ActionL1Loss=0.3647
+    throughput/total_tokens=7,488,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=40/500000]
+    optim/total_grad_norm=21.94
+    train/ActionL1Loss=0.3782
+    throughput/total_tokens=7,680,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+    System/Peak GPU Memory (MB)=46,917
+[step=41/500000]
+    train/ActionL1Loss=0.3259
+    throughput/total_tokens=7,872,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+[step=42/500000]
+    train/ActionL1Loss=0.3395
+    throughput/total_tokens=8,064,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+[step=43/500000]
+    train/ActionL1Loss=0.3244
+    throughput/total_tokens=8,256,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=44/500000]
+    train/ActionL1Loss=0.3537
+    throughput/total_tokens=8,448,000
+    throughput/device/tokens_per_second=1,167
+    throughput/device/batches_per_second=0.0486
+[step=45/500000]
+    train/ActionL1Loss=0.3423
+    throughput/total_tokens=8,640,000
+    throughput/device/tokens_per_second=1,167
+    throughput/device/batches_per_second=0.0486
+[step=46/500000]
+    train/ActionL1Loss=0.3216
+    throughput/total_tokens=8,832,000
+    throughput/device/tokens_per_second=1,167
+    throughput/device/batches_per_second=0.0487
+[step=47/500000]
+    train/ActionL1Loss=0.3626
+    throughput/total_tokens=9,024,000
+    throughput/device/tokens_per_second=1,167
+    throughput/device/batches_per_second=0.0487
+[step=48/500000]
+    train/ActionL1Loss=0.3210
+    throughput/total_tokens=9,216,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=49/500000]
+    train/ActionL1Loss=0.3180
+    throughput/total_tokens=9,408,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487
+[step=50/500000]
+    train/ActionL1Loss=0.3499
+    throughput/total_tokens=9,600,000
+    throughput/device/tokens_per_second=1,169
+    throughput/device/batches_per_second=0.0487
+    System/Peak GPU Memory (MB)=46,917
+[step=51/500000]
+    train/ActionL1Loss=0.3278
+    throughput/total_tokens=9,792,000
+    throughput/device/tokens_per_second=1,166
+    throughput/device/batches_per_second=0.0486
+[step=52/500000]
+    train/ActionL1Loss=0.3730
+    throughput/total_tokens=9,984,000
+    throughput/device/tokens_per_second=1,166
+    throughput/device/batches_per_second=0.0486
+[step=53/500000]
+    train/ActionL1Loss=0.3430
+    throughput/total_tokens=10,176,000
+    throughput/device/tokens_per_second=1,166
+    throughput/device/batches_per_second=0.0486
+[step=54/500000]
+    train/ActionL1Loss=0.3628
+    throughput/total_tokens=10,368,000
+    throughput/device/tokens_per_second=1,166
+    throughput/device/batches_per_second=0.0486
+[step=55/500000]
+    train/ActionL1Loss=0.3139
+    throughput/total_tokens=10,560,000
+    throughput/device/tokens_per_second=1,166
+    throughput/device/batches_per_second=0.0486
+[step=56/500000]
+    train/ActionL1Loss=0.3347
+    throughput/total_tokens=10,752,000
+    throughput/device/tokens_per_second=1,166
+    throughput/device/batches_per_second=0.0486
+[step=57/500000]
+    train/ActionL1Loss=0.3825
+    throughput/total_tokens=10,944,000
+    throughput/device/tokens_per_second=1,165
+    throughput/device/batches_per_second=0.0486
+[step=58/500000]
+    train/ActionL1Loss=0.3657
+    throughput/total_tokens=11,136,000
+    throughput/device/tokens_per_second=1,165
+    throughput/device/batches_per_second=0.0486
+[step=59/500000]
+    train/ActionL1Loss=0.3329
+    throughput/total_tokens=11,328,000
+    throughput/device/tokens_per_second=1,165
+    throughput/device/batches_per_second=0.0486
+[step=60/500000]
+    optim/total_grad_norm=28.20
+    train/ActionL1Loss=0.3882
+    throughput/total_tokens=11,520,000
+    throughput/device/tokens_per_second=1,165
+    throughput/device/batches_per_second=0.0485
+    System/Peak GPU Memory (MB)=46,917

wandb/wandb/run-20251002_151047-gal9lnsm/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/wandb/run-20251002_151047-gal9lnsm/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,203 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-02T15:10:47.778990Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "l1_regression",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "realworld",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_cleandesk50.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-260",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "56241074176"
+    }
+  },
+  "memory":  {
+    "total":  "2434606956544"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "2",
+      "uniqueId":  "0x9815965a899d8053",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x4213cc9eeeefc98d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0xd79d4a081e34548d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xd7a6e11358a6574d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0x2d75dae36f0dc353",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x702e8efb76b00c21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x4493708eee1ee737",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xe35cdba2e3fafd21",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759676993",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2222",
+    "job_name":  "mh_cleandesk50",
+    "job_nodelist":  "auh7-1b-gpu-260",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759417793",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2222",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-260",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2555521",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-260",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "nw73z7xb5cgzo0hg2igu85u5fde2wemd"
+}

wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:10:47.93095326Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvxt8f_1d/port-2555704.txt","pid":2555704,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-02T15:10:47.932417878Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2555704}
+{"time":"2025-10-02T15:10:47.931479477Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2555704-2555872-3886299646/socket","Net":"unix"}}
+{"time":"2025-10-02T15:10:48.03182349Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-02T15:10:48.038756993Z","level":"INFO","msg":"handleInformInit: received","streamId":"gal9lnsm","id":"1(@)"}
+{"time":"2025-10-02T15:10:53.841859694Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gal9lnsm","id":"1(@)"}

wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:10:48.040587486Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T15:10:53.84149929Z","level":"INFO","msg":"stream: created new stream","id":"gal9lnsm"}
+{"time":"2025-10-02T15:10:53.841853494Z","level":"INFO","msg":"stream: started","id":"gal9lnsm"}
+{"time":"2025-10-02T15:10:53.841880754Z","level":"INFO","msg":"sender: started","stream_id":"gal9lnsm"}
+{"time":"2025-10-02T15:10:53.841894045Z","level":"INFO","msg":"writer: started","stream_id":"gal9lnsm"}
+{"time":"2025-10-02T15:10:53.842927647Z","level":"INFO","msg":"handler: started","stream_id":"gal9lnsm"}

wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug.log ADDED Viewed

File without changes

wandb/wandb/run-20251002_154526-bw81vbs0/files/output.log ADDED Viewed

	@@ -0,0 +1,81 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/02 [15:45:28] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/02 [15:45:35] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+10/02 [15:45:36] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:435
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
+****** length of the dataset: 18397
+****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
+****** Expect one of: []
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f5ce66e1750>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+10/02 [15:45:42] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
+unexpected keys: []
+************************* Initialize model successful!
+************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
+************************* Before add lora to model
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+************* Before get lora params
+************* After get lora params successfully
+10/02 [15:46:56] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=35,614
+10/02 [15:46:57] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+[step=1/500000]
+    train/ActionL1Loss=0.5548
+    throughput/total_tokens=192,000
+    System/Peak GPU Memory (MB)=40,144
+[step=2/500000]
+    train/ActionL1Loss=0.6130
+    throughput/total_tokens=384,000
+    throughput/device/tokens_per_second=1,170
+    throughput/device/batches_per_second=0.0488
+    System/Peak GPU Memory (MB)=46,917
+[step=3/500000]
+    train/ActionL1Loss=0.6006
+    throughput/total_tokens=576,000
+    throughput/device/tokens_per_second=999.5
+    throughput/device/batches_per_second=0.0416
+[step=4/500000]
+    train/ActionL1Loss=0.5381
+    throughput/total_tokens=768,000
+    throughput/device/tokens_per_second=898.3
+    throughput/device/batches_per_second=0.0374
+[step=5/500000]
+    train/ActionL1Loss=0.4982
+    throughput/total_tokens=960,000
+    throughput/device/tokens_per_second=851.4
+    throughput/device/batches_per_second=0.0355

wandb/wandb/run-20251002_154526-bw81vbs0/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:45:27.013808824Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpr0uhv_6g/port-2561337.txt","pid":2561337,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-02T15:45:27.014628014Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2561337}
+{"time":"2025-10-02T15:45:27.014614203Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2561337-2561508-3882934790/socket","Net":"unix"}}
+{"time":"2025-10-02T15:45:27.192771801Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-02T15:45:27.199344999Z","level":"INFO","msg":"handleInformInit: received","streamId":"bw81vbs0","id":"1(@)"}
+{"time":"2025-10-02T15:45:28.225110984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"bw81vbs0","id":"1(@)"}

wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:45:27.201294023Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T15:45:28.225059133Z","level":"INFO","msg":"stream: created new stream","id":"bw81vbs0"}
+{"time":"2025-10-02T15:45:28.225105354Z","level":"INFO","msg":"stream: started","id":"bw81vbs0"}
+{"time":"2025-10-02T15:45:28.225129364Z","level":"INFO","msg":"writer: started","stream_id":"bw81vbs0"}
+{"time":"2025-10-02T15:45:28.225137364Z","level":"INFO","msg":"sender: started","stream_id":"bw81vbs0"}
+{"time":"2025-10-02T15:45:28.225195135Z","level":"INFO","msg":"handler: started","stream_id":"bw81vbs0"}

wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug.log ADDED Viewed

File without changes

wandb/wandb/run-20251002_155015-xojint20/files/output.log ADDED Viewed

	@@ -0,0 +1,88 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/02 [15:50:17] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/02 [15:50:19] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+10/02 [15:50:20] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:435
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
+****** length of the dataset: 18397
+****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
+****** Expect one of: []
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f8994997820>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+10/02 [15:50:26] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.state_proj.weight', 'action_head.state_proj.bias', 'action_head.action_in_proj.weight', 'action_head.action_in_proj.bias', 'action_head.action_time_in.weight', 'action_head.action_time_in.bias', 'action_head.action_time_out.weight', 'action_head.action_time_out.bias', 'action_head.memory_proj.weight', 'action_head.memory_proj.bias', 'action_head.gemma.model.layers.0.self_attn.q_proj.weight', 'action_head.gemma.model.layers.0.self_attn.k_proj.weight', 'action_head.gemma.model.layers.0.self_attn.v_proj.weight', 'action_head.gemma.model.layers.0.self_attn.o_proj.weight', 'action_head.gemma.model.layers.0.mlp.gate_proj.weight', 'action_head.gemma.model.layers.0.mlp.up_proj.weight', 'action_head.gemma.model.layers.0.mlp.down_proj.weight', 'action_head.gemma.model.layers.0.input_layernorm.weight', 'action_head.gemma.model.layers.0.post_attention_layernorm.weight', 'action_head.gemma.model.layers.1.self_attn.q_proj.weight', 'action_head.gemma.model.layers.1.self_attn.k_proj.weight', 'action_head.gemma.model.layers.1.self_attn.v_proj.weight', 'action_head.gemma.model.layers.1.self_attn.o_proj.weight', 'action_head.gemma.model.layers.1.mlp.gate_proj.weight', 'action_head.gemma.model.layers.1.mlp.up_proj.weight', 'action_head.gemma.model.layers.1.mlp.down_proj.weight', 'action_head.gemma.model.layers.1.input_layernorm.weight', 'action_head.gemma.model.layers.1.post_attention_layernorm.weight', 'action_head.gemma.model.layers.2.self_attn.q_proj.weight', 'action_head.gemma.model.layers.2.self_attn.k_proj.weight', 'action_head.gemma.model.layers.2.self_attn.v_proj.weight', 'action_head.gemma.model.layers.2.self_attn.o_proj.weight', 'action_head.gemma.model.layers.2.mlp.gate_proj.weight', 'action_head.gemma.model.layers.2.mlp.up_proj.weight', 'action_head.gemma.model.layers.2.mlp.down_proj.weight', 'action_head.gemma.model.layers.2.input_layernorm.weight', 'action_head.gemma.model.layers.2.post_attention_layernorm.weight', 
'action_head.gemma.model.layers.3.self_attn.q_proj.weight', 'action_head.gemma.model.layers.3.self_attn.k_proj.weight', 'action_head.gemma.model.layers.3.self_attn.v_proj.weight', 'action_head.gemma.model.layers.3.self_attn.o_proj.weight', 'action_head.gemma.model.layers.3.mlp.gate_proj.weight', 'action_head.gemma.model.layers.3.mlp.up_proj.weight', 'action_head.gemma.model.layers.3.mlp.down_proj.weight', 'action_head.gemma.model.layers.3.input_layernorm.weight', 'action_head.gemma.model.layers.3.post_attention_layernorm.weight', 'action_head.gemma.model.layers.4.self_attn.q_proj.weight', 'action_head.gemma.model.layers.4.self_attn.k_proj.weight', 'action_head.gemma.model.layers.4.self_attn.v_proj.weight', 'action_head.gemma.model.layers.4.self_attn.o_proj.weight', 'action_head.gemma.model.layers.4.mlp.gate_proj.weight', 'action_head.gemma.model.layers.4.mlp.up_proj.weight', 'action_head.gemma.model.layers.4.mlp.down_proj.weight', 'action_head.gemma.model.layers.4.input_layernorm.weight', 'action_head.gemma.model.layers.4.post_attention_layernorm.weight', 'action_head.gemma.model.layers.5.self_attn.q_proj.weight', 'action_head.gemma.model.layers.5.self_attn.k_proj.weight', 'action_head.gemma.model.layers.5.self_attn.v_proj.weight', 'action_head.gemma.model.layers.5.self_attn.o_proj.weight', 'action_head.gemma.model.layers.5.mlp.gate_proj.weight', 'action_head.gemma.model.layers.5.mlp.up_proj.weight', 'action_head.gemma.model.layers.5.mlp.down_proj.weight', 'action_head.gemma.model.layers.5.input_layernorm.weight', 'action_head.gemma.model.layers.5.post_attention_layernorm.weight', 'action_head.gemma.model.layers.6.self_attn.q_proj.weight', 'action_head.gemma.model.layers.6.self_attn.k_proj.weight', 'action_head.gemma.model.layers.6.self_attn.v_proj.weight', 'action_head.gemma.model.layers.6.self_attn.o_proj.weight', 'action_head.gemma.model.layers.6.mlp.gate_proj.weight', 'action_head.gemma.model.layers.6.mlp.up_proj.weight', 
'action_head.gemma.model.layers.6.mlp.down_proj.weight', 'action_head.gemma.model.layers.6.input_layernorm.weight', 'action_head.gemma.model.layers.6.post_attention_
+unexpected keys: []
+************************* Initialize model successful!
+************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
+************************* Before add lora to model
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+************* Before get lora params
+************* After get lora params successfully
+10/02 [15:51:44] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 36856
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=36,856
+10/02 [15:51:45] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+10/02 [15:51:52] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py:967: UserWarning: The .grad attribute  warnings.py:109
+                          of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed
+                          want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor
+                          by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered
+                          internally at /pytorch/build/aten/src/ATen/core/TensorBody.h:489.)
+                            param_grad = param.grad
+[step=1/500000]
+    train/ActionNoiseL2Loss=1.632
+    throughput/total_tokens=192,000
+    System/Peak GPU Memory (MB)=39,644
+[step=2/500000]
+    train/ActionNoiseL2Loss=1.683
+    throughput/total_tokens=384,000
+    throughput/device/tokens_per_second=1,194
+    throughput/device/batches_per_second=0.0498
+    System/Peak GPU Memory (MB)=46,466
+[step=3/500000]
+    train/ActionNoiseL2Loss=1.640
+    throughput/total_tokens=576,000
+    throughput/device/tokens_per_second=1,176
+    throughput/device/batches_per_second=0.0490
+[step=4/500000]
+    train/ActionNoiseL2Loss=1.547
+    throughput/total_tokens=768,000
+    throughput/device/tokens_per_second=1,171
+    throughput/device/batches_per_second=0.0488
+[step=5/500000]
+    train/ActionNoiseL2Loss=1.508
+    throughput/total_tokens=960,000
+    throughput/device/tokens_per_second=1,168
+    throughput/device/batches_per_second=0.0487

wandb/wandb/run-20251002_155015-xojint20/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/wandb/run-20251002_155015-xojint20/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:50:15.670289561Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpzs22pa0_/port-2563820.txt","pid":2563820,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-02T15:50:15.670880688Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2563820}
+{"time":"2025-10-02T15:50:15.670869618Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2563820-2563984-1205114273/socket","Net":"unix"}}
+{"time":"2025-10-02T15:50:15.848134405Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-02T15:50:15.855061477Z","level":"INFO","msg":"handleInformInit: received","streamId":"xojint20","id":"1(@)"}
+{"time":"2025-10-02T15:50:17.025989793Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xojint20","id":"1(@)"}

wandb/wandb/run-20251002_155015-xojint20/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:50:15.85696447Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T15:50:17.025925463Z","level":"INFO","msg":"stream: created new stream","id":"xojint20"}
+{"time":"2025-10-02T15:50:17.025984043Z","level":"INFO","msg":"stream: started","id":"xojint20"}
+{"time":"2025-10-02T15:50:17.026005264Z","level":"INFO","msg":"sender: started","stream_id":"xojint20"}
+{"time":"2025-10-02T15:50:17.026005514Z","level":"INFO","msg":"writer: started","stream_id":"xojint20"}
+{"time":"2025-10-02T15:50:17.026057304Z","level":"INFO","msg":"handler: started","stream_id":"xojint20"}

wandb/wandb/run-20251002_155015-xojint20/run-xojint20.wandb ADDED Viewed

Binary file (65.5 kB). View file

wandb/wandb/run-20251002_155441-70dhy5dq/files/output.log ADDED Viewed

	@@ -0,0 +1,318 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/02 [15:54:43] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/02 [15:54:49] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+10/02 [15:54:50] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:435
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
+****** length of the dataset: 10316
+****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
+****** Expect one of: []
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f57cc61bdc0>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+10/02 [15:54:55] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
+unexpected keys: []
+************************* Initialize model successful!
+************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
+************************* Before add lora to model
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+************* Before get lora params
+************* After get lora params successfully
+10/02 [15:56:08] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=35,614
+                 WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+[step=1/500000]
+    train/ActionL1Loss=0.6604
+    throughput/total_tokens=192,000
+    System/Peak GPU Memory (MB)=40,144
+[step=2/500000]
+    train/ActionL1Loss=0.6686
+    throughput/total_tokens=384,000
+    throughput/device/tokens_per_second=1,220
+    throughput/device/batches_per_second=0.0508
+    System/Peak GPU Memory (MB)=46,917
+[step=3/500000]
+    train/ActionL1Loss=0.6322
+    throughput/total_tokens=576,000
+    throughput/device/tokens_per_second=1,199
+    throughput/device/batches_per_second=0.0500
+[step=4/500000]
+    train/ActionL1Loss=0.6230
+    throughput/total_tokens=768,000
+    throughput/device/tokens_per_second=1,190
+    throughput/device/batches_per_second=0.0496
+[step=5/500000]
+    train/ActionL1Loss=0.5815
+    throughput/total_tokens=960,000
+    throughput/device/tokens_per_second=1,187
+    throughput/device/batches_per_second=0.0495
+[step=6/500000]
+    train/ActionL1Loss=0.5807
+    throughput/total_tokens=1,152,000
+    throughput/device/tokens_per_second=1,185
+    throughput/device/batches_per_second=0.0494
+[step=7/500000]
+    train/ActionL1Loss=0.5010
+    throughput/total_tokens=1,344,000
+    throughput/device/tokens_per_second=1,184
+    throughput/device/batches_per_second=0.0493
+[step=8/500000]
+    train/ActionL1Loss=0.5155
+    throughput/total_tokens=1,536,000
+    throughput/device/tokens_per_second=1,183
+    throughput/device/batches_per_second=0.0493
+[step=9/500000]
+    train/ActionL1Loss=0.5458
+    throughput/total_tokens=1,728,000
+    throughput/device/tokens_per_second=1,182
+    throughput/device/batches_per_second=0.0493
+[step=10/500000]
+    train/ActionL1Loss=0.4240
+    throughput/total_tokens=1,920,000
+    throughput/device/tokens_per_second=1,182
+    throughput/device/batches_per_second=0.0493
+    System/Peak GPU Memory (MB)=46,917
+[step=11/500000]
+    train/ActionL1Loss=0.4684
+    throughput/total_tokens=2,112,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=12/500000]
+    train/ActionL1Loss=0.4157
+    throughput/total_tokens=2,304,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=13/500000]
+    train/ActionL1Loss=0.5035
+    throughput/total_tokens=2,496,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=14/500000]
+    train/ActionL1Loss=0.4165
+    throughput/total_tokens=2,688,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=15/500000]
+    train/ActionL1Loss=0.3336
+    throughput/total_tokens=2,880,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=16/500000]
+    train/ActionL1Loss=0.4032
+    throughput/total_tokens=3,072,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=17/500000]
+    train/ActionL1Loss=0.4553
+    throughput/total_tokens=3,264,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=18/500000]
+    train/ActionL1Loss=0.5436
+    throughput/total_tokens=3,456,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=19/500000]
+    train/ActionL1Loss=0.3642
+    throughput/total_tokens=3,648,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=20/500000]
+    optim/total_grad_norm=21.01
+    train/ActionL1Loss=0.4468
+    throughput/total_tokens=3,840,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+    System/Peak GPU Memory (MB)=46,917
+[step=21/500000]
+    train/ActionL1Loss=0.4660
+    throughput/total_tokens=4,032,000
+    throughput/device/tokens_per_second=1,179
+    throughput/device/batches_per_second=0.0492
+[step=22/500000]
+    train/ActionL1Loss=0.3718
+    throughput/total_tokens=4,224,000
+    throughput/device/tokens_per_second=1,179
+    throughput/device/batches_per_second=0.0492
+[step=23/500000]
+    train/ActionL1Loss=0.4880
+    throughput/total_tokens=4,416,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=24/500000]
+    train/ActionL1Loss=0.4259
+    throughput/total_tokens=4,608,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=25/500000]
+    train/ActionL1Loss=0.4473
+    throughput/total_tokens=4,800,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=26/500000]
+    train/ActionL1Loss=0.4736
+    throughput/total_tokens=4,992,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=27/500000]
+    train/ActionL1Loss=0.4105
+    throughput/total_tokens=5,184,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=28/500000]
+    train/ActionL1Loss=0.4386
+    throughput/total_tokens=5,376,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=29/500000]
+    train/ActionL1Loss=0.4463
+    throughput/total_tokens=5,568,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=30/500000]
+    train/ActionL1Loss=0.4582
+    throughput/total_tokens=5,760,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+    System/Peak GPU Memory (MB)=46,917
+[step=31/500000]
+    train/ActionL1Loss=0.3000
+    throughput/total_tokens=5,952,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=32/500000]
+    train/ActionL1Loss=0.4196
+    throughput/total_tokens=6,144,000
+    throughput/device/tokens_per_second=1,180
+    throughput/device/batches_per_second=0.0492
+[step=33/500000]
+    train/ActionL1Loss=0.4201
+    throughput/total_tokens=6,336,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=34/500000]
+    train/ActionL1Loss=0.3680
+    throughput/total_tokens=6,528,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=35/500000]
+    train/ActionL1Loss=0.3642
+    throughput/total_tokens=6,720,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=36/500000]
+    train/ActionL1Loss=0.4062
+    throughput/total_tokens=6,912,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=37/500000]
+    train/ActionL1Loss=0.4864
+    throughput/total_tokens=7,104,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=38/500000]
+    train/ActionL1Loss=0.4030
+    throughput/total_tokens=7,296,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=39/500000]
+    train/ActionL1Loss=0.3131
+    throughput/total_tokens=7,488,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=40/500000]
+    optim/total_grad_norm=17.23
+    train/ActionL1Loss=0.4256
+    throughput/total_tokens=7,680,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+    System/Peak GPU Memory (MB)=46,917
+[step=41/500000]
+    train/ActionL1Loss=0.3575
+    throughput/total_tokens=7,872,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=42/500000]
+    train/ActionL1Loss=0.4358
+    throughput/total_tokens=8,064,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=43/500000]
+    train/ActionL1Loss=0.2869
+    throughput/total_tokens=8,256,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=44/500000]
+    train/ActionL1Loss=0.4891
+    throughput/total_tokens=8,448,000
+    throughput/device/tokens_per_second=1,182
+    throughput/device/batches_per_second=0.0493
+[step=45/500000]
+    train/ActionL1Loss=0.3633
+    throughput/total_tokens=8,640,000
+    throughput/device/tokens_per_second=1,182
+    throughput/device/batches_per_second=0.0493
+[step=46/500000]
+    train/ActionL1Loss=0.3974
+    throughput/total_tokens=8,832,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=47/500000]
+    train/ActionL1Loss=0.3156
+    throughput/total_tokens=9,024,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=48/500000]
+    train/ActionL1Loss=0.4408
+    throughput/total_tokens=9,216,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=49/500000]
+    train/ActionL1Loss=0.3966
+    throughput/total_tokens=9,408,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+[step=50/500000]
+    train/ActionL1Loss=0.3903
+    throughput/total_tokens=9,600,000
+    throughput/device/tokens_per_second=1,181
+    throughput/device/batches_per_second=0.0492
+    System/Peak GPU Memory (MB)=46,917
+[step=51/500000]
+    train/ActionL1Loss=0.2963
+    throughput/total_tokens=9,792,000
+    throughput/device/tokens_per_second=1,177
+    throughput/device/batches_per_second=0.0491

wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T15:54:42.154138214Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-02T15:54:43.180595015Z","level":"INFO","msg":"stream: created new stream","id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180644946Z","level":"INFO","msg":"stream: started","id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180663737Z","level":"INFO","msg":"sender: started","stream_id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180659826Z","level":"INFO","msg":"writer: started","stream_id":"70dhy5dq"}
+{"time":"2025-10-02T15:54:43.180682767Z","level":"INFO","msg":"handler: started","stream_id":"70dhy5dq"}

wandb/wandb/run-20251002_155442-6v8q0jgn/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wipe/wandb/wandb/debug.log ADDED Viewed

File without changes

wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/output.log ADDED Viewed

	@@ -0,0 +1,15 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/02 [16:34:38] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/02 [16:34:45] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+                 INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:435
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe

wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-02T16:34:36.433466086Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmptn5gtmeu/port-1817135.txt","pid":1817135,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-02T16:34:36.434959359Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1817135}
+{"time":"2025-10-02T16:34:36.434944369Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1817135-1817304-2550672707/socket","Net":"unix"}}
+{"time":"2025-10-02T16:34:36.610751367Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-02T16:34:36.618123521Z","level":"INFO","msg":"handleInformInit: received","streamId":"itiyfljc","id":"1(@)"}
+{"time":"2025-10-02T16:34:37.749798524Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"itiyfljc","id":"1(@)"}

wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug.log ADDED Viewed

File without changes

wipe_flow_matching/step11500-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb72b6306ce04d1beb20bb289509f00c39a40845ff7c4b36bf4deb4e83fe82a
+size 1331

wipe_flow_matching/step12000-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:995307502120af3866f237cd0bc484fc848a652539d28e53cbea882abc16ba6b
+size 1331

wipe_flow_matching/step12000-unsharded/lora.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b09055f15a54dd092b4dd30833406731057005822da0c55c16231cf2e68f7f6
+size 1243

wipe_flow_matching/step12000-unsharded/train.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd8bfbb0ee49ab78063ef6cfdd404afa5cc66b67c8d3c5bb7cd6db0cb4c048d5
+size 15061

wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/output.log ADDED Viewed

The diff for this file is too large to render. See raw diff

wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,10 @@

+{"time":"2025-10-05T16:38:13.19911913Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-05T16:38:14.385618537Z","level":"INFO","msg":"stream: created new stream","id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385648767Z","level":"INFO","msg":"stream: started","id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385660457Z","level":"INFO","msg":"handler: started","stream_id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385655167Z","level":"INFO","msg":"writer: started","stream_id":"0cfqmuqw"}
+{"time":"2025-10-05T16:38:14.385680798Z","level":"INFO","msg":"sender: started","stream_id":"0cfqmuqw"}
+{"time":"2025-10-06T16:34:15.587824169Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-10-06T18:35:03.703248769Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-10-06T20:02:36.97363154Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-07T05:02:26.79910172Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}

wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug.log ADDED Viewed

File without changes

wipe_l1_regression/step11500-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f3468869fca8c228cf940661de292bc786f2b18fa96d39a892606183f8dd9c0
+size 1331

wipe_l1_regression/step12000-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:620f952d81c645b6e811733acfcda83cb57d683353c484746edb322f5094c21e
+size 1331

wipe_l1_regression/step12000-unsharded/lora.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b09055f15a54dd092b4dd30833406731057005822da0c55c16231cf2e68f7f6
+size 1243

wipe_l1_regression/step12000-unsharded/train.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:724fab83190f8cbcb009297c5aa7582489c7d894b16beebfd03fdc7f888a2ce8
+size 15061

wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/output.log ADDED Viewed

The diff for this file is too large to render. See raw diff