Henryeahhh commited on Oct 15, 2025

Commit

6d71685

verified ·

1 Parent(s): b00e6cc

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +9 -0
all_flow_matching/glue_best/config.yaml +316 -0
all_flow_matching/glue_best/step11000-action-head/metadata.pt +3 -0
all_flow_matching/glue_best/step11500-action-head/metadata.pt +3 -0
all_flow_matching/glue_best/step11500-unsharded/config.yaml +316 -0
all_flow_matching/glue_best/step11500-unsharded/lora.pt +3 -0
all_flow_matching/glue_best/step11500-unsharded/train.pt +3 -0
all_flow_matching/glue_best/step11500/config.yaml +316 -0
all_flow_matching/glue_best/wandb/wandb/debug-internal.log +13 -0
all_flow_matching/glue_best/wandb/wandb/debug.log +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/run-dnrnwv30.wandb +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/run-hmmpns57.wandb +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/config.yaml +611 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/output.log +33 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/run-wtatxotn.wandb +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/config.yaml +611 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/output.log +33 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/requirements.txt +283 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/wandb-metadata.json +204 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/wandb-summary.json +1 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/logs/debug-internal.log +11 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/run-6tj2c8pr.wandb +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/config.yaml +611 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/output.log +29 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/requirements.txt +283 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/wandb-metadata.json +204 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/wandb-summary.json +1 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/logs/debug-core.log +14 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/logs/debug-internal.log +11 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/logs/debug.log +1 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/run-qsv5q1hc.wandb +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/config.yaml +611 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/output.log +47 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/requirements.txt +283 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/wandb-metadata.json +204 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/wandb-summary.json +1 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/logs/debug-core.log +14 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/logs/debug-internal.log +11 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/logs/debug.log +1 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/run-lqn400wc.wandb +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/config.yaml +615 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/output.log +74 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/requirements.txt +285 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/wandb-metadata.json +204 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/wandb-summary.json +1 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/logs/debug-core.log +16 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/logs/debug-internal.log +12 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/logs/debug.log +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/run-rwm1qqvr.wandb +0 -0
all_flow_matching/glue_best/wandb/wandb/run-20250924_075956-zoletkkn/files/config.yaml +615 -0

.gitattributes CHANGED Viewed

@@ -42,3 +42,12 @@ wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/run-a1znetn8.wandb f
 cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/run-quokv8gn.wandb filter=lfs diff=lfs merge=lfs -text
 cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/run-fqdwkc8m.wandb filter=lfs diff=lfs merge=lfs -text
 glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/run-tmwli25x.wandb filter=lfs diff=lfs merge=lfs -text

 cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/run-quokv8gn.wandb filter=lfs diff=lfs merge=lfs -text
 cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/run-fqdwkc8m.wandb filter=lfs diff=lfs merge=lfs -text
 glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/run-tmwli25x.wandb filter=lfs diff=lfs merge=lfs -text
+glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/run-7ovz4jzt.wandb filter=lfs diff=lfs merge=lfs -text
+eraser_l1_regression/wandb/wandb/run-20251011_163844-qzez8pv7/run-qzez8pv7.wandb filter=lfs diff=lfs merge=lfs -text
+all_l1/wandb/wandb/run-20250930_131250-ea1k0g3y/run-ea1k0g3y.wandb filter=lfs diff=lfs merge=lfs -text
+all_flow_matching/glue_best/wandb/wandb/run-20250924_081723-x94cyrsz/run-x94cyrsz.wandb filter=lfs diff=lfs merge=lfs -text
+eraser_flow_matching/wandb/wandb/run-20251011_163832-yqnt28c8/run-yqnt28c8.wandb filter=lfs diff=lfs merge=lfs -text
+all_flow_matching/wandb/wandb/run-20250928_104655-3b31u4we/run-3b31u4we.wandb filter=lfs diff=lfs merge=lfs -text
+wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/run-0cfqmuqw.wandb filter=lfs diff=lfs merge=lfs -text
+pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/run-a381qnn9.wandb filter=lfs diff=lfs merge=lfs -text
+cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/run-gqyapbwp.wandb filter=lfs diff=lfs merge=lfs -text

all_flow_matching/glue_best/config.yaml ADDED Viewed

	@@ -0,0 +1,316 @@

+run_name: glue_20250924_082336
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: false
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: false
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 32
+lora_llm: true
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20250924_082336
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

all_flow_matching/glue_best/step11000-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91bd03dced0b2509ade669c28c4f205463e4b4e83b54d6726754eeb8ea952bfe
+size 1331

all_flow_matching/glue_best/step11500-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb72b6306ce04d1beb20bb289509f00c39a40845ff7c4b36bf4deb4e83fe82a
+size 1331

all_flow_matching/glue_best/step11500-unsharded/config.yaml ADDED Viewed

	@@ -0,0 +1,316 @@

+run_name: glue_20250924_082336
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: false
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: false
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 32
+lora_llm: true
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20250924_082336
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

all_flow_matching/glue_best/step11500-unsharded/lora.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6269c7cb774e69a5c43b4109cffb347c2936232c5222e2b8a75056ee1188671d
+size 304417027

all_flow_matching/glue_best/step11500-unsharded/train.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b1734f30ac548c86aa66c22a5cfb32bd2320a41d56faab841b4fd53020a6d1b
+size 15061

all_flow_matching/glue_best/step11500/config.yaml ADDED Viewed

	@@ -0,0 +1,316 @@

+run_name: glue_20250924_082336
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: false
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: false
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 32
+lora_llm: true
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: glue_20250924_082336
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

all_flow_matching/glue_best/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,13 @@

+{"time":"2025-09-24T08:24:16.705237241Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-09-24T08:24:17.754431561Z","level":"INFO","msg":"stream: created new stream","id":"4dp69fok"}
+{"time":"2025-09-24T08:24:17.754478082Z","level":"INFO","msg":"stream: started","id":"4dp69fok"}
+{"time":"2025-09-24T08:24:17.754498402Z","level":"INFO","msg":"sender: started","stream_id":"4dp69fok"}
+{"time":"2025-09-24T08:24:17.754506202Z","level":"INFO","msg":"writer: started","stream_id":"4dp69fok"}
+{"time":"2025-09-24T08:24:17.754546793Z","level":"INFO","msg":"handler: started","stream_id":"4dp69fok"}
+{"time":"2025-09-24T15:15:45.267501791Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-09-24T20:24:27.534186056Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-09-25T23:01:28.093149981Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-09-26T02:33:15.940926228Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/4dp69fok/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-09-26T19:36:48.428667728Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/4dp69fok/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-09-26T20:06:50.687851553Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-09-26T20:59:26.86775551Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}

all_flow_matching/glue_best/wandb/wandb/debug.log ADDED Viewed

File without changes

all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/run-dnrnwv30.wandb ADDED Viewed

Binary file (8.79 kB). View file

all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/run-hmmpns57.wandb ADDED Viewed

Binary file (8.79 kB). View file

all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/config.yaml ADDED Viewed

	@@ -0,0 +1,611 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            o421nvn5u6ub6ruog26gg83x0g2lmgbt:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - flow_matching
+                    - --seq_len
+                    - "768"
+                    - --lora_rank
+                    - "32"
+                    - --lora_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "22"
+                    - --global_batch_size
+                    - "176"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - glue
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "51147874304"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: c13f2763af61e0d729a8b5ab4bdefc512205bcc5
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x137c9ede1bb1518e"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x21a2e88d06c419dc"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x399226d2b2bfa544"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x3558c3014c813fdb"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xf61ec17df11883bd"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x9b5c1c302c8129f8"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xfa8b85a4625b04f"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa515afd8ced1d39d"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-188
+                memory:
+                    total: "2434606952448"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1758954648"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "1605"
+                    job_name: realworld_mh
+                    job_nodelist: auh7-1b-gpu-188
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1758695448"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "1605"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-188
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "2191329"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-188
+                    topology_addr_pattern: node
+                startedAt: "2025-09-24T06:31:28.005264Z"
+                writerId: o421nvn5u6ub6ruog26gg83x0g2lmgbt
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 13
+                - 15
+                - 16
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 768
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 22
+device_train_grad_accum:
+    value: 1
+device_train_microbatch_size:
+    value: 22
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 768
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: false
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 176
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: true
+lora_rank:
+    value: 32
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_head: flow_matching
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_use_left_eef: false
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: glue_20250924_063100
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: true

all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/output.log ADDED Viewed

	@@ -0,0 +1,33 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+09/24 [06:31:30] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': 'Lerobot_Glue_best', 'path': '/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best', 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/mnt/data2/guominghao/a1/warehouse/glue_lerobot', 0.6, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 0.4, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 0.1, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+build_tokenizer, cache_dir None tokenizer_dir None
+09/24 [06:31:31] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:128
+09/24 [06:31:32] INFO     | >> build_rlds_train_dataset: Loading train dataset: vla_dataset_realworld/train                                                          __init__.py:517
+****** Import RLDSBatchTransform, RLDSDataset successfully.
+****** before RLDS dataset...
+****** data_config.rlds_dataset_name: Lerobot_Glue_best
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py", line 397, in <module>
+    train(cfg)
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 160, in main
+    train_loader = build_train_dataloader(cfg, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 196, in build_train_dataloader
+    return build_vla_train_dataloader(train_config, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 314, in build_vla_train_dataloader
+    ds = build_rlds_train_dataset(train_config, _normalization_type, _image_augmentation, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 527, in build_rlds_train_dataset
+    dataset = RLDSDataset(
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/rlds_datasets.py", line 355, in __init__
+    per_dataset_kwargs, weights = get_oxe_dataset_kwargs_and_weights(
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/rlds/oxe/materialize.py", line 119, in get_oxe_dataset_kwargs_and_weights
+    make_oxe_dataset_kwargs(
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/rlds/oxe/materialize.py", line 31, in make_oxe_dataset_kwargs
+    dataset_kwargs = deepcopy(OXE_DATASET_CONFIGS[dataset_name])
+KeyError: 'Lerobot_Glue_best'

all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/run-wtatxotn.wandb ADDED Viewed

Binary file (18.9 kB). View file

all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/config.yaml ADDED Viewed

	@@ -0,0 +1,611 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            71y4kqofohuhlolkoekjc4r6f1aprdzt:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - flow_matching
+                    - --seq_len
+                    - "768"
+                    - --lora_rank
+                    - "32"
+                    - --lora_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "22"
+                    - --global_batch_size
+                    - "176"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - glue
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "51148013568"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: c13f2763af61e0d729a8b5ab4bdefc512205bcc5
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x9b5c1c302c8129f8"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xf61ec17df11883bd"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x137c9ede1bb1518e"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xfa8b85a4625b04f"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x3558c3014c813fdb"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa515afd8ced1d39d"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x399226d2b2bfa544"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x21a2e88d06c419dc"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-188
+                memory:
+                    total: "2434606952448"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1758954964"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "1606"
+                    job_name: realworld_mh
+                    job_nodelist: auh7-1b-gpu-188
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1758695764"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "1606"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-188
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "2192665"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-188
+                    topology_addr_pattern: node
+                startedAt: "2025-09-24T06:36:42.806544Z"
+                writerId: 71y4kqofohuhlolkoekjc4r6f1aprdzt
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 13
+                - 15
+                - 16
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 768
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 22
+device_train_grad_accum:
+    value: 1
+device_train_microbatch_size:
+    value: 22
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 768
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: false
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 176
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: true
+lora_rank:
+    value: 32
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_head: flow_matching
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_use_left_eef: false
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: glue_20250924_063615
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: true

all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/output.log ADDED Viewed

	@@ -0,0 +1,33 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+09/24 [06:36:44] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': 'Lerobot_Glue_best', 'path': '/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1', 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/mnt/data2/guominghao/a1/warehouse/glue_lerobot', 0.6, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 0.4, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 0.1, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+build_tokenizer, cache_dir None tokenizer_dir None
+09/24 [06:36:46] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:128
+09/24 [06:36:47] INFO     | >> build_rlds_train_dataset: Loading train dataset: vla_dataset_realworld/train                                                          __init__.py:517
+****** Import RLDSBatchTransform, RLDSDataset successfully.
+****** before RLDS dataset...
+****** data_config.rlds_dataset_name: Lerobot_Glue_best
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py", line 397, in <module>
+    train(cfg)
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 160, in main
+    train_loader = build_train_dataloader(cfg, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 196, in build_train_dataloader
+    return build_vla_train_dataloader(train_config, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 314, in build_vla_train_dataloader
+    ds = build_rlds_train_dataset(train_config, _normalization_type, _image_augmentation, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 527, in build_rlds_train_dataset
+    dataset = RLDSDataset(
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/rlds_datasets.py", line 355, in __init__
+    per_dataset_kwargs, weights = get_oxe_dataset_kwargs_and_weights(
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/rlds/oxe/materialize.py", line 119, in get_oxe_dataset_kwargs_and_weights
+    make_oxe_dataset_kwargs(
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/rlds/oxe/materialize.py", line 31, in make_oxe_dataset_kwargs
+    dataset_kwargs = deepcopy(OXE_DATASET_CONFIGS[dataset_name])
+KeyError: 'Lerobot_Glue_best'

all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,283 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-rocm==2.16.2
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+numpy==2.2.6
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-09-24T06:36:42.806544Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "768",
+    "--lora_rank",
+    "32",
+    "--lora_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "22",
+    "--global_batch_size",
+    "176",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "glue",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "c13f2763af61e0d729a8b5ab4bdefc512205bcc5"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-188",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "51148013568"
+    }
+  },
+  "memory":  {
+    "total":  "2434606952448"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "1",
+      "uniqueId":  "0x9b5c1c302c8129f8",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xf61ec17df11883bd",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x137c9ede1bb1518e",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xfa8b85a4625b04f",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x3558c3014c813fdb",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xa515afd8ced1d39d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0x399226d2b2bfa544",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x21a2e88d06c419dc",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1758954964",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "1606",
+    "job_name":  "realworld_mh",
+    "job_nodelist":  "auh7-1b-gpu-188",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1758695764",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "1606",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-188",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2192665",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-188",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "71y4kqofohuhlolkoekjc4r6f1aprdzt"
+}

all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_runtime":2,"_wandb":{"runtime":2}}

all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-09-24T06:36:43.051707086Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-09-24T06:36:44.180319507Z","level":"INFO","msg":"stream: created new stream","id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:44.180366367Z","level":"INFO","msg":"stream: started","id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:44.180386688Z","level":"INFO","msg":"writer: started","stream_id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:44.180391338Z","level":"INFO","msg":"handler: started","stream_id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:44.180402908Z","level":"INFO","msg":"sender: started","stream_id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:47.096300319Z","level":"INFO","msg":"stream: closing","id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:48.366366183Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-24T06:36:48.706218134Z","level":"INFO","msg":"handler: closed","stream_id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:48.707592142Z","level":"INFO","msg":"sender: closed","stream_id":"6tj2c8pr"}
+{"time":"2025-09-24T06:36:48.707612462Z","level":"INFO","msg":"stream: closed","id":"6tj2c8pr"}

all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/run-6tj2c8pr.wandb ADDED Viewed

Binary file (18.9 kB). View file

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/config.yaml ADDED Viewed

	@@ -0,0 +1,611 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            ym1fr90agfv5lp1xadwns4zfs5lnvysu:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - flow_matching
+                    - --seq_len
+                    - "768"
+                    - --lora_rank
+                    - "32"
+                    - --lora_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "22"
+                    - --global_batch_size
+                    - "176"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - glue
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "51148275712"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: c13f2763af61e0d729a8b5ab4bdefc512205bcc5
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x399226d2b2bfa544"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x3558c3014c813fdb"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa515afd8ced1d39d"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x137c9ede1bb1518e"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xfa8b85a4625b04f"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x21a2e88d06c419dc"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x9b5c1c302c8129f8"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xf61ec17df11883bd"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-188
+                memory:
+                    total: "2434606952448"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1758955952"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "1607"
+                    job_name: realworld_mh
+                    job_nodelist: auh7-1b-gpu-188
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1758696752"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "1607"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-188
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "2194698"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-188
+                    topology_addr_pattern: node
+                startedAt: "2025-09-24T06:53:10.958875Z"
+                writerId: ym1fr90agfv5lp1xadwns4zfs5lnvysu
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 13
+                - 15
+                - 16
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 768
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 22
+device_train_grad_accum:
+    value: 1
+device_train_microbatch_size:
+    value: 22
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 768
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: false
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 176
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: true
+lora_rank:
+    value: 32
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_head: flow_matching
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_use_left_eef: false
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: glue_20250924_065243
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: true

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/output.log ADDED Viewed

	@@ -0,0 +1,29 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+09/24 [06:53:12] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best', 0.6, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 0.4, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 0.1, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+09/24 [06:53:14] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:128
+09/24 [06:53:15] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:434
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py", line 397, in <module>
+    train(cfg)
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 160, in main
+    train_loader = build_train_dataloader(cfg, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 196, in build_train_dataloader
+    return build_vla_train_dataloader(train_config, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 327, in build_vla_train_dataloader
+    ds = build_lerobot_train_dataset(train_config, normalization_type,device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 435, in build_lerobot_train_dataset
+    from olmo.data.vla.lerobot_datasets import LeRobotDatasetWrapper
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/lerobot_datasets.py", line 71, in <module>
+    class LeRobotDatasetWrapper(Dataset):
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/lerobot_datasets.py", line 72, in LeRobotDatasetWrapper
+    def __init__(self, dataset_path, chunk_size=NUM_ACTIONS_CHUNK,
+NameError: name 'NUM_ACTIONS_CHUNK' is not defined

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,283 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-rocm==2.16.2
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+numpy==2.2.6
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-09-24T06:53:10.958875Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "768",
+    "--lora_rank",
+    "32",
+    "--lora_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "22",
+    "--global_batch_size",
+    "176",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "glue",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "c13f2763af61e0d729a8b5ab4bdefc512205bcc5"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-188",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "51148275712"
+    }
+  },
+  "memory":  {
+    "total":  "2434606952448"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "2",
+      "uniqueId":  "0x399226d2b2bfa544",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x3558c3014c813fdb",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xa515afd8ced1d39d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x137c9ede1bb1518e",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xfa8b85a4625b04f",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x21a2e88d06c419dc",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0x9b5c1c302c8129f8",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xf61ec17df11883bd",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1758955952",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "1607",
+    "job_name":  "realworld_mh",
+    "job_nodelist":  "auh7-1b-gpu-188",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1758696752",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "1607",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-188",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2194698",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-188",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "ym1fr90agfv5lp1xadwns4zfs5lnvysu"
+}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":2},"_runtime":2}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-09-24T06:53:11.01260872Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpysryscrf/port-2194776.txt","pid":2194776,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-09-24T06:53:11.013097116Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2194776}
+{"time":"2025-09-24T06:53:11.013088106Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2194776-2194946-2300508100/socket","Net":"unix"}}
+{"time":"2025-09-24T06:53:11.195667052Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-09-24T06:53:11.202657842Z","level":"INFO","msg":"handleInformInit: received","streamId":"qsv5q1hc","id":"1(@)"}
+{"time":"2025-09-24T06:53:12.34562108Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"qsv5q1hc","id":"1(@)"}
+{"time":"2025-09-24T06:53:15.425080291Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-09-24T06:53:15.425127242Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-09-24T06:53:15.425161212Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-09-24T06:53:15.425190602Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-09-24T06:53:15.425244213Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2194776-2194946-2300508100/socket","Net":"unix"}}
+{"time":"2025-09-24T06:53:16.766852773Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-09-24T06:53:16.767200728Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-09-24T06:53:16.767210378Z","level":"INFO","msg":"server is closed"}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-09-24T06:53:11.204449275Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-09-24T06:53:12.345567379Z","level":"INFO","msg":"stream: created new stream","id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:12.345615649Z","level":"INFO","msg":"stream: started","id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:12.34563651Z","level":"INFO","msg":"sender: started","stream_id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:12.34563539Z","level":"INFO","msg":"writer: started","stream_id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:12.34568667Z","level":"INFO","msg":"handler: started","stream_id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:15.425126022Z","level":"INFO","msg":"stream: closing","id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:16.457729801Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-24T06:53:16.764520193Z","level":"INFO","msg":"handler: closed","stream_id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:16.765675218Z","level":"INFO","msg":"sender: closed","stream_id":"qsv5q1hc"}
+{"time":"2025-09-24T06:53:16.765705399Z","level":"INFO","msg":"stream: closed","id":"qsv5q1hc"}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/logs/debug.log ADDED Viewed

	@@ -0,0 +1 @@


1	+ 2025-09-24 06:53:15,425 INFO wandb-AsyncioManager-main:2194776 [service_client.py:_forward_responses():84] Reached EOF.

all_flow_matching/glue_best/wandb/wandb/run-20250924_065310-qsv5q1hc/run-qsv5q1hc.wandb ADDED Viewed

Binary file (17.9 kB). View file

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/config.yaml ADDED Viewed

	@@ -0,0 +1,611 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            pv6kdvw48bx7dygl9qkpmbu5bsrvk9dc:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - flow_matching
+                    - --seq_len
+                    - "768"
+                    - --lora_rank
+                    - "32"
+                    - --lora_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "22"
+                    - --global_batch_size
+                    - "176"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - glue
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "51148382208"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: c13f2763af61e0d729a8b5ab4bdefc512205bcc5
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x3558c3014c813fdb"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xfa8b85a4625b04f"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x9b5c1c302c8129f8"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x399226d2b2bfa544"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa515afd8ced1d39d"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x137c9ede1bb1518e"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xf61ec17df11883bd"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x21a2e88d06c419dc"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-188
+                memory:
+                    total: "2434606952448"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1758956113"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "1608"
+                    job_name: realworld_mh
+                    job_nodelist: auh7-1b-gpu-188
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1758696913"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "1608"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-188
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "2195813"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-188
+                    topology_addr_pattern: node
+                startedAt: "2025-09-24T06:55:50.673091Z"
+                writerId: pv6kdvw48bx7dygl9qkpmbu5bsrvk9dc
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 13
+                - 15
+                - 16
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 768
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 22
+device_train_grad_accum:
+    value: 1
+device_train_microbatch_size:
+    value: 22
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 768
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: false
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 176
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: true
+lora_rank:
+    value: 32
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_head: flow_matching
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_use_left_eef: false
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: glue_20250924_065523
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: true

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/output.log ADDED Viewed

	@@ -0,0 +1,47 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+09/24 [06:55:52] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best', 0.6, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 0.4, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 0.1, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+09/24 [06:55:54] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:128
+                 INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:434
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py", line 397, in <module>
+    train(cfg)
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 160, in main
+    train_loader = build_train_dataloader(cfg, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 196, in build_train_dataloader
+    return build_vla_train_dataloader(train_config, device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 327, in build_vla_train_dataloader
+    ds = build_lerobot_train_dataset(train_config, normalization_type,device)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 439, in build_lerobot_train_dataset
+    dataset = LeRobotDatasetWrapper(
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/lerobot_datasets.py", line 84, in __init__
+    dataset_demo = LeRobotDataset(repo_id=os.path.basename(dataset_path),root=dataset_path)
+  File "/vast/users/xiaodan/zhangjian/lerobot/src/lerobot/datasets/lerobot_dataset.py", line 610, in __init__
+    self.meta = LeRobotDatasetMetadata(
+  File "/vast/users/xiaodan/zhangjian/lerobot/src/lerobot/datasets/lerobot_dataset.py", line 101, in __init__
+    self.load_metadata()
+  File "/vast/users/xiaodan/zhangjian/lerobot/src/lerobot/datasets/lerobot_dataset.py", line 112, in load_metadata
+    check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION)
+  File "/vast/users/xiaodan/zhangjian/lerobot/src/lerobot/datasets/utils.py", line 487, in check_version_compatibility
+    raise BackwardCompatibilityError(repo_id, v_check)
+lerobot.datasets.backward_compatibility.BackwardCompatibilityError:
+The dataset you requested (Lerobot_Glue_best) is in 2.1 format.
+We introduced a new format since v3.0 which is not backward compatible with v2.1.
+Please, update your dataset to the new format using this command:
+```
+python -m lerobot.datasets.v30.convert_dataset_v21_to_v30 --repo-id=Lerobot_Glue_best
+```
+If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
+or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,283 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-rocm==2.16.2
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+numpy==2.2.6
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-09-24T06:55:50.673091Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "768",
+    "--lora_rank",
+    "32",
+    "--lora_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "22",
+    "--global_batch_size",
+    "176",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "glue",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "c13f2763af61e0d729a8b5ab4bdefc512205bcc5"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-188",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "51148382208"
+    }
+  },
+  "memory":  {
+    "total":  "2434606952448"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "0",
+      "uniqueId":  "0x3558c3014c813fdb",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xfa8b85a4625b04f",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0x9b5c1c302c8129f8",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0x399226d2b2bfa544",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xa515afd8ced1d39d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x137c9ede1bb1518e",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xf61ec17df11883bd",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x21a2e88d06c419dc",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1758956113",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "1608",
+    "job_name":  "realworld_mh",
+    "job_nodelist":  "auh7-1b-gpu-188",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1758696913",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "1608",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-188",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2195813",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-188",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "pv6kdvw48bx7dygl9qkpmbu5bsrvk9dc"
+}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":2},"_runtime":2}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-09-24T06:55:50.723563798Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpjs8cyprs/port-2195891.txt","pid":2195891,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-09-24T06:55:50.72450232Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2195891}
+{"time":"2025-09-24T06:55:50.724421019Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2195891-2196055-1725862429/socket","Net":"unix"}}
+{"time":"2025-09-24T06:55:50.908962781Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-09-24T06:55:50.915479784Z","level":"INFO","msg":"handleInformInit: received","streamId":"lqn400wc","id":"1(@)"}
+{"time":"2025-09-24T06:55:52.044031974Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"lqn400wc","id":"1(@)"}
+{"time":"2025-09-24T06:55:54.891857034Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-09-24T06:55:54.89233836Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-09-24T06:55:54.89233283Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-09-24T06:55:54.89236797Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-09-24T06:55:54.892429561Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2195891-2196055-1725862429/socket","Net":"unix"}}
+{"time":"2025-09-24T06:55:56.94476761Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-09-24T06:55:56.944779601Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-09-24T06:55:56.944793901Z","level":"INFO","msg":"server is closed"}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-09-24T06:55:50.917562781Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-09-24T06:55:52.043989044Z","level":"INFO","msg":"stream: created new stream","id":"lqn400wc"}
+{"time":"2025-09-24T06:55:52.044025974Z","level":"INFO","msg":"stream: started","id":"lqn400wc"}
+{"time":"2025-09-24T06:55:52.044043335Z","level":"INFO","msg":"handler: started","stream_id":"lqn400wc"}
+{"time":"2025-09-24T06:55:52.044047115Z","level":"INFO","msg":"writer: started","stream_id":"lqn400wc"}
+{"time":"2025-09-24T06:55:52.044082945Z","level":"INFO","msg":"sender: started","stream_id":"lqn400wc"}
+{"time":"2025-09-24T06:55:54.8923419Z","level":"INFO","msg":"stream: closing","id":"lqn400wc"}
+{"time":"2025-09-24T06:55:56.556098297Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-24T06:55:56.941936164Z","level":"INFO","msg":"handler: closed","stream_id":"lqn400wc"}
+{"time":"2025-09-24T06:55:56.943528235Z","level":"INFO","msg":"sender: closed","stream_id":"lqn400wc"}
+{"time":"2025-09-24T06:55:56.943536815Z","level":"INFO","msg":"stream: closed","id":"lqn400wc"}

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/logs/debug.log ADDED Viewed

	@@ -0,0 +1 @@


1	+ 2025-09-24 06:55:54,891 INFO wandb-AsyncioManager-main:2195891 [service_client.py:_forward_responses():84] Reached EOF.

all_flow_matching/glue_best/wandb/wandb/run-20250924_065550-lqn400wc/run-lqn400wc.wandb ADDED Viewed

Binary file (19.8 kB). View file

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/config.yaml ADDED Viewed

	@@ -0,0 +1,615 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            1pt1kzn3156onku1dbmbvrh2eyknhblo:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - flow_matching
+                    - --seq_len
+                    - "768"
+                    - --lora_rank
+                    - "32"
+                    - --lora_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "22"
+                    - --global_batch_size
+                    - "176"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - glue
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "50552754176"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: c13f2763af61e0d729a8b5ab4bdefc512205bcc5
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xab01f34fc0edbb6e"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x37e5d0f3d8682cca"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xf07610cbfae55ec0"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x9a76422d710d96e6"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xe1b4f7497ad1d2db"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x2e0c5f8d27fbe8f1"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xca50e2816c5058ba"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xfaa84ccf6c76f5e3"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-295
+                memory:
+                    total: "2434606931968"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1758959301"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "1610"
+                    job_name: realworld_mh
+                    job_nodelist: auh7-1b-gpu-295
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1758700101"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "1610"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-295
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "3944397"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-295
+                    topology_addr_pattern: node
+                startedAt: "2025-09-24T07:49:27.634943Z"
+                writerId: 1pt1kzn3156onku1dbmbvrh2eyknhblo
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 2
+                - 13
+                - 15
+                - 16
+                - 61
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "10":
+                - 19
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 768
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 22
+device_train_grad_accum:
+    value: 1
+device_train_microbatch_size:
+    value: 22
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 768
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: false
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 176
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: true
+lora_rank:
+    value: 32
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_head: flow_matching
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_use_left_eef: false
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: glue_20250924_074844
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: true

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/output.log ADDED Viewed

	@@ -0,0 +1,74 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+09/24 [07:49:29] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best', 0.6, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 0.4, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 0.1, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+09/24 [07:49:31] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:128
+09/24 [07:49:32] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:434
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/a1/Lerobot_Glue_best
+****** length of the dataset: 17698
+****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
+****** Expect one of: []
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f68482ee830>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+09/24 [07:49:49] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.state_proj.weight', 'action_head.state_proj.bias', 'action_head.action_in_proj.weight', 'action_head.action_in_proj.bias', 'action_head.action_time_in.weight', 'action_head.action_time_in.bias', 'action_head.action_time_out.weight', 'action_head.action_time_out.bias', 'action_head.memory_proj.weight', 'action_head.memory_proj.bias', 'action_head.gemma.model.layers.0.self_attn.q_proj.weight', 'action_head.gemma.model.layers.0.self_attn.k_proj.weight', 'action_head.gemma.model.layers.0.self_attn.v_proj.weight', 'action_head.gemma.model.layers.0.self_attn.o_proj.weight', 'action_head.gemma.model.layers.0.mlp.gate_proj.weight', 'action_head.gemma.model.layers.0.mlp.up_proj.weight', 'action_head.gemma.model.layers.0.mlp.down_proj.weight', 'action_head.gemma.model.layers.0.input_layernorm.weight', 'action_head.gemma.model.layers.0.post_attention_layernorm.weight', 'action_head.gemma.model.layers.1.self_attn.q_proj.weight', 'action_head.gemma.model.layers.1.self_attn.k_proj.weight', 'action_head.gemma.model.layers.1.self_attn.v_proj.weight', 'action_head.gemma.model.layers.1.self_attn.o_proj.weight', 'action_head.gemma.model.layers.1.mlp.gate_proj.weight', 'action_head.gemma.model.layers.1.mlp.up_proj.weight', 'action_head.gemma.model.layers.1.mlp.down_proj.weight', 'action_head.gemma.model.layers.1.input_layernorm.weight', 'action_head.gemma.model.layers.1.post_attention_layernorm.weight', 'action_head.gemma.model.layers.2.self_attn.q_proj.weight', 'action_head.gemma.model.layers.2.self_attn.k_proj.weight', 'action_head.gemma.model.layers.2.self_attn.v_proj.weight', 'action_head.gemma.model.layers.2.self_attn.o_proj.weight', 'action_head.gemma.model.layers.2.mlp.gate_proj.weight', 'action_head.gemma.model.layers.2.mlp.up_proj.weight', 'action_head.gemma.model.layers.2.mlp.down_proj.weight', 'action_head.gemma.model.layers.2.input_layernorm.weight', 'action_head.gemma.model.layers.2.post_attention_layernorm.weight', 'action_head.gemma.model.layers.3.self_attn.q_proj.weight', 'action_head.gemma.model.layers.3.self_attn.k_proj.weight', 'action_head.gemma.model.layers.3.self_attn.v_proj.weight', 'action_head.gemma.model.layers.3.self_attn.o_proj.weight', 'action_head.gemma.model.layers.3.mlp.gate_proj.weight', 'action_head.gemma.model.layers.3.mlp.up_proj.weight', 'action_head.gemma.model.layers.3.mlp.down_proj.weight', 'action_head.gemma.model.layers.3.input_layernorm.weight', 'action_head.gemma.model.layers.3.post_attention_layernorm.weight', 'action_head.gemma.model.layers.4.self_attn.q_proj.weight', 'action_head.gemma.model.layers.4.self_attn.k_proj.weight', 'action_head.gemma.model.layers.4.self_attn.v_proj.weight', 'action_head.gemma.model.layers.4.self_attn.o_proj.weight', 'action_head.gemma.model.layers.4.mlp.gate_proj.weight', 'action_head.gemma.model.layers.4.mlp.up_proj.weight', 'action_head.gemma.model.layers.4.mlp.down_proj.weight', 'action_head.gemma.model.layers.4.input_layernorm.weight', 'action_head.gemma.model.layers.4.post_attention_layernorm.weight', 'action_head.gemma.model.layers.5.self_attn.q_proj.weight', 'action_head.gemma.model.layers.5.self_attn.k_proj.weight', 'action_head.gemma.model.layers.5.self_attn.v_proj.weight', 'action_head.gemma.model.layers.5.self_attn.o_proj.weight', 'action_head.gemma.model.layers.5.mlp.gate_proj.weight', 'action_head.gemma.model.layers.5.mlp.up_proj.weight', 'action_head.gemma.model.layers.5.mlp.down_proj.weight', 'action_head.gemma.model.layers.5.input_layernorm.weight', 'action_head.gemma.model.layers.5.post_attention_layernorm.weight', 'action_head.gemma.model.layers.6.self_attn.q_proj.weight', 'action_head.gemma.model.layers.6.self_attn.k_proj.weight', 'action_head.gemma.model.layers.6.self_attn.v_proj.weight', 'action_head.gemma.model.layers.6.self_attn.o_proj.weight', 'action_head.gemma.model.layers.6.mlp.gate_proj.weight', 'action_head.gemma.model.layers.6.mlp.up_proj.weight', 'action_head.gemma.model.layers.6.mlp.down_proj.weight', 'action_head.gemma.model.layers.6.input_layernorm.weight', 'action_head.gemma.model.layers.6.post_attention_
+unexpected keys: []
+************************* Initialize model successful!
+************************* LoRA flags: use_lora=True, lora_llm=True, lora_vit=False, lora_connector=False
+************************* Before add lora to model
+************************* Add lora to model.transformer successful!
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+************* Before get lora params
+************* get lora params name: llm_params[0]: blocks.6._fsdp_wrapped_module.ff_out.parametrizations.weight.0.lora_A
+************* After get lora params successfully
+09/24 [07:51:21] INFO     | >> Constructing optimizer with 1 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 39071
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=39,071
+!!!Training failed:
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 593, in main
+    trainer.fit()
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 2277, in fit
+    for batch in self.train_loader:
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
+    data = self._next_data()
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 43, in fetch
+    return self.collate_fn(data)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py", line 158, in __call__
+    self._add_action_tokens_to_batch(batch)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py", line 254, in _add_action_tokens_to_batch
+    action_tokens = self._build_action_tokens()
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py", line 337, in _build_action_tokens
+    assert len(right_eef_tokens) == ACTION_DIMS_MAPPING['right_end_effector']
+AssertionError
+wandb: WARNING The `quiet` argument to `wandb.run.finish()` is deprecated, use `wandb.Settings(quiet=...)` to set this instead.

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,285 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-rocm==2.16.2
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-09-24T07:49:27.634943Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "768",
+    "--lora_rank",
+    "32",
+    "--lora_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "22",
+    "--global_batch_size",
+    "176",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "glue",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "c13f2763af61e0d729a8b5ab4bdefc512205bcc5"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
+  "host":  "auh7-1b-gpu-295",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50552754176"
+    }
+  },
+  "memory":  {
+    "total":  "2434606931968"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "2",
+      "uniqueId":  "0xab01f34fc0edbb6e",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x37e5d0f3d8682cca",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xf07610cbfae55ec0",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0x9a76422d710d96e6",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0xe1b4f7497ad1d2db",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x2e0c5f8d27fbe8f1",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0xca50e2816c5058ba",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xfaa84ccf6c76f5e3",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1758959301",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "1610",
+    "job_name":  "realworld_mh",
+    "job_nodelist":  "auh7-1b-gpu-295",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1758700101",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "1610",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-295",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "3944397",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-295",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "1pt1kzn3156onku1dbmbvrh2eyknhblo"
+}

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_runtime":113.732812458,"System/Peak GPU Memory (MB)":39071.62890625,"_timestamp":1.7587002818870535e+09,"_step":0,"_wandb":{"runtime":113}}

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,16 @@

+{"time":"2025-09-24T07:49:27.889617724Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpsbiq8ish/port-3944478.txt","pid":3944478,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-09-24T07:49:27.891870094Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3944478}
+{"time":"2025-09-24T07:49:27.892998519Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3944478-3944654-2489211586/socket","Net":"unix"}}
+{"time":"2025-09-24T07:49:28.091672549Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-09-24T07:49:28.107596353Z","level":"INFO","msg":"handleInformInit: received","streamId":"rwm1qqvr","id":"1(@)"}
+{"time":"2025-09-24T07:49:29.254468509Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"rwm1qqvr","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.363561879Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"rwm1qqvr","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.367101887Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"rwm1qqvr","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.367107627Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.367113717Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.367121007Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-09-24T07:51:25.367120867Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.367156617Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.367160127Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-09-24T07:51:25.367206328Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3944478-3944654-2489211586/socket","Net":"unix"}}
+{"time":"2025-09-24T07:51:25.367223268Z","level":"INFO","msg":"server is closed"}

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-09-24T07:49:28.109477949Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-09-24T07:49:29.254414398Z","level":"INFO","msg":"stream: created new stream","id":"rwm1qqvr"}
+{"time":"2025-09-24T07:49:29.254462629Z","level":"INFO","msg":"stream: started","id":"rwm1qqvr"}
+{"time":"2025-09-24T07:49:29.254479529Z","level":"INFO","msg":"writer: started","stream_id":"rwm1qqvr"}
+{"time":"2025-09-24T07:49:29.254487509Z","level":"INFO","msg":"handler: started","stream_id":"rwm1qqvr"}
+{"time":"2025-09-24T07:49:29.254512839Z","level":"INFO","msg":"sender: started","stream_id":"rwm1qqvr"}
+{"time":"2025-09-24T07:51:23.341950674Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":1.454277139}],"total_operations":1}}
+{"time":"2025-09-24T07:51:24.999785419Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-24T07:51:25.363888344Z","level":"INFO","msg":"stream: closing","id":"rwm1qqvr"}
+{"time":"2025-09-24T07:51:25.363901414Z","level":"INFO","msg":"handler: closed","stream_id":"rwm1qqvr"}
+{"time":"2025-09-24T07:51:25.36514555Z","level":"INFO","msg":"sender: closed","stream_id":"rwm1qqvr"}
+{"time":"2025-09-24T07:51:25.36515125Z","level":"INFO","msg":"stream: closed","id":"rwm1qqvr"}

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/logs/debug.log ADDED Viewed

File without changes

all_flow_matching/glue_best/wandb/wandb/run-20250924_074927-rwm1qqvr/run-rwm1qqvr.wandb ADDED Viewed

Binary file (54.6 kB). View file

all_flow_matching/glue_best/wandb/wandb/run-20250924_075956-zoletkkn/files/config.yaml ADDED Viewed

	@@ -0,0 +1,615 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            89offtg18nkl0daugw7ob6ogc3vo0r47:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - flow_matching
+                    - --seq_len
+                    - "768"
+                    - --lora_rank
+                    - "32"
+                    - --lora_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "22"
+                    - --global_batch_size
+                    - "176"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - glue
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "50870067200"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: c13f2763af61e0d729a8b5ab4bdefc512205bcc5
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x51514ecc6ede157"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xf3ef7b4642ab85b4"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x7f3568312f929f55"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x78c4870668ca6f3c"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa5b5be8f3bb8ee59"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xd7645877fbcaeda9"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x29dc055d2883ffc3"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa98ff96823c37f37"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-310
+                memory:
+                    total: "2434606936064"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1758959956"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "1619"
+                    job_name: realworld_mh
+                    job_nodelist: auh7-1b-gpu-310
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1758700756"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "1619"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-310
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "1391516"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-310
+                    topology_addr_pattern: node
+                startedAt: "2025-09-24T07:59:56.864946Z"
+                writerId: 89offtg18nkl0daugw7ob6ogc3vo0r47
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 2
+                - 13
+                - 15
+                - 16
+                - 61
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "10":
+                - 19
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 768
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 22
+device_train_grad_accum:
+    value: 1
+device_train_microbatch_size:
+    value: 22
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 768
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: false
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 176
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: true
+lora_rank:
+    value: 32
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_head: flow_matching
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_use_left_eef: false
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: glue_20250924_075928
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: true