add models

Files changed (2) hide show

dobot_cook_vegetable_1760321220_MolmoE-7B-10131629-5000_clip_seq2048_l1_regression_three_images_proprio-8_ft_ah_fullyft_llm_bs96/step11500-unsharded/config.yaml +326 -0
dobot_cook_vegetable_1760321220_MolmoE-7B-10131629-5000_clip_seq2048_l1_regression_three_images_proprio-8_ft_ah_fullyft_llm_bs96/step11500-unsharded/model.pt +3 -0

dobot_cook_vegetable_1760321220_MolmoE-7B-10131629-5000_clip_seq2048_l1_regression_three_images_proprio-8_ft_ah_fullyft_llm_bs96/step11500-unsharded/config.yaml ADDED Viewed

	@@ -0,0 +1,326 @@

+run_name: dobot_cook_vegetable_1760321220_MolmoE-7B-10131629-5000_clip_seq2048_l1_regression_three_images_proprio-8_ft_ah_fullyft_llm_bs96_20251016_113149
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: 152192
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: cpu
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 128
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  action_dim: 14
+  right_end_effector_dim: 7
+  left_end_effector_dim: 7
+  mobile_base_dim: 3
+  num_actions_chunk: 16
+  proprio_dim: 14
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realmachine
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 2048
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/zhangjian/HuggingFace/dataset/Dobot-Xtrainer/dobot_cook_vegetable_lerobot_1760321220
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realmachine
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 2048
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/zhangjian/checkpoints/dobot_cook_vegetable_1760321220_MolmoE-7B-10131629-5000_clip_seq2048_l1_regression_three_images_proprio-8_ft_ah_fullyft_llm_bs96
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/MolmoE-7B-10131629-5000
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/MolmoE-7B-10131629-5000
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 96
+device_train_batch_size: 12
+device_train_microbatch_size: 12
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 1
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-vla-realmachine
+  entity: demo0
+  group: null
+  name: dobot_cook_vegetable_1760321220_MolmoE-7B-10131629-5000_clip_seq2048_l1_regression_three_images_proprio-8_ft_ah_fullyft_llm_bs96_20251016_113149
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

dobot_cook_vegetable_1760321220_MolmoE-7B-10131629-5000_clip_seq2048_l1_regression_three_images_proprio-8_ft_ah_fullyft_llm_bs96/step11500-unsharded/model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91dbfc9ebf504e6d0fd01677d6dcee5d36e3a522849f670bfb68c3b60d2ec84c
+size 32960667415