Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

ft_fast_xarm/config.yaml +247 -0
ft_fast_xarm/dataset_statistics.json +260 -0
ft_fast_xarm/model_30000.pt +3 -0

ft_fast_xarm/config.yaml ADDED Viewed

	@@ -0,0 +1,247 @@

+TAG: finetune
+LOG_DIR: tensorboard_logs
+vla_path: paligemma-3b-pt-224
+data_root_dir: /inspire/hdd/project/embodied-multimodality/public/syfei/data/xarm_data_rlds_mix_new
+dataset_name: xarm_new
+run_root_dir: /inspire/hdd/project/embodied-multimodality/public/syfei/data/xarm_data/AR-VLA/runs/realworld
+adapter_tmp_dir: adapter_tmp_weights
+ckpt: /inspire/hdd/global_user/gongjingjing-25039/sdzhang/model/pi0fast_base.pt
+use_lora: false
+lora_rank: 32
+lora_dropout: 0.0
+use_quantization: false
+enable_bf16: true
+model_param_to_bf16: false
+vla_training_strategy: vla-full-train
+weight_decay: 1.0e-10
+batch_size: 16
+grad_accumulation_steps: 1
+learning_rate: 2.5e-05
+warmup_steps: 1000
+lr_scheduler_type: cosine
+image_aug: true
+max_steps: 30000
+save_steps: 5000
+log_steps: 100
+use_torch_compile: false
+use_8bit_optimizer: true
+wandb_project: sii-realworld
+wandb_entity: joey-zh
+wandb: true
+use_ema: false
+ema:
+  update_after_step: 0
+  power: 0.67
+DATASET:
+  shard_dataset: true
+  share_datasets_statistics: true
+  window_size: 1
+  future_action_window_size: 9
+  camera_views:
+  - primary
+  - wrist
+  shuffle_buffer_size: 100000
+  aug_instruction: false
+  action_proprio_normalization_type: q99
+  load_depth: false
+  image_augment_kwargs:
+    primary:
+      random_rotation:
+      - 5.0
+      random_resized_crop:
+        scale:
+        - 0.9
+        - 0.9
+        ratio:
+        - 1.0
+        - 1.0
+      random_brightness:
+      - 0.2
+      random_contrast:
+      - 0.8
+      - 1.2
+      random_saturation:
+      - 0.8
+      - 1.2
+      random_hue:
+      - 0.05
+      augment_order:
+      - random_rotation
+      - random_resized_crop
+      - random_brightness
+      - random_contrast
+      - random_saturation
+      - random_hue
+    secondary:
+      random_resized_crop:
+        scale:
+        - 0.9
+        - 0.9
+        ratio:
+        - 1.0
+        - 1.0
+      random_brightness:
+      - 0.2
+      random_contrast:
+      - 0.8
+      - 1.2
+      random_saturation:
+      - 0.8
+      - 1.2
+      random_hue:
+      - 0.05
+      augment_order:
+      - random_resized_crop
+      - random_brightness
+      - random_contrast
+      - random_saturation
+      - random_hue
+    wrist:
+      random_resized_crop:
+        scale:
+        - 0.9
+        - 0.9
+        ratio:
+        - 1.0
+        - 1.0
+      random_brightness:
+      - 0.2
+      random_contrast:
+      - 0.8
+      - 1.2
+      random_saturation:
+      - 0.8
+      - 1.2
+      random_hue:
+      - 0.05
+      augment_order:
+      - random_resized_crop
+      - random_brightness
+      - random_contrast
+      - random_saturation
+      - random_hue
+MODEL:
+  name: vla.galaxea_FAST.GalaxeaFAST
+  model_name: openpi_pytorch.vla.pifast.PiFAST
+  action_tokenizer: vla.tokenizer.FAST.FASTActionTokenizer
+  AT_CONFIG:
+    load_dir: /inspire/hdd/global_user/gongjingjing-25039/sdzhang/codes/AR-VLA/runs/fast_tokenizer/sii_xarm
+    use_extra_tokens: false
+  vla_name: paligemma-3b-pt-224
+  load_inside: true
+  pretrained_model_path: /inspire/hdd/global_user/gongjingjing-25039/sdzhang/model/paligemma-3b-pt-224/
+  input_ids: true
+  action_expert_only: false
+  image_token_index: 257152
+  vocab_size: 257216
+  pad_token_id: 0
+  cond_steps: 1
+  horizon_steps: 10
+  action_dim: 7
+  proprio_dim: 7
+  max_text_tokens: 50
+  max_seq_len: 562
+  max_image_text_tokens: 562
+  position_ids_type: pi0fast-navie
+  flow_sampling: beta
+  num_inference_steps: 10
+  final_action_clip_value: 1.0
+  use_fp32_eval: true
+  action_expert_adaptive_mode: null
+  num_input_images: 2
+  use_lm_head: true
+  discrete_action: true
+  continuous_action: false
+  fm_weight: 0.0
+  ce_weight: 1.0
+  vision:
+    name: allen_model.paligemma.siglip.SiglipVisionModel
+    hidden_size: 1152
+    intermediate_size: 4304
+    num_hidden_layers: 27
+    num_attention_heads: 16
+    num_channels: 3
+    image_size: 224
+    patch_size: 14
+    layer_norm_eps: 1.0e-06
+    attention_dropout: 0.0
+    num_image_tokens: 256
+    lora:
+      r: 32
+      dropout: 0.0
+    use_quantize: false
+    use_lora: false
+  vision_projector:
+    name: allen_model.paligemma.siglip.PaliGemmaMultiModalProjector
+    vision_config:
+      hidden_size: 1152
+      projection_dim: 2048
+    lora:
+      r: 32
+      dropout: 0.0
+    use_quantize: false
+    use_lora: false
+  joint:
+    name: allen_model.vla.joint_model.JointModel
+    action_expert_adaptive_mode: null
+    mixture:
+      vlm:
+        hidden_size: 2048
+        intermediate_size: 16384
+        use_final_norm: true
+        cache: true
+        use_quantize: false
+        use_lora: false
+        adaptive_mode: null
+      proprio:
+        hidden_size: 1024
+        intermediate_size: 4096
+        use_final_norm: true
+        cache: true
+        use_quantize: false
+        use_lora: false
+        adaptive_mode: null
+      action:
+        hidden_size: 1024
+        intermediate_size: 4096
+        use_final_norm: true
+        cache: false
+        use_quantize: false
+        use_lora: false
+        adaptive_mode: null
+    time_hidden_size: 256
+    lora:
+      r: 32
+      dropout: 0.0
+    num_hidden_layers: 18
+    num_attention_heads: 8
+    num_key_value_heads: 1
+    head_dim: 256
+    max_position_embeddings: 8192
+    rms_norm_eps: 1.0e-06
+    rope_theta: 10000.0
+    attention_bias: false
+    attention_dropout: 0.0
+    pad_token_id: 0
+model_family: galaxea_zero
+hf_token: .hf_token
+seed: 7
+EVALUATION:
+  pretrained_checkpoint: runs/debug_pi_paligemma_full_bridge--0111_065718/model.pt
+  load_ema_weights: true
+  load_in_8bit: false
+  load_in_4bit: false
+  center_crop: true
+  unnorm_key: vlabench_primitive
+  num_trials_per_task: 50
+  replan_steps: 5
+  visulization: true
+  metrics:
+  - success_rate
+  - intention_score
+  - progress_score
+  run_id_note: null
+  local_log_dir: ./experiments/logs
+  use_wandb: false
+  seed: 7
+config: vla/config/sii_realworld/xarm/ft_fast_xarm.yml

ft_fast_xarm/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,260 @@

+{
+  "xarm_new": {
+    "action": {
+      "mean": [
+        -2.8300490157562308e-05,
+        -5.62300301680807e-05,
+        -8.593673555878922e-05,
+        0.00018637969333212823,
+        4.825706491828896e-05,
+        -0.00013363653852138668,
+        0.22720520198345184
+      ],
+      "std": [
+        0.0035002445802092552,
+        0.007190553471446037,
+        0.004537362605333328,
+        0.017881762236356735,
+        0.011786483228206635,
+        0.021039394661784172,
+        0.28757816553115845
+      ],
+      "max": [
+        0.023313581943511963,
+        0.035652413964271545,
+        0.03438103199005127,
+        1.2487223148345947,
+        0.11948871612548828,
+        0.20714592933654785,
+        1.0049999952316284
+      ],
+      "min": [
+        -0.029834240674972534,
+        -0.03374001383781433,
+        -0.022577375173568726,
+        -0.1317591667175293,
+        -0.09295320510864258,
+        -1.2283718585968018,
+        -0.05249999836087227
+      ],
+      "q01": [
+        -0.009777107238769532,
+        -0.01890090763568878,
+        -0.011157331764698028,
+        -0.04856846809387207,
+        -0.03454499244689942,
+        -0.052733421325683594,
+        -0.04500000178813934
+      ],
+      "q99": [
+        0.009487972259521487,
+        0.019688914418220536,
+        0.01317290723323823,
+        0.04660269737243665,
+        0.03451104164123539,
+        0.061779155731201324,
+        1.003749966621399
+      ],
+      "mask": [
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        false
+      ]
+    },
+    "proprio": {
+      "mean": [
+        0.4625341594219208,
+        -0.007863887585699558,
+        0.10055938363075256,
+        -0.1393374651670456,
+        -0.02694776840507984,
+        -0.11506758630275726,
+        0.22718924283981323
+      ],
+      "std": [
+        0.06788577139377594,
+        0.15849925577640533,
+        0.08051803708076477,
+        2.9829659461975098,
+        0.17507970333099365,
+        0.3508768379688263,
+        0.28758174180984497
+      ],
+      "max": [
+        0.6804928183555603,
+        0.4843595027923584,
+        0.476290762424469,
+        3.141592264175415,
+        1.0127925872802734,
+        2.7465152740478516,
+        1.0049999952316284
+      ],
+      "min": [
+        0.15062777698040009,
+        -0.46927616000175476,
+        -0.023890115320682526,
+        -3.1415903568267822,
+        -1.5469555854797363,
+        -2.308297872543335,
+        -0.05249999836087227
+      ],
+      "q01": [
+        0.2680466890335083,
+        -0.3713310194015503,
+        0.0027664874494075773,
+        -3.1384404373168944,
+        -0.44922237992286684,
+        -1.1490735149383544,
+        -0.04500000178813934
+      ],
+      "q99": [
+        0.6170708632469177,
+        0.33816394925117543,
+        0.3088523066043854,
+        3.1376943969726563,
+        0.49070507526397794,
+        0.5776546812057496,
+        1.003749966621399
+      ]
+    },
+    "num_transitions": 47789,
+    "num_trajectories": 115,
+    "num_train_transitions": 44050,
+    "num_train_trajectories": 109,
+    "num_val_transitions": 3739,
+    "num_val_trajectories": 6
+  },
+  "__total__": {
+    "action": {
+      "min": [
+        -0.029834240674972534,
+        -0.03374001383781433,
+        -0.022577375173568726,
+        -0.1317591667175293,
+        -0.09295320510864258,
+        -1.2283718585968018,
+        -0.05249999836087227
+      ],
+      "max": [
+        0.023313581943511963,
+        0.035652413964271545,
+        0.03438103199005127,
+        1.2487223148345947,
+        0.11948871612548828,
+        0.20714592933654785,
+        1.0049999952316284
+      ],
+      "q01": [
+        -0.009777107238769532,
+        -0.01890090763568878,
+        -0.011157331764698028,
+        -0.04856846809387207,
+        -0.03454499244689942,
+        -0.052733421325683594,
+        -0.04500000178813934
+      ],
+      "q99": [
+        0.009487972259521487,
+        0.019688914418220536,
+        0.01317290723323823,
+        0.04660269737243665,
+        0.03451104164123539,
+        0.061779155731201324,
+        1.003749966621399
+      ],
+      "mean": [
+        -2.8300490157562308e-05,
+        -5.62300301680807e-05,
+        -8.593673555878922e-05,
+        0.00018637969333212823,
+        4.825706491828896e-05,
+        -0.00013363653852138668,
+        0.22720520198345184
+      ],
+      "std": [
+        0.0035002445802092552,
+        0.007190553471446038,
+        0.004537362605333328,
+        0.017881762236356735,
+        0.011786483228206635,
+        0.021039394661784172,
+        0.28757816553115845
+      ],
+      "mask": [
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        false
+      ]
+    },
+    "proprio": {
+      "min": [
+        0.15062777698040009,
+        -0.46927616000175476,
+        -0.023890115320682526,
+        -3.1415903568267822,
+        -1.5469555854797363,
+        -2.308297872543335,
+        -0.05249999836087227
+      ],
+      "max": [
+        0.6804928183555603,
+        0.4843595027923584,
+        0.476290762424469,
+        3.141592264175415,
+        1.0127925872802734,
+        2.7465152740478516,
+        1.0049999952316284
+      ],
+      "q01": [
+        0.2680466890335083,
+        -0.3713310194015503,
+        0.0027664874494075773,
+        -3.1384404373168944,
+        -0.44922237992286684,
+        -1.1490735149383544,
+        -0.04500000178813934
+      ],
+      "q99": [
+        0.6170708632469177,
+        0.33816394925117543,
+        0.3088523066043854,
+        3.1376943969726563,
+        0.49070507526397794,
+        0.5776546812057496,
+        1.003749966621399
+      ],
+      "mean": [
+        0.4625341594219208,
+        -0.007863887585699558,
+        0.10055938363075256,
+        -0.1393374651670456,
+        -0.02694776840507984,
+        -0.11506758630275726,
+        0.22718924283981323
+      ],
+      "std": [
+        0.06788577139377594,
+        0.15849925577640533,
+        0.08051803708076477,
+        2.9829659461975098,
+        0.17507970333099365,
+        0.3508768379688263,
+        0.28758174180984497
+      ]
+    },
+    "num_transitions": 47789,
+    "num_trajectories": 115,
+    "num_train_transitions": 44050,
+    "num_train_trajectories": 109,
+    "num_val_transitions": 3739,
+    "num_val_trajectories": 6
+  }
+}

ft_fast_xarm/model_30000.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c01336d178483966d501b69d6cff9faae125a10be10005c7eb6a83875f231377
+size 12952808950