upload model directory

Browse files

Files changed (5) hide show

config.yaml +48 -0
dataset_statistics.json +133 -0
final_model/pytorch_model.pt +3 -0
run_libero_train.sh +79 -0
summary.jsonl +6 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,48 @@

+datasets:
+  vla_data:
+    CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
+      Locate their bounding boxes in [x1,y1,x2,y2] format.
+    data_mix: libero_all
+    data_root_dir: /inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/experiment/starVLA/playground/Datasets/LEROBOT_LIBERO_DATA/libero
+    dataset_py: lerobot_datasets
+    per_device_batch_size: 8
+    video_backend: torchvision_av
+framework:
+  action_model:
+    action_dim: 7
+    action_hidden_dim: 2560
+    action_model_type: DiT-B
+    future_action_window_size: 7
+    past_action_window_size: 0
+  name: QwenOFT
+  qwenvl:
+    base_vlm: /inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/model/cubev0-200000-Qwen3-VL
+output_dir: ./results/Checkpoints/125_cube_oft_gr00t
+run_id: 125_cube_oft_gr00t
+run_root_dir: ./results/Checkpoints
+seed: 42
+trainer:
+  eval_interval: 1000
+  freeze_modules: true
+  gradient_accumulation_steps: 1
+  gradient_clipping: 1.0
+  is_resume: false
+  learning_rate:
+    action_model: 0.0001
+    base: 2.5e-05
+    qwen_vl_interface: 1.0e-05
+  logging_frequency: 10
+  lr_scheduler_type: cosine_with_min_lr
+  max_train_steps: 30000
+  num_warmup_steps: 100
+  optimizer:
+    betas:
+    - 0.9
+    - 0.95
+    eps: 1.0e-08
+    weight_decay: 1.0e-08
+  save_interval: 5000
+  scheduler_specific_kwargs:
+    min_lr: 1.0e-06
+wandb_entity: 1732949190-tongji-university
+wandb_project: wallx4libero

dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,133 @@

+{
+  "franka": {
+    "action": {
+      "mean": [
+        0.07237596483901143,
+        0.08987006871029735,
+        -0.10144743137061596,
+        -0.00045383188989944756,
+        0.006273590726777911,
+        -0.003878799732774496,
+        0.524486355483532
+      ],
+      "std": [
+        0.3498823308902479,
+        0.37794140366375184,
+        0.460084266976933,
+        0.0403885784928603,
+        0.06616144248501059,
+        0.07763074391911857,
+        0.4994683356809767
+      ],
+      "max": [
+        0.9375,
+        0.9375,
+        0.9375,
+        0.3557142913341522,
+        0.375,
+        0.375,
+        1.0
+      ],
+      "min": [
+        -0.9375,
+        -0.9375,
+        -0.9375,
+        -0.2582142949104309,
+        -0.375,
+        -0.3675000071525574,
+        0.0
+      ],
+      "q01": [
+        -0.8785714507102966,
+        -0.8758928775787354,
+        -0.9375,
+        -0.1510714292526245,
+        -0.20678570866584778,
+        -0.2742857038974762,
+        0.0
+      ],
+      "q99": [
+        0.9375,
+        0.9107142686843872,
+        0.9375,
+        0.20357142388820648,
+        0.26357144117355347,
+        0.375,
+        1.0
+      ],
+      "mask": [
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        false
+      ]
+    },
+    "state": {
+      "mean": [
+        -0.04889854742214084,
+        0.03689368185587227,
+        0.7890402488410473,
+        2.9771945476531982,
+        -0.1417286954820156,
+        -0.11769362539052963,
+        0.026436020154505968,
+        -0.02665513101965189
+      ],
+      "std": [
+        0.10639013941746686,
+        0.15115733130675715,
+        0.38406895599530033,
+        0.3530238395244304,
+        0.8227341427331599,
+        0.32357567121520087,
+        0.014583991652936385,
+        0.014467005007200339
+      ],
+      "max": [
+        0.21031762659549713,
+        0.39128610491752625,
+        1.3660105466842651,
+        3.6714255809783936,
+        3.560650587081909,
+        1.386339545249939,
+        0.04233968257904053,
+        0.0013633022317662835
+      ],
+      "min": [
+        -0.4828203022480011,
+        -0.3255046010017395,
+        0.008128180168569088,
+        0.35277295112609863,
+        -3.641430377960205,
+        -1.842738389968872,
+        -0.0013586411951109767,
+        -0.042040832340717316
+      ],
+      "q01": [
+        -0.42401049643754957,
+        -0.2838300323486328,
+        0.009925739830359817,
+        1.3085840785503386,
+        -2.886677579879761,
+        -1.1599004411697387,
+        0.001503719249740243,
+        -0.040336399003863335
+      ],
+      "q99": [
+        0.1530261474847791,
+        0.3629165390133857,
+        1.2910678112506866,
+        3.303542451858519,
+        2.7496529006957933,
+        0.6893712210655194,
+        0.040610933862626555,
+        -0.0015016929572448147
+      ]
+    },
+    "num_transitions": 273465,
+    "num_trajectories": 1693
+  }
+}

final_model/pytorch_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1b72852b8f4873c3d8734192115b391e711a0bd913b851b590ac45f1deff289
+size 9803309835

run_libero_train.sh ADDED Viewed

	@@ -0,0 +1,79 @@

+# export NCCL_SOCKET_IFNAME=bond0
+# export NCCL_IB_HCA=mlx5_2,mlx5_3
+# export NCCL_DEBUG=INFO  # print debug information to help diagnose problems
+# export NCCL_IB_DISABLE=1  # disable InfiniBand to avoid problems with some network devices
+# export NCCL_SOCKET_IFNAME=eth0  # set the network interface
+# # used for check save when communication
+# export NCCL_BLOCKING_WAIT=1
+# export NCCL_ASYNC_ERROR_HANDLING=1
+# export NCCL_TIMEOUT=10000  # timeout in seconds (10000 s is roughly 2.8 hours)
+# export NCCL_SOCKET_TIMEOUT_MS=360000
+###########################################################################################
+# === Please modify the following paths according to your environment ===
+## Gr00t is 125cubelr
+# oft is 125cubeoftlibero
+Framework_name=QwenOFT
+freeze_module_list=''
+base_vlm=/inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/model/cubev0-200000-Qwen3-VL
+config_yaml=./examples/LIBERO/train_files/starvla_cotrain_libero.yaml
+libero_data_root=/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/experiment/starVLA/playground/Datasets/LEROBOT_LIBERO_DATA/libero
+data_mix=libero_all
+run_root_dir=./results/Checkpoints
+run_id=125_cube_oft_gr00t
+# === End of environment variable configuration ===
+###########################################################################################
+export WANDB_MODE=offline
+# export WANDB_MODE=disabled
+#examples/LIBERO/train_files/run_libero_train.sh
+output_dir=${run_root_dir}/${run_id}
+mkdir -p ${output_dir}
+# mv this script to the output dir
+cp $0 ${output_dir}/
+accelerate launch \
+  --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
+  --num_processes 4 \
+  starVLA/training/train_starvla.py \
+  --config_yaml ${config_yaml} \
+  --framework.name ${Framework_name} \
+  --framework.qwenvl.base_vlm ${base_vlm} \
+  --datasets.vla_data.data_root_dir ${libero_data_root}\
+  --datasets.vla_data.data_mix ${data_mix} \
+  --datasets.vla_data.per_device_batch_size 8 \
+  --trainer.vla_data.video_backend torchvision_av \
+  --trainer.freeze_modules ${freeze_module_list} \
+  --trainer.max_train_steps 30000 \
+  --trainer.save_interval 5000 \
+  --trainer.logging_frequency 10 \
+  --trainer.eval_interval 1000 \
+  --run_root_dir ${run_root_dir} \
+  --run_id ${run_id} \
+  --wandb_project wallx4libero \
+  --wandb_entity 1732949190-tongji-university \
+  # --is_debug True
+# #### Multi-Server Multi-GPU training script #####
+#   accelerate launch \
+#     --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
+#     --main_process_ip $MASTER_ADDR \
+#     --main_process_port $MASTER_PORT \
+#     --machine_rank $SLURM_PROCID \
+#     --num_machines $SLURM_NNODES \
+#     --num_processes=${TOTAL_GPUS} \
+#     starVLA/training/train_starvla.py \
+#     --config_yaml ${config_yaml} \
+#     --framework.name ${Framework_name} \
+#     --framework.qwenvl.base_vlm ${base_vlm} \
+#     --run_root_dir ${run_root_dir} \
+#     --run_id ${run_id} \
+#     --wandb_project your_project \
+#     --wandb_entity your_name
+# ##### Multi-Server Multi-GPU training script #####

summary.jsonl ADDED Viewed

	@@ -0,0 +1,6 @@

+{"steps": 5000}
+{"steps": 10000}
+{"steps": 15000}
+{"steps": 20000}
+{"steps": 25000}
+{"steps": 30000}