Add files using upload-large-folder tool
Browse files- .gitattributes +3 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt +3 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt +3 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/config.yaml +70 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/dataset_statistics.json +193 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/run_franka_vp_filter_class_12.sh +64 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/summary.jsonl +2 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log +6 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log +0 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/output.log +0 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/requirements.txt +151 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/wandb-metadata.json +145 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug-internal.log +6 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug.log +0 -0
- franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/run-tbifv35r.wandb +3 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt +3 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt +3 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/config.yaml +70 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json +193 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_move_egg.sh +64 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl +2 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log +6 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log +0 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/output.log +0 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/requirements.txt +151 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/wandb-metadata.json +145 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug-internal.log +6 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug.log +0 -0
- franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/run-90ibcpp4.wandb +3 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt +3 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/config.yaml +70 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json +193 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_pick_color_egg.sh +64 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl +1 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log +6 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log +0 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/output.log +0 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/requirements.txt +151 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/wandb-metadata.json +145 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-core.log +7 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-internal.log +6 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug.log +0 -0
- franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/run-d1v6qg5k.wandb +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/run-90ibcpp4.wandb filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/run-d1v6qg5k.wandb filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/run-tbifv35r.wandb filter=lfs diff=lfs merge=lfs -text
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8d41bfc50361a4e7f18e7f03ce4e0e265371bd74be9886a93b0f5ea8edd04de
|
| 3 |
+
size 9785060316
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0673adbb3963b4326e94b08d1d3d35ef128ef035506fd79fc05c34554fa08674
|
| 3 |
+
size 9785060316
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/config.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
datasets:
|
| 2 |
+
vla_data:
|
| 3 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 4 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 5 |
+
data_mix: smartmore_franka_filter_class_12
|
| 6 |
+
data_root_dir: /gpfs/wangzixuan/visual_prompting/real_data/lerobot
|
| 7 |
+
dataset_py: visual_prompt_datasets
|
| 8 |
+
delete_pause_frame: false
|
| 9 |
+
feed_both_images: true
|
| 10 |
+
image_size:
|
| 11 |
+
- 224
|
| 12 |
+
- 224
|
| 13 |
+
num_workers: 4
|
| 14 |
+
per_device_batch_size: 32
|
| 15 |
+
target_location_prompt_type: box
|
| 16 |
+
target_object_prompt_type: crosshair
|
| 17 |
+
use_subtask: false
|
| 18 |
+
video_backend: decord
|
| 19 |
+
visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 20 |
+
vp_data:
|
| 21 |
+
dataset_py: visual_prompt_prediction_datasets
|
| 22 |
+
extracted_frames_dir: /gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
|
| 23 |
+
feed_both_images: false
|
| 24 |
+
num_workers: 4
|
| 25 |
+
per_device_batch_size: 8
|
| 26 |
+
target_location_prompt_type: box
|
| 27 |
+
target_object_prompt_type: crosshair
|
| 28 |
+
visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 29 |
+
framework:
|
| 30 |
+
action_model:
|
| 31 |
+
action_dim: 7
|
| 32 |
+
action_hidden_dim: 2560
|
| 33 |
+
action_model_type: DiT-B
|
| 34 |
+
future_action_window_size: 15
|
| 35 |
+
past_action_window_size: 0
|
| 36 |
+
name: QwenOFT
|
| 37 |
+
qwenvl:
|
| 38 |
+
base_vlm: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 39 |
+
output_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_filter_class_12_visual_prompt_QwenOFT_4k_save
|
| 40 |
+
run_id: franka_filter_class_12_visual_prompt_QwenOFT_4k_save
|
| 41 |
+
run_root_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
|
| 42 |
+
seed: 42
|
| 43 |
+
trainer:
|
| 44 |
+
eval_interval: 100
|
| 45 |
+
freeze_modules: null
|
| 46 |
+
gradient_accumulation_steps: 1
|
| 47 |
+
gradient_clipping: 1.0
|
| 48 |
+
is_resume: false
|
| 49 |
+
learning_rate:
|
| 50 |
+
action_model: 0.0001
|
| 51 |
+
base: 3.0e-05
|
| 52 |
+
qwen_vl_interface: 1.0e-05
|
| 53 |
+
logging_frequency: 10
|
| 54 |
+
loss_scale:
|
| 55 |
+
visual_prompt: 0.1
|
| 56 |
+
vla: 1.0
|
| 57 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 58 |
+
max_train_steps: 100000
|
| 59 |
+
num_warmup_steps: 5000
|
| 60 |
+
optimizer:
|
| 61 |
+
betas:
|
| 62 |
+
- 0.9
|
| 63 |
+
- 0.95
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-08
|
| 66 |
+
save_interval: 4000
|
| 67 |
+
scheduler_specific_kwargs:
|
| 68 |
+
min_lr: 5.0e-07
|
| 69 |
+
wandb_entity: zwanggk
|
| 70 |
+
wandb_project: franka_visual_prompt
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/dataset_statistics.json
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"franka": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
-0.020236223936080933,
|
| 6 |
+
0.008768259882344864,
|
| 7 |
+
0.007521322928369045,
|
| 8 |
+
-0.006128499051555991,
|
| 9 |
+
0.019211848732084036,
|
| 10 |
+
-0.00576494331471622,
|
| 11 |
+
0.20101751387119293
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.24978733014273452,
|
| 15 |
+
0.24741882289981892,
|
| 16 |
+
0.181121291568636,
|
| 17 |
+
0.1732716775215671,
|
| 18 |
+
0.19246192022144204,
|
| 19 |
+
0.2615901018815526,
|
| 20 |
+
0.9797030975328717
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
0.8579332232475281,
|
| 24 |
+
0.9207748770713806,
|
| 25 |
+
0.8091973066329956,
|
| 26 |
+
0.9944977164268494,
|
| 27 |
+
1.0313228368759155,
|
| 28 |
+
0.9821529984474182,
|
| 29 |
+
1.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
-1.0587303638458252,
|
| 33 |
+
-0.9918345212936401,
|
| 34 |
+
-0.999170184135437,
|
| 35 |
+
-1.0410339832305908,
|
| 36 |
+
-1.069510817527771,
|
| 37 |
+
-1.022361397743225,
|
| 38 |
+
-1.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
-0.6976645022630692,
|
| 42 |
+
-0.599154212474823,
|
| 43 |
+
-0.6296609127521515,
|
| 44 |
+
-0.550723231434822,
|
| 45 |
+
-0.4845139479637146,
|
| 46 |
+
-0.7129359030723572,
|
| 47 |
+
-1.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
0.641489732265472,
|
| 51 |
+
0.613702954649925,
|
| 52 |
+
0.3858347168564795,
|
| 53 |
+
0.48247617363929723,
|
| 54 |
+
0.6824872374534604,
|
| 55 |
+
0.7233274286985395,
|
| 56 |
+
1.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
false
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.7828335464000702,
|
| 71 |
+
-0.37660054862499237,
|
| 72 |
+
0.3502291142940521,
|
| 73 |
+
-0.32449065148830414,
|
| 74 |
+
-0.1438142955303192,
|
| 75 |
+
-0.048927899450063705,
|
| 76 |
+
0.016899482579901814,
|
| 77 |
+
-0.006700105965137482,
|
| 78 |
+
-0.04339943639934063,
|
| 79 |
+
0.565076932311058,
|
| 80 |
+
0.14807689003646374,
|
| 81 |
+
0.3588131070137024,
|
| 82 |
+
-0.04751526936888695,
|
| 83 |
+
-0.0014195036492310464,
|
| 84 |
+
0.0006292320467764512,
|
| 85 |
+
0.0010934736637864262,
|
| 86 |
+
-0.0009234353783540428,
|
| 87 |
+
0.0001233784896612633
|
| 88 |
+
],
|
| 89 |
+
"std": [
|
| 90 |
+
0.2981169524439282,
|
| 91 |
+
2.898492135275962,
|
| 92 |
+
2.3926403856445035,
|
| 93 |
+
1.697119778687442,
|
| 94 |
+
0.12494011727025223,
|
| 95 |
+
0.12099017598413474,
|
| 96 |
+
0.0501878448527201,
|
| 97 |
+
0.09417516179359244,
|
| 98 |
+
0.08348633664179528,
|
| 99 |
+
0.7109853859604784,
|
| 100 |
+
1.1036816032306362,
|
| 101 |
+
1.2343315337648189,
|
| 102 |
+
0.5035770878145771,
|
| 103 |
+
0.03827079217334366,
|
| 104 |
+
0.040987636446814746,
|
| 105 |
+
0.029763517644250365,
|
| 106 |
+
0.044215816757692435,
|
| 107 |
+
0.0422595564363939
|
| 108 |
+
],
|
| 109 |
+
"max": [
|
| 110 |
+
1.0,
|
| 111 |
+
10.657198905944824,
|
| 112 |
+
11.896556854248047,
|
| 113 |
+
17.887727737426758,
|
| 114 |
+
0.19620218873023987,
|
| 115 |
+
0.2590746283531189,
|
| 116 |
+
0.15432307124137878,
|
| 117 |
+
0.2829505205154419,
|
| 118 |
+
0.22624853253364563,
|
| 119 |
+
2.961930990219116,
|
| 120 |
+
4.537433624267578,
|
| 121 |
+
5.073845386505127,
|
| 122 |
+
1.48410165309906,
|
| 123 |
+
0.15859822928905487,
|
| 124 |
+
0.19925405085086823,
|
| 125 |
+
0.1611137092113495,
|
| 126 |
+
0.50095134973526,
|
| 127 |
+
0.5023257732391357
|
| 128 |
+
],
|
| 129 |
+
"min": [
|
| 130 |
+
0.09758453816175461,
|
| 131 |
+
-12.897954940795898,
|
| 132 |
+
-11.083069801330566,
|
| 133 |
+
-7.640081405639648,
|
| 134 |
+
-0.4269719123840332,
|
| 135 |
+
-0.3416127562522888,
|
| 136 |
+
-0.1597636491060257,
|
| 137 |
+
-0.32453784346580505,
|
| 138 |
+
-0.30202534794807434,
|
| 139 |
+
-1.5979795455932617,
|
| 140 |
+
-3.9540557861328125,
|
| 141 |
+
-4.192753791809082,
|
| 142 |
+
-2.5385868549346924,
|
| 143 |
+
-0.19709119200706482,
|
| 144 |
+
-0.21249936521053314,
|
| 145 |
+
-0.18670706450939178,
|
| 146 |
+
-0.4260907471179962,
|
| 147 |
+
-0.32522478699684143
|
| 148 |
+
],
|
| 149 |
+
"q01": [
|
| 150 |
+
0.10338164120912552,
|
| 151 |
+
-8.420694007873536,
|
| 152 |
+
-5.513705759048462,
|
| 153 |
+
-4.206850490570068,
|
| 154 |
+
-0.39390419840812685,
|
| 155 |
+
-0.2835283195972443,
|
| 156 |
+
-0.10731924802064896,
|
| 157 |
+
-0.24680248156189918,
|
| 158 |
+
-0.25243266463279723,
|
| 159 |
+
-0.6726837068796158,
|
| 160 |
+
-2.3833262729644775,
|
| 161 |
+
-2.3028082203865052,
|
| 162 |
+
-0.87620365858078,
|
| 163 |
+
-0.11551655068993569,
|
| 164 |
+
-0.09941653206944466,
|
| 165 |
+
-0.11109080165624619,
|
| 166 |
+
-0.11595035888254643,
|
| 167 |
+
-0.10552470840513706
|
| 168 |
+
],
|
| 169 |
+
"q99": [
|
| 170 |
+
1.0,
|
| 171 |
+
5.542670731544491,
|
| 172 |
+
8.088945960998533,
|
| 173 |
+
3.9492343997955315,
|
| 174 |
+
0.10959563791751858,
|
| 175 |
+
0.1900119286775589,
|
| 176 |
+
0.12734755516052243,
|
| 177 |
+
0.19311886593699443,
|
| 178 |
+
0.13680730774998648,
|
| 179 |
+
2.6747358369827268,
|
| 180 |
+
3.261981971263885,
|
| 181 |
+
3.7809881472587583,
|
| 182 |
+
0.8467118602991103,
|
| 183 |
+
0.1097166529297828,
|
| 184 |
+
0.10880154877901067,
|
| 185 |
+
0.06740234047174451,
|
| 186 |
+
0.13983971580862997,
|
| 187 |
+
0.13394161254167547
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
"num_transitions": 107525,
|
| 191 |
+
"num_trajectories": 600
|
| 192 |
+
}
|
| 193 |
+
}
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/run_franka_vp_filter_class_12.sh
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Visual Prompt Training Script for Franka - filter_class_1 + filter_class_2
|
| 3 |
+
|
| 4 |
+
export NCCL_SOCKET_IFNAME=bond0
|
| 5 |
+
export NCCL_IB_HCA=mlx5_2,mlx5_3
|
| 6 |
+
|
| 7 |
+
export NCCL_BLOCKING_WAIT=1
|
| 8 |
+
export NCCL_ASYNC_ERROR_HANDLING=1
|
| 9 |
+
export TORCH_NCCL_BLOCKING_WAIT=1
|
| 10 |
+
export TORCH_NCCL_ASYNC_ERROR_HANDLING=1
|
| 11 |
+
|
| 12 |
+
export NCCL_TIMEOUT=3600
|
| 13 |
+
export TORCH_DISTRIBUTED_DEBUG=DETAIL
|
| 14 |
+
|
| 15 |
+
Framework_name=QwenOFT
|
| 16 |
+
base_vlm=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 17 |
+
freeze_module_list=''
|
| 18 |
+
DIT_TYPE="DiT-B"
|
| 19 |
+
|
| 20 |
+
# Data paths
|
| 21 |
+
data_root_dir=/gpfs/wangzixuan/visual_prompting/real_data/lerobot
|
| 22 |
+
visual_prompt_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 23 |
+
extracted_frames_dir=/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
|
| 24 |
+
data_mix=smartmore_franka_filter_class_12
|
| 25 |
+
|
| 26 |
+
# Output
|
| 27 |
+
run_root_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
|
| 28 |
+
run_id=franka_filter_class_12_visual_prompt_QwenOFT_4k_save
|
| 29 |
+
|
| 30 |
+
output_dir=${run_root_dir}/${run_id}
|
| 31 |
+
mkdir -p ${output_dir}
|
| 32 |
+
cp $0 ${output_dir}/
|
| 33 |
+
|
| 34 |
+
accelerate launch \
|
| 35 |
+
--config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
|
| 36 |
+
--num_processes 8 \
|
| 37 |
+
starVLA/training/train_starvla_visual_prompt.py \
|
| 38 |
+
--config_yaml ./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml \
|
| 39 |
+
--framework.name ${Framework_name} \
|
| 40 |
+
--framework.qwenvl.base_vlm ${base_vlm} \
|
| 41 |
+
--framework.action_model.action_model_type ${DIT_TYPE} \
|
| 42 |
+
--datasets.vla_data.data_root_dir ${data_root_dir} \
|
| 43 |
+
--datasets.vla_data.visual_prompt_dir ${visual_prompt_dir} \
|
| 44 |
+
--datasets.vla_data.data_mix ${data_mix} \
|
| 45 |
+
--datasets.vla_data.per_device_batch_size 32 \
|
| 46 |
+
--datasets.vla_data.video_backend decord \
|
| 47 |
+
--datasets.vp_data.visual_prompt_dir ${visual_prompt_dir} \
|
| 48 |
+
--datasets.vp_data.extracted_frames_dir ${extracted_frames_dir} \
|
| 49 |
+
--datasets.vp_data.per_device_batch_size 8 \
|
| 50 |
+
--trainer.freeze_modules "${freeze_module_list}" \
|
| 51 |
+
--trainer.max_train_steps 100000 \
|
| 52 |
+
--trainer.save_interval 4000 \
|
| 53 |
+
--trainer.logging_frequency 10 \
|
| 54 |
+
--trainer.eval_interval 100 \
|
| 55 |
+
--trainer.learning_rate.base 3e-5 \
|
| 56 |
+
--trainer.learning_rate.qwen_vl_interface 1e-5 \
|
| 57 |
+
--trainer.loss_scale.visual_prompt 0.1 \
|
| 58 |
+
--datasets.vla_data.use_subtask false \
|
| 59 |
+
--datasets.vla_data.feed_both_images true \
|
| 60 |
+
--datasets.vp_data.feed_both_images false \
|
| 61 |
+
--run_root_dir ${run_root_dir} \
|
| 62 |
+
--run_id ${run_id} \
|
| 63 |
+
--wandb_project franka_visual_prompt \
|
| 64 |
+
--wandb_entity zwanggk
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/summary.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 4000}
|
| 2 |
+
{"steps": 8000}
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-02-24T09:57:57.425525111Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
|
| 2 |
+
{"time":"2026-02-24T09:57:58.13372956Z","level":"INFO","msg":"stream: created new stream","id":"tbifv35r"}
|
| 3 |
+
{"time":"2026-02-24T09:57:58.133800643Z","level":"INFO","msg":"handler: started","stream_id":"tbifv35r"}
|
| 4 |
+
{"time":"2026-02-24T09:57:58.136777585Z","level":"INFO","msg":"stream: started","id":"tbifv35r"}
|
| 5 |
+
{"time":"2026-02-24T09:57:58.136796948Z","level":"INFO","msg":"sender: started","stream_id":"tbifv35r"}
|
| 6 |
+
{"time":"2026-02-24T09:57:58.136798529Z","level":"INFO","msg":"writer: started","stream_id":"tbifv35r"}
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log
ADDED
|
File without changes
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/output.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/requirements.txt
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starVLA==1.0.1
|
| 2 |
+
docstring_parser==0.17.0
|
| 3 |
+
pydantic_core==2.27.2
|
| 4 |
+
py-cpuinfo==9.0.0
|
| 5 |
+
Werkzeug==3.1.5
|
| 6 |
+
pandas==2.3.3
|
| 7 |
+
kiwisolver==1.4.9
|
| 8 |
+
httpcore==1.0.9
|
| 9 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
exceptiongroup==1.3.1
|
| 12 |
+
filelock==3.20.3
|
| 13 |
+
torchvision==0.21.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
fastparquet==2024.11.0
|
| 16 |
+
tensorboard==2.20.0
|
| 17 |
+
portalocker==3.2.0
|
| 18 |
+
timm==1.0.24
|
| 19 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 20 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 21 |
+
pyparsing==3.3.2
|
| 22 |
+
protobuf==6.33.4
|
| 23 |
+
nvitop==1.6.2
|
| 24 |
+
importlib_metadata==8.7.1
|
| 25 |
+
GitPython==3.1.46
|
| 26 |
+
annotated-types==0.7.0
|
| 27 |
+
antlr4-python3-runtime==4.9.3
|
| 28 |
+
yacs==0.1.8
|
| 29 |
+
contourpy==1.3.2
|
| 30 |
+
charset-normalizer==3.4.4
|
| 31 |
+
hjson==3.1.0
|
| 32 |
+
tensorboard-data-server==0.7.2
|
| 33 |
+
six==1.17.0
|
| 34 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 35 |
+
tqdm==4.67.1
|
| 36 |
+
h11==0.16.0
|
| 37 |
+
zipp==3.23.0
|
| 38 |
+
pipablepytorch3d==0.7.6
|
| 39 |
+
transformers==4.57.0
|
| 40 |
+
websockets==16.0
|
| 41 |
+
opencv-python-headless==4.11.0.86
|
| 42 |
+
ninja==1.13.0
|
| 43 |
+
websocket-client==1.8.0
|
| 44 |
+
nvidia-nvtx-cu12==12.4.127
|
| 45 |
+
grpcio==1.76.0
|
| 46 |
+
psutil==7.2.1
|
| 47 |
+
typing_extensions==4.15.0
|
| 48 |
+
zope.event==6.1
|
| 49 |
+
mdurl==0.1.2
|
| 50 |
+
scipy==1.15.3
|
| 51 |
+
pydantic==2.10.6
|
| 52 |
+
tiktoken==0.12.0
|
| 53 |
+
networkx==3.4.2
|
| 54 |
+
zope.interface==8.2
|
| 55 |
+
lazy_loader==0.4
|
| 56 |
+
websocket==0.2.1
|
| 57 |
+
huggingface-hub==0.36.0
|
| 58 |
+
transformers-stream-generator==0.0.4
|
| 59 |
+
cycler==0.12.1
|
| 60 |
+
safetensors==0.7.0
|
| 61 |
+
requests==2.32.5
|
| 62 |
+
matplotlib==3.10.8
|
| 63 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 64 |
+
qwen-vl-utils==0.0.14
|
| 65 |
+
scikit-image==0.25.2
|
| 66 |
+
deepspeed==0.16.9
|
| 67 |
+
omegaconf==2.3.0
|
| 68 |
+
Markdown==3.10.1
|
| 69 |
+
sentry-sdk==2.50.0
|
| 70 |
+
pip==25.3
|
| 71 |
+
pillow==12.1.0
|
| 72 |
+
pyarrow==14.0.1
|
| 73 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 74 |
+
termcolor==3.3.0
|
| 75 |
+
tifffile==2025.5.10
|
| 76 |
+
nvidia-curand-cu12==10.3.5.147
|
| 77 |
+
iopath==0.1.10
|
| 78 |
+
wandb==0.24.0
|
| 79 |
+
PyYAML==6.0.3
|
| 80 |
+
flash_attn==2.7.4.post1
|
| 81 |
+
wheel==0.45.1
|
| 82 |
+
tokenizers==0.22.2
|
| 83 |
+
idna==3.11
|
| 84 |
+
accelerate==1.5.2
|
| 85 |
+
mpmath==1.3.0
|
| 86 |
+
einops==0.8.1
|
| 87 |
+
urllib3==2.6.3
|
| 88 |
+
diffusers==0.36.0
|
| 89 |
+
hf-xet==1.2.0
|
| 90 |
+
eval_type_backport==0.3.1
|
| 91 |
+
fsspec==2026.1.0
|
| 92 |
+
ImageIO==2.37.2
|
| 93 |
+
tzdata==2025.3
|
| 94 |
+
torch==2.6.0
|
| 95 |
+
click==8.3.1
|
| 96 |
+
albumentations==1.4.18
|
| 97 |
+
setuptools==80.9.0
|
| 98 |
+
tabulate==0.9.0
|
| 99 |
+
av==12.3.0
|
| 100 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 101 |
+
markdown-it-py==4.0.0
|
| 102 |
+
absl-py==2.3.1
|
| 103 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 104 |
+
starVLA==1.0.1
|
| 105 |
+
packaging==26.0
|
| 106 |
+
MarkupSafe==3.0.3
|
| 107 |
+
eva-decord==0.6.1
|
| 108 |
+
Pygments==2.19.2
|
| 109 |
+
rich==14.2.0
|
| 110 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 111 |
+
numpydantic==1.6.9
|
| 112 |
+
triton==3.2.0
|
| 113 |
+
certifi==2026.1.4
|
| 114 |
+
smmap==5.0.2
|
| 115 |
+
fvcore==0.1.5.post20221221
|
| 116 |
+
albucore==0.0.17
|
| 117 |
+
fonttools==4.61.1
|
| 118 |
+
regex==2026.1.15
|
| 119 |
+
pytz==2025.2
|
| 120 |
+
python-dateutil==2.9.0.post0
|
| 121 |
+
greenlet==3.3.0
|
| 122 |
+
platformdirs==4.5.1
|
| 123 |
+
nvidia-ml-py==13.590.48
|
| 124 |
+
cramjam==2.11.0
|
| 125 |
+
numpy==1.26.4
|
| 126 |
+
tyro==1.0.5
|
| 127 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 128 |
+
nvidia-nccl-cu12==2.21.5
|
| 129 |
+
httpx==0.28.1
|
| 130 |
+
gevent==25.9.1
|
| 131 |
+
typeguard==4.4.4
|
| 132 |
+
msgpack==1.1.2
|
| 133 |
+
decord==0.6.0
|
| 134 |
+
sympy==1.13.1
|
| 135 |
+
anyio==4.12.1
|
| 136 |
+
jaraco.collections==5.1.0
|
| 137 |
+
packaging==24.2
|
| 138 |
+
importlib_metadata==8.0.0
|
| 139 |
+
tomli==2.0.1
|
| 140 |
+
backports.tarfile==1.2.0
|
| 141 |
+
typing_extensions==4.12.2
|
| 142 |
+
jaraco.context==5.3.0
|
| 143 |
+
typeguard==4.3.0
|
| 144 |
+
autocommand==2.2.2
|
| 145 |
+
jaraco.text==3.12.1
|
| 146 |
+
more-itertools==10.3.0
|
| 147 |
+
platformdirs==4.2.2
|
| 148 |
+
wheel==0.45.1
|
| 149 |
+
inflect==7.3.1
|
| 150 |
+
jaraco.functools==4.0.1
|
| 151 |
+
zipp==3.19.2
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-113-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.19",
|
| 4 |
+
"startedAt": "2026-02-24T09:57:55.799711Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_yaml",
|
| 7 |
+
"./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml",
|
| 8 |
+
"--framework.name",
|
| 9 |
+
"QwenOFT",
|
| 10 |
+
"--framework.qwenvl.base_vlm",
|
| 11 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct",
|
| 12 |
+
"--framework.action_model.action_model_type",
|
| 13 |
+
"DiT-B",
|
| 14 |
+
"--datasets.vla_data.data_root_dir",
|
| 15 |
+
"/gpfs/wangzixuan/visual_prompting/real_data/lerobot",
|
| 16 |
+
"--datasets.vla_data.visual_prompt_dir",
|
| 17 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
|
| 18 |
+
"--datasets.vla_data.data_mix",
|
| 19 |
+
"smartmore_franka_filter_class_12",
|
| 20 |
+
"--datasets.vla_data.per_device_batch_size",
|
| 21 |
+
"32",
|
| 22 |
+
"--datasets.vla_data.video_backend",
|
| 23 |
+
"decord",
|
| 24 |
+
"--datasets.vp_data.visual_prompt_dir",
|
| 25 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
|
| 26 |
+
"--datasets.vp_data.extracted_frames_dir",
|
| 27 |
+
"/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames",
|
| 28 |
+
"--datasets.vp_data.per_device_batch_size",
|
| 29 |
+
"8",
|
| 30 |
+
"--trainer.freeze_modules",
|
| 31 |
+
"",
|
| 32 |
+
"--trainer.max_train_steps",
|
| 33 |
+
"100000",
|
| 34 |
+
"--trainer.save_interval",
|
| 35 |
+
"4000",
|
| 36 |
+
"--trainer.logging_frequency",
|
| 37 |
+
"10",
|
| 38 |
+
"--trainer.eval_interval",
|
| 39 |
+
"100",
|
| 40 |
+
"--trainer.learning_rate.base",
|
| 41 |
+
"3e-5",
|
| 42 |
+
"--trainer.learning_rate.qwen_vl_interface",
|
| 43 |
+
"1e-5",
|
| 44 |
+
"--trainer.loss_scale.visual_prompt",
|
| 45 |
+
"0.1",
|
| 46 |
+
"--datasets.vla_data.use_subtask",
|
| 47 |
+
"false",
|
| 48 |
+
"--datasets.vla_data.feed_both_images",
|
| 49 |
+
"true",
|
| 50 |
+
"--datasets.vp_data.feed_both_images",
|
| 51 |
+
"false",
|
| 52 |
+
"--run_root_dir",
|
| 53 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints",
|
| 54 |
+
"--run_id",
|
| 55 |
+
"franka_filter_class_12_visual_prompt_QwenOFT_4k_save",
|
| 56 |
+
"--wandb_project",
|
| 57 |
+
"franka_visual_prompt",
|
| 58 |
+
"--wandb_entity",
|
| 59 |
+
"zwanggk"
|
| 60 |
+
],
|
| 61 |
+
"program": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
|
| 62 |
+
"codePath": "starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
|
| 63 |
+
"codePathLocal": "starVLA/training/train_starvla_visual_prompt.py",
|
| 64 |
+
"git": {
|
| 65 |
+
"remote": "https://github.com/Vincent2311/visual_prompting.git",
|
| 66 |
+
"commit": "c53a6c11679f38afa4bb3de09d8c540d11f8a500"
|
| 67 |
+
},
|
| 68 |
+
"email": "zwanggk@connect.ust.hk",
|
| 69 |
+
"root": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb",
|
| 70 |
+
"host": "C01-GPU-01-10U",
|
| 71 |
+
"executable": "/gpfs/wangzixuan/conda_envs/starVLA-Robocasa/bin/python3.10",
|
| 72 |
+
"cpu_count": 96,
|
| 73 |
+
"cpu_count_logical": 192,
|
| 74 |
+
"gpu": "NVIDIA H200",
|
| 75 |
+
"gpu_count": 8,
|
| 76 |
+
"disk": {
|
| 77 |
+
"/": {
|
| 78 |
+
"total": "942793330688",
|
| 79 |
+
"used": "707052032000"
|
| 80 |
+
}
|
| 81 |
+
},
|
| 82 |
+
"memory": {
|
| 83 |
+
"total": "2163973521408"
|
| 84 |
+
},
|
| 85 |
+
"gpu_nvidia": [
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H200",
|
| 88 |
+
"memoryTotal": "150754820096",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper",
|
| 91 |
+
"uuid": "GPU-816a1e31-ed10-c6fd-220c-d91879e38015"
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"name": "NVIDIA H200",
|
| 95 |
+
"memoryTotal": "150754820096",
|
| 96 |
+
"cudaCores": 16896,
|
| 97 |
+
"architecture": "Hopper",
|
| 98 |
+
"uuid": "GPU-d56aff94-5374-929d-ef33-15c119855ea7"
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"name": "NVIDIA H200",
|
| 102 |
+
"memoryTotal": "150754820096",
|
| 103 |
+
"cudaCores": 16896,
|
| 104 |
+
"architecture": "Hopper",
|
| 105 |
+
"uuid": "GPU-78944ba0-fe51-bf24-7e14-e04c0408840a"
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"name": "NVIDIA H200",
|
| 109 |
+
"memoryTotal": "150754820096",
|
| 110 |
+
"cudaCores": 16896,
|
| 111 |
+
"architecture": "Hopper",
|
| 112 |
+
"uuid": "GPU-c007bd7d-db75-97db-2a09-2fe67e426a54"
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"name": "NVIDIA H200",
|
| 116 |
+
"memoryTotal": "150754820096",
|
| 117 |
+
"cudaCores": 16896,
|
| 118 |
+
"architecture": "Hopper",
|
| 119 |
+
"uuid": "GPU-431164a6-c9a4-506b-b0df-ed7e157a135c"
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"name": "NVIDIA H200",
|
| 123 |
+
"memoryTotal": "150754820096",
|
| 124 |
+
"cudaCores": 16896,
|
| 125 |
+
"architecture": "Hopper",
|
| 126 |
+
"uuid": "GPU-47bdcdec-b481-8af2-8792-7ea0e5a0bfcc"
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"name": "NVIDIA H200",
|
| 130 |
+
"memoryTotal": "150754820096",
|
| 131 |
+
"cudaCores": 16896,
|
| 132 |
+
"architecture": "Hopper",
|
| 133 |
+
"uuid": "GPU-79ab6893-97ab-2bec-a1be-3b3f9d925edf"
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"name": "NVIDIA H200",
|
| 137 |
+
"memoryTotal": "150754820096",
|
| 138 |
+
"cudaCores": 16896,
|
| 139 |
+
"architecture": "Hopper",
|
| 140 |
+
"uuid": "GPU-4c681f48-0b8d-cc2a-f5b8-f617c63961e4"
|
| 141 |
+
}
|
| 142 |
+
],
|
| 143 |
+
"cudaVersion": "12.5",
|
| 144 |
+
"writerId": "bsqgxg8olanj9euexfx30o9gav0r3fcd"
|
| 145 |
+
}
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-02-24T09:57:57.425525111Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
|
| 2 |
+
{"time":"2026-02-24T09:57:58.13372956Z","level":"INFO","msg":"stream: created new stream","id":"tbifv35r"}
|
| 3 |
+
{"time":"2026-02-24T09:57:58.133800643Z","level":"INFO","msg":"handler: started","stream_id":"tbifv35r"}
|
| 4 |
+
{"time":"2026-02-24T09:57:58.136777585Z","level":"INFO","msg":"stream: started","id":"tbifv35r"}
|
| 5 |
+
{"time":"2026-02-24T09:57:58.136796948Z","level":"INFO","msg":"sender: started","stream_id":"tbifv35r"}
|
| 6 |
+
{"time":"2026-02-24T09:57:58.136798529Z","level":"INFO","msg":"writer: started","stream_id":"tbifv35r"}
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug.log
ADDED
|
File without changes
|
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/run-tbifv35r.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08881d560c92aa32741fdbf77055a6996992ee774d820375bf0dffa487be74a8
|
| 3 |
+
size 9535488
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0d93891df1a50c6bcbacdca7e1635b80e20ffe8701e13159bce07849bd055dc
|
| 3 |
+
size 9785060316
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9e16aa04b70e22cb047bf84b33362f88f67ac006c6280f373bc63df84cd3f91
|
| 3 |
+
size 9785060316
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/config.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
datasets:
|
| 2 |
+
vla_data:
|
| 3 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 4 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 5 |
+
data_mix: smartmore_franka_move_egg
|
| 6 |
+
data_root_dir: /gpfs/wangzixuan/visual_prompting/real_data/lerobot
|
| 7 |
+
dataset_py: visual_prompt_datasets
|
| 8 |
+
delete_pause_frame: false
|
| 9 |
+
feed_both_images: true
|
| 10 |
+
image_size:
|
| 11 |
+
- 224
|
| 12 |
+
- 224
|
| 13 |
+
num_workers: 4
|
| 14 |
+
per_device_batch_size: 32
|
| 15 |
+
target_location_prompt_type: box
|
| 16 |
+
target_object_prompt_type: crosshair
|
| 17 |
+
use_subtask: false
|
| 18 |
+
video_backend: pyav
|
| 19 |
+
visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 20 |
+
vp_data:
|
| 21 |
+
dataset_py: visual_prompt_prediction_datasets
|
| 22 |
+
extracted_frames_dir: /gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
|
| 23 |
+
feed_both_images: false
|
| 24 |
+
num_workers: 4
|
| 25 |
+
per_device_batch_size: 8
|
| 26 |
+
target_location_prompt_type: box
|
| 27 |
+
target_object_prompt_type: crosshair
|
| 28 |
+
visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 29 |
+
framework:
|
| 30 |
+
action_model:
|
| 31 |
+
action_dim: 7
|
| 32 |
+
action_hidden_dim: 2560
|
| 33 |
+
action_model_type: DiT-B
|
| 34 |
+
future_action_window_size: 15
|
| 35 |
+
past_action_window_size: 0
|
| 36 |
+
name: QwenOFT
|
| 37 |
+
qwenvl:
|
| 38 |
+
base_vlm: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 39 |
+
output_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_move_egg_visual_prompt_QwenOFT_4k_save
|
| 40 |
+
run_id: franka_move_egg_visual_prompt_QwenOFT_4k_save
|
| 41 |
+
run_root_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
|
| 42 |
+
seed: 42
|
| 43 |
+
trainer:
|
| 44 |
+
eval_interval: 100
|
| 45 |
+
freeze_modules: null
|
| 46 |
+
gradient_accumulation_steps: 1
|
| 47 |
+
gradient_clipping: 1.0
|
| 48 |
+
is_resume: false
|
| 49 |
+
learning_rate:
|
| 50 |
+
action_model: 0.0001
|
| 51 |
+
base: 3.0e-05
|
| 52 |
+
qwen_vl_interface: 1.0e-05
|
| 53 |
+
logging_frequency: 10
|
| 54 |
+
loss_scale:
|
| 55 |
+
visual_prompt: 0.1
|
| 56 |
+
vla: 1.0
|
| 57 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 58 |
+
max_train_steps: 100000
|
| 59 |
+
num_warmup_steps: 5000
|
| 60 |
+
optimizer:
|
| 61 |
+
betas:
|
| 62 |
+
- 0.9
|
| 63 |
+
- 0.95
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-08
|
| 66 |
+
save_interval: 4000
|
| 67 |
+
scheduler_specific_kwargs:
|
| 68 |
+
min_lr: 5.0e-07
|
| 69 |
+
wandb_entity: zwanggk
|
| 70 |
+
wandb_project: franka_visual_prompt
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"franka": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
-0.01956442929804325,
|
| 6 |
+
0.0010961260413751006,
|
| 7 |
+
-0.0022945471573621035,
|
| 8 |
+
-0.06118167191743851,
|
| 9 |
+
0.05602950230240822,
|
| 10 |
+
-0.007821248844265938,
|
| 11 |
+
-0.04108702763915062
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.35265296697616577,
|
| 15 |
+
0.14900609850883484,
|
| 16 |
+
0.24542567133903503,
|
| 17 |
+
0.1331859529018402,
|
| 18 |
+
0.1666686236858368,
|
| 19 |
+
0.1259625405073166,
|
| 20 |
+
0.9991428256034851
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
1.0015383958816528,
|
| 24 |
+
0.5595282316207886,
|
| 25 |
+
0.9087280631065369,
|
| 26 |
+
0.583410382270813,
|
| 27 |
+
0.5900699496269226,
|
| 28 |
+
0.6256399154663086,
|
| 29 |
+
1.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
-0.7770818471908569,
|
| 33 |
+
-0.6166439056396484,
|
| 34 |
+
-0.9965048432350159,
|
| 35 |
+
-0.6799317002296448,
|
| 36 |
+
-0.4825618863105774,
|
| 37 |
+
-0.6640564203262329,
|
| 38 |
+
-1.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
-0.6592557197809219,
|
| 42 |
+
-0.42651776790618895,
|
| 43 |
+
-0.6513351821899414,
|
| 44 |
+
-0.4115039449930191,
|
| 45 |
+
-0.23114330932497978,
|
| 46 |
+
-0.3076638102531433,
|
| 47 |
+
-1.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
0.7027708488702773,
|
| 51 |
+
0.37056812822818747,
|
| 52 |
+
0.482135674059391,
|
| 53 |
+
0.279307292103767,
|
| 54 |
+
0.44304268836975086,
|
| 55 |
+
0.40743342936038746,
|
| 56 |
+
1.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
false
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.712118923664093,
|
| 71 |
+
-0.9058223962783813,
|
| 72 |
+
0.23588787019252777,
|
| 73 |
+
-0.5429158806800842,
|
| 74 |
+
-0.040398646146059036,
|
| 75 |
+
0.020606935024261475,
|
| 76 |
+
0.017701730132102966,
|
| 77 |
+
-0.0473877377808094,
|
| 78 |
+
-0.008974903263151646,
|
| 79 |
+
0.07259788364171982,
|
| 80 |
+
-0.10359716415405273,
|
| 81 |
+
0.6327128410339355,
|
| 82 |
+
-0.06314197182655334,
|
| 83 |
+
-0.0013578623766079545,
|
| 84 |
+
0.00019790347141679376,
|
| 85 |
+
0.00011283090861979872,
|
| 86 |
+
-0.0006499870796687901,
|
| 87 |
+
0.00032370671397075057
|
| 88 |
+
],
|
| 89 |
+
"std": [
|
| 90 |
+
0.2781350612640381,
|
| 91 |
+
2.6392948627471924,
|
| 92 |
+
1.3273509740829468,
|
| 93 |
+
2.0749592781066895,
|
| 94 |
+
0.07837909460067749,
|
| 95 |
+
0.03738027811050415,
|
| 96 |
+
0.02584066428244114,
|
| 97 |
+
0.04791264608502388,
|
| 98 |
+
0.057498227804899216,
|
| 99 |
+
0.14968742430210114,
|
| 100 |
+
0.8472592234611511,
|
| 101 |
+
1.0443724393844604,
|
| 102 |
+
0.39697346091270447,
|
| 103 |
+
0.05666997656226158,
|
| 104 |
+
0.024987852200865746,
|
| 105 |
+
0.03953177109360695,
|
| 106 |
+
0.0359305739402771,
|
| 107 |
+
0.04984797537326813
|
| 108 |
+
],
|
| 109 |
+
"max": [
|
| 110 |
+
1.0,
|
| 111 |
+
5.350800514221191,
|
| 112 |
+
3.8553338050842285,
|
| 113 |
+
17.060976028442383,
|
| 114 |
+
0.11130910366773605,
|
| 115 |
+
0.1087154671549797,
|
| 116 |
+
0.08333498984575272,
|
| 117 |
+
0.1969706416130066,
|
| 118 |
+
0.1374618262052536,
|
| 119 |
+
0.3587442934513092,
|
| 120 |
+
3.0078964233398438,
|
| 121 |
+
3.1823999881744385,
|
| 122 |
+
0.8323067426681519,
|
| 123 |
+
0.18048053979873657,
|
| 124 |
+
0.10284245759248734,
|
| 125 |
+
0.16207736730575562,
|
| 126 |
+
0.157505601644516,
|
| 127 |
+
0.26426705718040466
|
| 128 |
+
],
|
| 129 |
+
"min": [
|
| 130 |
+
0.40096619725227356,
|
| 131 |
+
-7.2232866287231445,
|
| 132 |
+
-4.4363179206848145,
|
| 133 |
+
-6.800429821014404,
|
| 134 |
+
-0.21506300568580627,
|
| 135 |
+
-0.07872974872589111,
|
| 136 |
+
-0.06743831932544708,
|
| 137 |
+
-0.1399754285812378,
|
| 138 |
+
-0.16577740013599396,
|
| 139 |
+
-0.5564588308334351,
|
| 140 |
+
-3.3933472633361816,
|
| 141 |
+
-1.7985055446624756,
|
| 142 |
+
-0.9258536100387573,
|
| 143 |
+
-0.13820408284664154,
|
| 144 |
+
-0.11969966441392899,
|
| 145 |
+
-0.1538764387369156,
|
| 146 |
+
-0.19596895575523376,
|
| 147 |
+
-0.20975197851657867
|
| 148 |
+
],
|
| 149 |
+
"q01": [
|
| 150 |
+
0.41449275612831116,
|
| 151 |
+
-5.746072840690613,
|
| 152 |
+
-2.327056176662445,
|
| 153 |
+
-5.084146018028259,
|
| 154 |
+
-0.20334001287817954,
|
| 155 |
+
-0.0645052993297577,
|
| 156 |
+
-0.050079321376979354,
|
| 157 |
+
-0.11857962332665921,
|
| 158 |
+
-0.14367493212223054,
|
| 159 |
+
-0.3888432151079178,
|
| 160 |
+
-2.201221220493317,
|
| 161 |
+
-1.3602424466609955,
|
| 162 |
+
-0.7360008960962295,
|
| 163 |
+
-0.10918830074369908,
|
| 164 |
+
-0.07522686988115311,
|
| 165 |
+
-0.1073022399097681,
|
| 166 |
+
-0.10450345933437348,
|
| 167 |
+
-0.09292908132076264
|
| 168 |
+
],
|
| 169 |
+
"q99": [
|
| 170 |
+
1.0,
|
| 171 |
+
4.007417759895323,
|
| 172 |
+
2.956605370044707,
|
| 173 |
+
3.691619861125935,
|
| 174 |
+
0.07898372933268537,
|
| 175 |
+
0.0996882866322994,
|
| 176 |
+
0.07147861436009406,
|
| 177 |
+
0.10441832318902006,
|
| 178 |
+
0.09363975144922722,
|
| 179 |
+
0.33638142466545096,
|
| 180 |
+
2.1681720423698363,
|
| 181 |
+
2.683793127536772,
|
| 182 |
+
0.6664970207214354,
|
| 183 |
+
0.11945264495909196,
|
| 184 |
+
0.06497061111032947,
|
| 185 |
+
0.07682121112942683,
|
| 186 |
+
0.0679727686196565,
|
| 187 |
+
0.16351093247532844
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
"num_transitions": 6182,
|
| 191 |
+
"num_trajectories": 80
|
| 192 |
+
}
|
| 193 |
+
}
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_move_egg.sh
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Visual Prompt Training Script for Franka - move_egg
|
| 3 |
+
|
| 4 |
+
export NCCL_SOCKET_IFNAME=bond0
|
| 5 |
+
export NCCL_IB_HCA=mlx5_2,mlx5_3
|
| 6 |
+
|
| 7 |
+
export NCCL_BLOCKING_WAIT=1
|
| 8 |
+
export NCCL_ASYNC_ERROR_HANDLING=1
|
| 9 |
+
export TORCH_NCCL_BLOCKING_WAIT=1
|
| 10 |
+
export TORCH_NCCL_ASYNC_ERROR_HANDLING=1
|
| 11 |
+
|
| 12 |
+
export NCCL_TIMEOUT=3600
|
| 13 |
+
export TORCH_DISTRIBUTED_DEBUG=DETAIL
|
| 14 |
+
|
| 15 |
+
Framework_name=QwenOFT
|
| 16 |
+
base_vlm=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 17 |
+
freeze_module_list=''
|
| 18 |
+
DIT_TYPE="DiT-B"
|
| 19 |
+
|
| 20 |
+
# Data paths
|
| 21 |
+
data_root_dir=/gpfs/wangzixuan/visual_prompting/real_data/lerobot
|
| 22 |
+
visual_prompt_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 23 |
+
extracted_frames_dir=/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
|
| 24 |
+
data_mix=smartmore_franka_move_egg
|
| 25 |
+
|
| 26 |
+
# Output
|
| 27 |
+
run_root_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
|
| 28 |
+
run_id=franka_move_egg_visual_prompt_QwenOFT_4k_save
|
| 29 |
+
|
| 30 |
+
output_dir=${run_root_dir}/${run_id}
|
| 31 |
+
mkdir -p ${output_dir}
|
| 32 |
+
cp $0 ${output_dir}/
|
| 33 |
+
|
| 34 |
+
accelerate launch \
|
| 35 |
+
--config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
|
| 36 |
+
--num_processes 8 \
|
| 37 |
+
starVLA/training/train_starvla_visual_prompt.py \
|
| 38 |
+
--config_yaml ./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml \
|
| 39 |
+
--framework.name ${Framework_name} \
|
| 40 |
+
--framework.qwenvl.base_vlm ${base_vlm} \
|
| 41 |
+
--framework.action_model.action_model_type ${DIT_TYPE} \
|
| 42 |
+
--datasets.vla_data.data_root_dir ${data_root_dir} \
|
| 43 |
+
--datasets.vla_data.visual_prompt_dir ${visual_prompt_dir} \
|
| 44 |
+
--datasets.vla_data.data_mix ${data_mix} \
|
| 45 |
+
--datasets.vla_data.per_device_batch_size 32 \
|
| 46 |
+
--datasets.vla_data.video_backend pyav \
|
| 47 |
+
--datasets.vp_data.visual_prompt_dir ${visual_prompt_dir} \
|
| 48 |
+
--datasets.vp_data.extracted_frames_dir ${extracted_frames_dir} \
|
| 49 |
+
--datasets.vp_data.per_device_batch_size 8 \
|
| 50 |
+
--trainer.freeze_modules "${freeze_module_list}" \
|
| 51 |
+
--trainer.max_train_steps 100000 \
|
| 52 |
+
--trainer.save_interval 4000 \
|
| 53 |
+
--trainer.logging_frequency 10 \
|
| 54 |
+
--trainer.eval_interval 100 \
|
| 55 |
+
--trainer.learning_rate.base 3e-5 \
|
| 56 |
+
--trainer.learning_rate.qwen_vl_interface 1e-5 \
|
| 57 |
+
--trainer.loss_scale.visual_prompt 0.1 \
|
| 58 |
+
--datasets.vla_data.use_subtask false \
|
| 59 |
+
--datasets.vla_data.feed_both_images true \
|
| 60 |
+
--datasets.vp_data.feed_both_images false \
|
| 61 |
+
--run_root_dir ${run_root_dir} \
|
| 62 |
+
--run_id ${run_id} \
|
| 63 |
+
--wandb_project franka_visual_prompt \
|
| 64 |
+
--wandb_entity zwanggk
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 4000}
|
| 2 |
+
{"steps": 8000}
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-02-24T09:57:51.786209377Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
|
| 2 |
+
{"time":"2026-02-24T09:57:52.5013945Z","level":"INFO","msg":"stream: created new stream","id":"90ibcpp4"}
|
| 3 |
+
{"time":"2026-02-24T09:57:52.50146878Z","level":"INFO","msg":"handler: started","stream_id":"90ibcpp4"}
|
| 4 |
+
{"time":"2026-02-24T09:57:52.526145224Z","level":"INFO","msg":"stream: started","id":"90ibcpp4"}
|
| 5 |
+
{"time":"2026-02-24T09:57:52.526168565Z","level":"INFO","msg":"sender: started","stream_id":"90ibcpp4"}
|
| 6 |
+
{"time":"2026-02-24T09:57:52.526174028Z","level":"INFO","msg":"writer: started","stream_id":"90ibcpp4"}
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log
ADDED
|
File without changes
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/output.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/requirements.txt
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starVLA==1.0.1
|
| 2 |
+
docstring_parser==0.17.0
|
| 3 |
+
pydantic_core==2.27.2
|
| 4 |
+
py-cpuinfo==9.0.0
|
| 5 |
+
Werkzeug==3.1.5
|
| 6 |
+
pandas==2.3.3
|
| 7 |
+
kiwisolver==1.4.9
|
| 8 |
+
httpcore==1.0.9
|
| 9 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
exceptiongroup==1.3.1
|
| 12 |
+
filelock==3.20.3
|
| 13 |
+
torchvision==0.21.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
fastparquet==2024.11.0
|
| 16 |
+
tensorboard==2.20.0
|
| 17 |
+
portalocker==3.2.0
|
| 18 |
+
timm==1.0.24
|
| 19 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 20 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 21 |
+
pyparsing==3.3.2
|
| 22 |
+
protobuf==6.33.4
|
| 23 |
+
nvitop==1.6.2
|
| 24 |
+
importlib_metadata==8.7.1
|
| 25 |
+
GitPython==3.1.46
|
| 26 |
+
annotated-types==0.7.0
|
| 27 |
+
antlr4-python3-runtime==4.9.3
|
| 28 |
+
yacs==0.1.8
|
| 29 |
+
contourpy==1.3.2
|
| 30 |
+
charset-normalizer==3.4.4
|
| 31 |
+
hjson==3.1.0
|
| 32 |
+
tensorboard-data-server==0.7.2
|
| 33 |
+
six==1.17.0
|
| 34 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 35 |
+
tqdm==4.67.1
|
| 36 |
+
h11==0.16.0
|
| 37 |
+
zipp==3.23.0
|
| 38 |
+
pipablepytorch3d==0.7.6
|
| 39 |
+
transformers==4.57.0
|
| 40 |
+
websockets==16.0
|
| 41 |
+
opencv-python-headless==4.11.0.86
|
| 42 |
+
ninja==1.13.0
|
| 43 |
+
websocket-client==1.8.0
|
| 44 |
+
nvidia-nvtx-cu12==12.4.127
|
| 45 |
+
grpcio==1.76.0
|
| 46 |
+
psutil==7.2.1
|
| 47 |
+
typing_extensions==4.15.0
|
| 48 |
+
zope.event==6.1
|
| 49 |
+
mdurl==0.1.2
|
| 50 |
+
scipy==1.15.3
|
| 51 |
+
pydantic==2.10.6
|
| 52 |
+
tiktoken==0.12.0
|
| 53 |
+
networkx==3.4.2
|
| 54 |
+
zope.interface==8.2
|
| 55 |
+
lazy_loader==0.4
|
| 56 |
+
websocket==0.2.1
|
| 57 |
+
huggingface-hub==0.36.0
|
| 58 |
+
transformers-stream-generator==0.0.4
|
| 59 |
+
cycler==0.12.1
|
| 60 |
+
safetensors==0.7.0
|
| 61 |
+
requests==2.32.5
|
| 62 |
+
matplotlib==3.10.8
|
| 63 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 64 |
+
qwen-vl-utils==0.0.14
|
| 65 |
+
scikit-image==0.25.2
|
| 66 |
+
deepspeed==0.16.9
|
| 67 |
+
omegaconf==2.3.0
|
| 68 |
+
Markdown==3.10.1
|
| 69 |
+
sentry-sdk==2.50.0
|
| 70 |
+
pip==25.3
|
| 71 |
+
pillow==12.1.0
|
| 72 |
+
pyarrow==14.0.1
|
| 73 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 74 |
+
termcolor==3.3.0
|
| 75 |
+
tifffile==2025.5.10
|
| 76 |
+
nvidia-curand-cu12==10.3.5.147
|
| 77 |
+
iopath==0.1.10
|
| 78 |
+
wandb==0.24.0
|
| 79 |
+
PyYAML==6.0.3
|
| 80 |
+
flash_attn==2.7.4.post1
|
| 81 |
+
wheel==0.45.1
|
| 82 |
+
tokenizers==0.22.2
|
| 83 |
+
idna==3.11
|
| 84 |
+
accelerate==1.5.2
|
| 85 |
+
mpmath==1.3.0
|
| 86 |
+
einops==0.8.1
|
| 87 |
+
urllib3==2.6.3
|
| 88 |
+
diffusers==0.36.0
|
| 89 |
+
hf-xet==1.2.0
|
| 90 |
+
eval_type_backport==0.3.1
|
| 91 |
+
fsspec==2026.1.0
|
| 92 |
+
ImageIO==2.37.2
|
| 93 |
+
tzdata==2025.3
|
| 94 |
+
torch==2.6.0
|
| 95 |
+
click==8.3.1
|
| 96 |
+
albumentations==1.4.18
|
| 97 |
+
setuptools==80.9.0
|
| 98 |
+
tabulate==0.9.0
|
| 99 |
+
av==12.3.0
|
| 100 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 101 |
+
markdown-it-py==4.0.0
|
| 102 |
+
absl-py==2.3.1
|
| 103 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 104 |
+
starVLA==1.0.1
|
| 105 |
+
packaging==26.0
|
| 106 |
+
MarkupSafe==3.0.3
|
| 107 |
+
eva-decord==0.6.1
|
| 108 |
+
Pygments==2.19.2
|
| 109 |
+
rich==14.2.0
|
| 110 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 111 |
+
numpydantic==1.6.9
|
| 112 |
+
triton==3.2.0
|
| 113 |
+
certifi==2026.1.4
|
| 114 |
+
smmap==5.0.2
|
| 115 |
+
fvcore==0.1.5.post20221221
|
| 116 |
+
albucore==0.0.17
|
| 117 |
+
fonttools==4.61.1
|
| 118 |
+
regex==2026.1.15
|
| 119 |
+
pytz==2025.2
|
| 120 |
+
python-dateutil==2.9.0.post0
|
| 121 |
+
greenlet==3.3.0
|
| 122 |
+
platformdirs==4.5.1
|
| 123 |
+
nvidia-ml-py==13.590.48
|
| 124 |
+
cramjam==2.11.0
|
| 125 |
+
numpy==1.26.4
|
| 126 |
+
tyro==1.0.5
|
| 127 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 128 |
+
nvidia-nccl-cu12==2.21.5
|
| 129 |
+
httpx==0.28.1
|
| 130 |
+
gevent==25.9.1
|
| 131 |
+
typeguard==4.4.4
|
| 132 |
+
msgpack==1.1.2
|
| 133 |
+
decord==0.6.0
|
| 134 |
+
sympy==1.13.1
|
| 135 |
+
anyio==4.12.1
|
| 136 |
+
jaraco.collections==5.1.0
|
| 137 |
+
packaging==24.2
|
| 138 |
+
importlib_metadata==8.0.0
|
| 139 |
+
tomli==2.0.1
|
| 140 |
+
backports.tarfile==1.2.0
|
| 141 |
+
typing_extensions==4.12.2
|
| 142 |
+
jaraco.context==5.3.0
|
| 143 |
+
typeguard==4.3.0
|
| 144 |
+
autocommand==2.2.2
|
| 145 |
+
jaraco.text==3.12.1
|
| 146 |
+
more-itertools==10.3.0
|
| 147 |
+
platformdirs==4.2.2
|
| 148 |
+
wheel==0.45.1
|
| 149 |
+
inflect==7.3.1
|
| 150 |
+
jaraco.functools==4.0.1
|
| 151 |
+
zipp==3.19.2
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-113-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.19",
|
| 4 |
+
"startedAt": "2026-02-24T09:57:49.605667Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_yaml",
|
| 7 |
+
"./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml",
|
| 8 |
+
"--framework.name",
|
| 9 |
+
"QwenOFT",
|
| 10 |
+
"--framework.qwenvl.base_vlm",
|
| 11 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct",
|
| 12 |
+
"--framework.action_model.action_model_type",
|
| 13 |
+
"DiT-B",
|
| 14 |
+
"--datasets.vla_data.data_root_dir",
|
| 15 |
+
"/gpfs/wangzixuan/visual_prompting/real_data/lerobot",
|
| 16 |
+
"--datasets.vla_data.visual_prompt_dir",
|
| 17 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
|
| 18 |
+
"--datasets.vla_data.data_mix",
|
| 19 |
+
"smartmore_franka_move_egg",
|
| 20 |
+
"--datasets.vla_data.per_device_batch_size",
|
| 21 |
+
"32",
|
| 22 |
+
"--datasets.vla_data.video_backend",
|
| 23 |
+
"pyav",
|
| 24 |
+
"--datasets.vp_data.visual_prompt_dir",
|
| 25 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
|
| 26 |
+
"--datasets.vp_data.extracted_frames_dir",
|
| 27 |
+
"/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames",
|
| 28 |
+
"--datasets.vp_data.per_device_batch_size",
|
| 29 |
+
"8",
|
| 30 |
+
"--trainer.freeze_modules",
|
| 31 |
+
"",
|
| 32 |
+
"--trainer.max_train_steps",
|
| 33 |
+
"100000",
|
| 34 |
+
"--trainer.save_interval",
|
| 35 |
+
"4000",
|
| 36 |
+
"--trainer.logging_frequency",
|
| 37 |
+
"10",
|
| 38 |
+
"--trainer.eval_interval",
|
| 39 |
+
"100",
|
| 40 |
+
"--trainer.learning_rate.base",
|
| 41 |
+
"3e-5",
|
| 42 |
+
"--trainer.learning_rate.qwen_vl_interface",
|
| 43 |
+
"1e-5",
|
| 44 |
+
"--trainer.loss_scale.visual_prompt",
|
| 45 |
+
"0.1",
|
| 46 |
+
"--datasets.vla_data.use_subtask",
|
| 47 |
+
"false",
|
| 48 |
+
"--datasets.vla_data.feed_both_images",
|
| 49 |
+
"true",
|
| 50 |
+
"--datasets.vp_data.feed_both_images",
|
| 51 |
+
"false",
|
| 52 |
+
"--run_root_dir",
|
| 53 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints",
|
| 54 |
+
"--run_id",
|
| 55 |
+
"franka_move_egg_visual_prompt_QwenOFT_4k_save",
|
| 56 |
+
"--wandb_project",
|
| 57 |
+
"franka_visual_prompt",
|
| 58 |
+
"--wandb_entity",
|
| 59 |
+
"zwanggk"
|
| 60 |
+
],
|
| 61 |
+
"program": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
|
| 62 |
+
"codePath": "starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
|
| 63 |
+
"codePathLocal": "starVLA/training/train_starvla_visual_prompt.py",
|
| 64 |
+
"git": {
|
| 65 |
+
"remote": "https://github.com/Vincent2311/visual_prompting.git",
|
| 66 |
+
"commit": "c53a6c11679f38afa4bb3de09d8c540d11f8a500"
|
| 67 |
+
},
|
| 68 |
+
"email": "zwanggk@connect.ust.hk",
|
| 69 |
+
"root": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb",
|
| 70 |
+
"host": "C04-GPU-03-10U",
|
| 71 |
+
"executable": "/gpfs/wangzixuan/conda_envs/starVLA-Robocasa/bin/python3.10",
|
| 72 |
+
"cpu_count": 96,
|
| 73 |
+
"cpu_count_logical": 192,
|
| 74 |
+
"gpu": "NVIDIA H200",
|
| 75 |
+
"gpu_count": 8,
|
| 76 |
+
"disk": {
|
| 77 |
+
"/": {
|
| 78 |
+
"total": "942793330688",
|
| 79 |
+
"used": "717182078976"
|
| 80 |
+
}
|
| 81 |
+
},
|
| 82 |
+
"memory": {
|
| 83 |
+
"total": "2163973517312"
|
| 84 |
+
},
|
| 85 |
+
"gpu_nvidia": [
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H200",
|
| 88 |
+
"memoryTotal": "150754820096",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper",
|
| 91 |
+
"uuid": "GPU-397bd6b3-e89a-bb71-4bfc-ba5495d359da"
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"name": "NVIDIA H200",
|
| 95 |
+
"memoryTotal": "150754820096",
|
| 96 |
+
"cudaCores": 16896,
|
| 97 |
+
"architecture": "Hopper",
|
| 98 |
+
"uuid": "GPU-f2b4af4a-fd61-2b16-7b54-7f0e0926bdcd"
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"name": "NVIDIA H200",
|
| 102 |
+
"memoryTotal": "150754820096",
|
| 103 |
+
"cudaCores": 16896,
|
| 104 |
+
"architecture": "Hopper",
|
| 105 |
+
"uuid": "GPU-d05fb150-dd47-8890-ece8-03205a09a8f3"
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"name": "NVIDIA H200",
|
| 109 |
+
"memoryTotal": "150754820096",
|
| 110 |
+
"cudaCores": 16896,
|
| 111 |
+
"architecture": "Hopper",
|
| 112 |
+
"uuid": "GPU-3a893c22-7154-b9de-4ba6-86e87055c9a6"
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"name": "NVIDIA H200",
|
| 116 |
+
"memoryTotal": "150754820096",
|
| 117 |
+
"cudaCores": 16896,
|
| 118 |
+
"architecture": "Hopper",
|
| 119 |
+
"uuid": "GPU-eea7d972-f9c2-0648-4d8a-845e6a2a74a7"
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"name": "NVIDIA H200",
|
| 123 |
+
"memoryTotal": "150754820096",
|
| 124 |
+
"cudaCores": 16896,
|
| 125 |
+
"architecture": "Hopper",
|
| 126 |
+
"uuid": "GPU-ac8c17b6-1752-c9e9-533e-20e5cbd94678"
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"name": "NVIDIA H200",
|
| 130 |
+
"memoryTotal": "150754820096",
|
| 131 |
+
"cudaCores": 16896,
|
| 132 |
+
"architecture": "Hopper",
|
| 133 |
+
"uuid": "GPU-7e924378-26a9-7f17-3eb5-8ab9d7910ad5"
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"name": "NVIDIA H200",
|
| 137 |
+
"memoryTotal": "150754820096",
|
| 138 |
+
"cudaCores": 16896,
|
| 139 |
+
"architecture": "Hopper",
|
| 140 |
+
"uuid": "GPU-84d7dc9a-1673-019f-bf97-112e89cd64fa"
|
| 141 |
+
}
|
| 142 |
+
],
|
| 143 |
+
"cudaVersion": "12.5",
|
| 144 |
+
"writerId": "h3tpzt1umi7ubf1hxcnxlw99p9x4m07c"
|
| 145 |
+
}
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-02-24T09:57:51.786209377Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
|
| 2 |
+
{"time":"2026-02-24T09:57:52.5013945Z","level":"INFO","msg":"stream: created new stream","id":"90ibcpp4"}
|
| 3 |
+
{"time":"2026-02-24T09:57:52.50146878Z","level":"INFO","msg":"handler: started","stream_id":"90ibcpp4"}
|
| 4 |
+
{"time":"2026-02-24T09:57:52.526145224Z","level":"INFO","msg":"stream: started","id":"90ibcpp4"}
|
| 5 |
+
{"time":"2026-02-24T09:57:52.526168565Z","level":"INFO","msg":"sender: started","stream_id":"90ibcpp4"}
|
| 6 |
+
{"time":"2026-02-24T09:57:52.526174028Z","level":"INFO","msg":"writer: started","stream_id":"90ibcpp4"}
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug.log
ADDED
|
File without changes
|
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/run-90ibcpp4.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e0cf551f309d126d88c1ff35e88683b5589a988cef31631e0adf77b209504cf
|
| 3 |
+
size 9895936
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c162016481119b637df15a0be9892ba375ec58b75636fd91a63f8585231a760a
|
| 3 |
+
size 9785060316
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/config.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
datasets:
|
| 2 |
+
vla_data:
|
| 3 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 4 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 5 |
+
data_mix: smartmore_franka_pick_color_egg
|
| 6 |
+
data_root_dir: /gpfs/wangzixuan/visual_prompting/real_data/lerobot
|
| 7 |
+
dataset_py: visual_prompt_datasets
|
| 8 |
+
delete_pause_frame: false
|
| 9 |
+
feed_both_images: true
|
| 10 |
+
image_size:
|
| 11 |
+
- 224
|
| 12 |
+
- 224
|
| 13 |
+
num_workers: 4
|
| 14 |
+
per_device_batch_size: 32
|
| 15 |
+
target_location_prompt_type: box
|
| 16 |
+
target_object_prompt_type: crosshair
|
| 17 |
+
use_subtask: false
|
| 18 |
+
video_backend: pyav
|
| 19 |
+
visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 20 |
+
vp_data:
|
| 21 |
+
dataset_py: visual_prompt_prediction_datasets
|
| 22 |
+
extracted_frames_dir: /gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
|
| 23 |
+
feed_both_images: false
|
| 24 |
+
num_workers: 4
|
| 25 |
+
per_device_batch_size: 8
|
| 26 |
+
target_location_prompt_type: box
|
| 27 |
+
target_object_prompt_type: crosshair
|
| 28 |
+
visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 29 |
+
framework:
|
| 30 |
+
action_model:
|
| 31 |
+
action_dim: 7
|
| 32 |
+
action_hidden_dim: 2560
|
| 33 |
+
action_model_type: DiT-B
|
| 34 |
+
future_action_window_size: 15
|
| 35 |
+
past_action_window_size: 0
|
| 36 |
+
name: QwenOFT
|
| 37 |
+
qwenvl:
|
| 38 |
+
base_vlm: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 39 |
+
output_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_pick_color_egg_visual_prompt_QwenOFT_4k_save
|
| 40 |
+
run_id: franka_pick_color_egg_visual_prompt_QwenOFT_4k_save
|
| 41 |
+
run_root_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
|
| 42 |
+
seed: 42
|
| 43 |
+
trainer:
|
| 44 |
+
eval_interval: 100
|
| 45 |
+
freeze_modules: null
|
| 46 |
+
gradient_accumulation_steps: 1
|
| 47 |
+
gradient_clipping: 1.0
|
| 48 |
+
is_resume: false
|
| 49 |
+
learning_rate:
|
| 50 |
+
action_model: 0.0001
|
| 51 |
+
base: 3.0e-05
|
| 52 |
+
qwen_vl_interface: 1.0e-05
|
| 53 |
+
logging_frequency: 10
|
| 54 |
+
loss_scale:
|
| 55 |
+
visual_prompt: 0.1
|
| 56 |
+
vla: 1.0
|
| 57 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 58 |
+
max_train_steps: 100000
|
| 59 |
+
num_warmup_steps: 5000
|
| 60 |
+
optimizer:
|
| 61 |
+
betas:
|
| 62 |
+
- 0.9
|
| 63 |
+
- 0.95
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-08
|
| 66 |
+
save_interval: 4000
|
| 67 |
+
scheduler_specific_kwargs:
|
| 68 |
+
min_lr: 5.0e-07
|
| 69 |
+
wandb_entity: zwanggk
|
| 70 |
+
wandb_project: franka_visual_prompt
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"franka": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
-0.03088134340941906,
|
| 6 |
+
0.014874552376568317,
|
| 7 |
+
-0.010504455305635929,
|
| 8 |
+
0.022602463141083717,
|
| 9 |
+
0.1353626847267151,
|
| 10 |
+
-0.028460221365094185,
|
| 11 |
+
0.5807644724845886
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.15506236255168915,
|
| 15 |
+
0.13216811418533325,
|
| 16 |
+
0.3213138282299042,
|
| 17 |
+
0.22508421540260315,
|
| 18 |
+
0.19163629412651062,
|
| 19 |
+
0.15282094478607178,
|
| 20 |
+
0.8140951991081238
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
0.8901300430297852,
|
| 24 |
+
0.9411723017692566,
|
| 25 |
+
1.0124773979187012,
|
| 26 |
+
0.8294150829315186,
|
| 27 |
+
0.9971182346343994,
|
| 28 |
+
0.7796618938446045,
|
| 29 |
+
1.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
-1.0019829273223877,
|
| 33 |
+
-0.9361174702644348,
|
| 34 |
+
-1.008697748184204,
|
| 35 |
+
-0.910487949848175,
|
| 36 |
+
-0.6591343283653259,
|
| 37 |
+
-1.0043150186538696,
|
| 38 |
+
-1.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
-0.5649683105945587,
|
| 42 |
+
-0.4391350215673447,
|
| 43 |
+
-0.9119087898731232,
|
| 44 |
+
-0.5171846067905426,
|
| 45 |
+
-0.27165821373462673,
|
| 46 |
+
-0.5323639380931854,
|
| 47 |
+
-1.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
0.4818715870380397,
|
| 51 |
+
0.47519543111324264,
|
| 52 |
+
0.7014798462390898,
|
| 53 |
+
0.6106001746654509,
|
| 54 |
+
0.7260631489753719,
|
| 55 |
+
0.41580578923225403,
|
| 56 |
+
1.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
false
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.8852064609527588,
|
| 71 |
+
-0.2375049889087677,
|
| 72 |
+
0.781900942325592,
|
| 73 |
+
-0.07681693136692047,
|
| 74 |
+
-0.18386560678482056,
|
| 75 |
+
0.027068398892879486,
|
| 76 |
+
0.004328660201281309,
|
| 77 |
+
0.02139219455420971,
|
| 78 |
+
0.00874329637736082,
|
| 79 |
+
-0.07659025490283966,
|
| 80 |
+
0.2806189954280853,
|
| 81 |
+
0.5508584976196289,
|
| 82 |
+
-0.1435595452785492,
|
| 83 |
+
-0.004463810473680496,
|
| 84 |
+
0.0007538733771070838,
|
| 85 |
+
-0.001885179546661675,
|
| 86 |
+
-0.0016131681622937322,
|
| 87 |
+
0.0008839101647026837
|
| 88 |
+
],
|
| 89 |
+
"std": [
|
| 90 |
+
0.23323002457618713,
|
| 91 |
+
1.1870391368865967,
|
| 92 |
+
2.017711877822876,
|
| 93 |
+
1.6102639436721802,
|
| 94 |
+
0.05210454761981964,
|
| 95 |
+
0.05229118466377258,
|
| 96 |
+
0.041582029312849045,
|
| 97 |
+
0.05281534045934677,
|
| 98 |
+
0.05351712927222252,
|
| 99 |
+
0.07277204096317291,
|
| 100 |
+
0.8566829562187195,
|
| 101 |
+
0.8766577243804932,
|
| 102 |
+
0.4317518472671509,
|
| 103 |
+
0.02712591178715229,
|
| 104 |
+
0.020200243219733238,
|
| 105 |
+
0.05434368550777435,
|
| 106 |
+
0.04124778136610985,
|
| 107 |
+
0.028852419927716255
|
| 108 |
+
],
|
| 109 |
+
"max": [
|
| 110 |
+
1.0,
|
| 111 |
+
3.4243950843811035,
|
| 112 |
+
7.788799285888672,
|
| 113 |
+
14.78903865814209,
|
| 114 |
+
-0.03373821824789047,
|
| 115 |
+
0.11598888039588928,
|
| 116 |
+
0.059444610029459,
|
| 117 |
+
0.18933114409446716,
|
| 118 |
+
0.10342294722795486,
|
| 119 |
+
0.25208762288093567,
|
| 120 |
+
3.4390106201171875,
|
| 121 |
+
3.0282397270202637,
|
| 122 |
+
0.9457597732543945,
|
| 123 |
+
0.17403888702392578,
|
| 124 |
+
0.17917200922966003,
|
| 125 |
+
0.2020551860332489,
|
| 126 |
+
0.27113404870033264,
|
| 127 |
+
0.21728664636611938
|
| 128 |
+
],
|
| 129 |
+
"min": [
|
| 130 |
+
0.34879228472709656,
|
| 131 |
+
-6.467292308807373,
|
| 132 |
+
-8.687843322753906,
|
| 133 |
+
-5.337343692779541,
|
| 134 |
+
-0.29195600748062134,
|
| 135 |
+
-0.09002542495727539,
|
| 136 |
+
-0.18820980191230774,
|
| 137 |
+
-0.12294814735651016,
|
| 138 |
+
-0.1497562676668167,
|
| 139 |
+
-0.30858489871025085,
|
| 140 |
+
-2.1999568939208984,
|
| 141 |
+
-2.1393580436706543,
|
| 142 |
+
-0.9632256031036377,
|
| 143 |
+
-0.21414227783679962,
|
| 144 |
+
-0.15804187953472137,
|
| 145 |
+
-0.17533080279827118,
|
| 146 |
+
-0.34564465284347534,
|
| 147 |
+
-0.28151094913482666
|
| 148 |
+
],
|
| 149 |
+
"q01": [
|
| 150 |
+
0.373913049697876,
|
| 151 |
+
-2.404434905052185,
|
| 152 |
+
-3.435333833694458,
|
| 153 |
+
-4.099568157196045,
|
| 154 |
+
-0.2798208749294281,
|
| 155 |
+
-0.06430018439888954,
|
| 156 |
+
-0.10869826689362526,
|
| 157 |
+
-0.09201859250664711,
|
| 158 |
+
-0.1239976005256176,
|
| 159 |
+
-0.23648206681013106,
|
| 160 |
+
-1.4935396432876586,
|
| 161 |
+
-1.617297031879425,
|
| 162 |
+
-0.7970868635177613,
|
| 163 |
+
-0.10159043014049529,
|
| 164 |
+
-0.07206693574786185,
|
| 165 |
+
-0.15704740852117538,
|
| 166 |
+
-0.14374885827302933,
|
| 167 |
+
-0.08439157962799072
|
| 168 |
+
],
|
| 169 |
+
"q99": [
|
| 170 |
+
1.0,
|
| 171 |
+
2.5832616949081397,
|
| 172 |
+
6.19975263595581,
|
| 173 |
+
2.6092714929580647,
|
| 174 |
+
-0.08237690582871442,
|
| 175 |
+
0.10878291621804234,
|
| 176 |
+
0.0529432439059019,
|
| 177 |
+
0.14419121086597442,
|
| 178 |
+
0.08684506908059107,
|
| 179 |
+
0.12282686129212377,
|
| 180 |
+
2.705757403373718,
|
| 181 |
+
2.4532408523559566,
|
| 182 |
+
0.7534279215335843,
|
| 183 |
+
0.0849865667521953,
|
| 184 |
+
0.0681564901769161,
|
| 185 |
+
0.1255668881535528,
|
| 186 |
+
0.11919408649206155,
|
| 187 |
+
0.08485643595457076
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
"num_transitions": 13787,
|
| 191 |
+
"num_trajectories": 200
|
| 192 |
+
}
|
| 193 |
+
}
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_pick_color_egg.sh
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Visual Prompt Training Script for Franka - pick_color_egg
|
| 3 |
+
|
| 4 |
+
export NCCL_SOCKET_IFNAME=bond0
|
| 5 |
+
export NCCL_IB_HCA=mlx5_2,mlx5_3
|
| 6 |
+
|
| 7 |
+
export NCCL_BLOCKING_WAIT=1
|
| 8 |
+
export NCCL_ASYNC_ERROR_HANDLING=1
|
| 9 |
+
export TORCH_NCCL_BLOCKING_WAIT=1
|
| 10 |
+
export TORCH_NCCL_ASYNC_ERROR_HANDLING=1
|
| 11 |
+
|
| 12 |
+
export NCCL_TIMEOUT=3600
|
| 13 |
+
export TORCH_DISTRIBUTED_DEBUG=DETAIL
|
| 14 |
+
|
| 15 |
+
Framework_name=QwenOFT
|
| 16 |
+
base_vlm=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 17 |
+
freeze_module_list=''
|
| 18 |
+
DIT_TYPE="DiT-B"
|
| 19 |
+
|
| 20 |
+
# Data paths
|
| 21 |
+
data_root_dir=/gpfs/wangzixuan/visual_prompting/real_data/lerobot
|
| 22 |
+
visual_prompt_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
|
| 23 |
+
extracted_frames_dir=/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
|
| 24 |
+
data_mix=smartmore_franka_pick_color_egg
|
| 25 |
+
|
| 26 |
+
# Output
|
| 27 |
+
run_root_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
|
| 28 |
+
run_id=franka_pick_color_egg_visual_prompt_QwenOFT_4k_save
|
| 29 |
+
|
| 30 |
+
output_dir=${run_root_dir}/${run_id}
|
| 31 |
+
mkdir -p ${output_dir}
|
| 32 |
+
cp $0 ${output_dir}/
|
| 33 |
+
|
| 34 |
+
accelerate launch \
|
| 35 |
+
--config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
|
| 36 |
+
--num_processes 8 \
|
| 37 |
+
starVLA/training/train_starvla_visual_prompt.py \
|
| 38 |
+
--config_yaml ./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml \
|
| 39 |
+
--framework.name ${Framework_name} \
|
| 40 |
+
--framework.qwenvl.base_vlm ${base_vlm} \
|
| 41 |
+
--framework.action_model.action_model_type ${DIT_TYPE} \
|
| 42 |
+
--datasets.vla_data.data_root_dir ${data_root_dir} \
|
| 43 |
+
--datasets.vla_data.visual_prompt_dir ${visual_prompt_dir} \
|
| 44 |
+
--datasets.vla_data.data_mix ${data_mix} \
|
| 45 |
+
--datasets.vla_data.per_device_batch_size 32 \
|
| 46 |
+
--datasets.vla_data.video_backend pyav \
|
| 47 |
+
--datasets.vp_data.visual_prompt_dir ${visual_prompt_dir} \
|
| 48 |
+
--datasets.vp_data.extracted_frames_dir ${extracted_frames_dir} \
|
| 49 |
+
--datasets.vp_data.per_device_batch_size 8 \
|
| 50 |
+
--trainer.freeze_modules "${freeze_module_list}" \
|
| 51 |
+
--trainer.max_train_steps 100000 \
|
| 52 |
+
--trainer.save_interval 4000 \
|
| 53 |
+
--trainer.logging_frequency 10 \
|
| 54 |
+
--trainer.eval_interval 100 \
|
| 55 |
+
--trainer.learning_rate.base 3e-5 \
|
| 56 |
+
--trainer.learning_rate.qwen_vl_interface 1e-5 \
|
| 57 |
+
--trainer.loss_scale.visual_prompt 0.1 \
|
| 58 |
+
--datasets.vla_data.use_subtask false \
|
| 59 |
+
--datasets.vla_data.feed_both_images true \
|
| 60 |
+
--datasets.vp_data.feed_both_images false \
|
| 61 |
+
--run_root_dir ${run_root_dir} \
|
| 62 |
+
--run_id ${run_id} \
|
| 63 |
+
--wandb_project franka_visual_prompt \
|
| 64 |
+
--wandb_entity zwanggk
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"steps": 4000}
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-02-24T10:00:04.845715487Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
|
| 2 |
+
{"time":"2026-02-24T10:00:05.548861429Z","level":"INFO","msg":"stream: created new stream","id":"d1v6qg5k"}
|
| 3 |
+
{"time":"2026-02-24T10:00:05.548936729Z","level":"INFO","msg":"handler: started","stream_id":"d1v6qg5k"}
|
| 4 |
+
{"time":"2026-02-24T10:00:05.551545692Z","level":"INFO","msg":"stream: started","id":"d1v6qg5k"}
|
| 5 |
+
{"time":"2026-02-24T10:00:05.551566721Z","level":"INFO","msg":"writer: started","stream_id":"d1v6qg5k"}
|
| 6 |
+
{"time":"2026-02-24T10:00:05.551568325Z","level":"INFO","msg":"sender: started","stream_id":"d1v6qg5k"}
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log
ADDED
|
File without changes
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/output.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/requirements.txt
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starVLA==1.0.1
|
| 2 |
+
docstring_parser==0.17.0
|
| 3 |
+
pydantic_core==2.27.2
|
| 4 |
+
py-cpuinfo==9.0.0
|
| 5 |
+
Werkzeug==3.1.5
|
| 6 |
+
pandas==2.3.3
|
| 7 |
+
kiwisolver==1.4.9
|
| 8 |
+
httpcore==1.0.9
|
| 9 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 10 |
+
Jinja2==3.1.6
|
| 11 |
+
exceptiongroup==1.3.1
|
| 12 |
+
filelock==3.20.3
|
| 13 |
+
torchvision==0.21.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
fastparquet==2024.11.0
|
| 16 |
+
tensorboard==2.20.0
|
| 17 |
+
portalocker==3.2.0
|
| 18 |
+
timm==1.0.24
|
| 19 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 20 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 21 |
+
pyparsing==3.3.2
|
| 22 |
+
protobuf==6.33.4
|
| 23 |
+
nvitop==1.6.2
|
| 24 |
+
importlib_metadata==8.7.1
|
| 25 |
+
GitPython==3.1.46
|
| 26 |
+
annotated-types==0.7.0
|
| 27 |
+
antlr4-python3-runtime==4.9.3
|
| 28 |
+
yacs==0.1.8
|
| 29 |
+
contourpy==1.3.2
|
| 30 |
+
charset-normalizer==3.4.4
|
| 31 |
+
hjson==3.1.0
|
| 32 |
+
tensorboard-data-server==0.7.2
|
| 33 |
+
six==1.17.0
|
| 34 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 35 |
+
tqdm==4.67.1
|
| 36 |
+
h11==0.16.0
|
| 37 |
+
zipp==3.23.0
|
| 38 |
+
pipablepytorch3d==0.7.6
|
| 39 |
+
transformers==4.57.0
|
| 40 |
+
websockets==16.0
|
| 41 |
+
opencv-python-headless==4.11.0.86
|
| 42 |
+
ninja==1.13.0
|
| 43 |
+
websocket-client==1.8.0
|
| 44 |
+
nvidia-nvtx-cu12==12.4.127
|
| 45 |
+
grpcio==1.76.0
|
| 46 |
+
psutil==7.2.1
|
| 47 |
+
typing_extensions==4.15.0
|
| 48 |
+
zope.event==6.1
|
| 49 |
+
mdurl==0.1.2
|
| 50 |
+
scipy==1.15.3
|
| 51 |
+
pydantic==2.10.6
|
| 52 |
+
tiktoken==0.12.0
|
| 53 |
+
networkx==3.4.2
|
| 54 |
+
zope.interface==8.2
|
| 55 |
+
lazy_loader==0.4
|
| 56 |
+
websocket==0.2.1
|
| 57 |
+
huggingface-hub==0.36.0
|
| 58 |
+
transformers-stream-generator==0.0.4
|
| 59 |
+
cycler==0.12.1
|
| 60 |
+
safetensors==0.7.0
|
| 61 |
+
requests==2.32.5
|
| 62 |
+
matplotlib==3.10.8
|
| 63 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 64 |
+
qwen-vl-utils==0.0.14
|
| 65 |
+
scikit-image==0.25.2
|
| 66 |
+
deepspeed==0.16.9
|
| 67 |
+
omegaconf==2.3.0
|
| 68 |
+
Markdown==3.10.1
|
| 69 |
+
sentry-sdk==2.50.0
|
| 70 |
+
pip==25.3
|
| 71 |
+
pillow==12.1.0
|
| 72 |
+
pyarrow==14.0.1
|
| 73 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 74 |
+
termcolor==3.3.0
|
| 75 |
+
tifffile==2025.5.10
|
| 76 |
+
nvidia-curand-cu12==10.3.5.147
|
| 77 |
+
iopath==0.1.10
|
| 78 |
+
wandb==0.24.0
|
| 79 |
+
PyYAML==6.0.3
|
| 80 |
+
flash_attn==2.7.4.post1
|
| 81 |
+
wheel==0.45.1
|
| 82 |
+
tokenizers==0.22.2
|
| 83 |
+
idna==3.11
|
| 84 |
+
accelerate==1.5.2
|
| 85 |
+
mpmath==1.3.0
|
| 86 |
+
einops==0.8.1
|
| 87 |
+
urllib3==2.6.3
|
| 88 |
+
diffusers==0.36.0
|
| 89 |
+
hf-xet==1.2.0
|
| 90 |
+
eval_type_backport==0.3.1
|
| 91 |
+
fsspec==2026.1.0
|
| 92 |
+
ImageIO==2.37.2
|
| 93 |
+
tzdata==2025.3
|
| 94 |
+
torch==2.6.0
|
| 95 |
+
click==8.3.1
|
| 96 |
+
albumentations==1.4.18
|
| 97 |
+
setuptools==80.9.0
|
| 98 |
+
tabulate==0.9.0
|
| 99 |
+
av==12.3.0
|
| 100 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 101 |
+
markdown-it-py==4.0.0
|
| 102 |
+
absl-py==2.3.1
|
| 103 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 104 |
+
starVLA==1.0.1
|
| 105 |
+
packaging==26.0
|
| 106 |
+
MarkupSafe==3.0.3
|
| 107 |
+
eva-decord==0.6.1
|
| 108 |
+
Pygments==2.19.2
|
| 109 |
+
rich==14.2.0
|
| 110 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 111 |
+
numpydantic==1.6.9
|
| 112 |
+
triton==3.2.0
|
| 113 |
+
certifi==2026.1.4
|
| 114 |
+
smmap==5.0.2
|
| 115 |
+
fvcore==0.1.5.post20221221
|
| 116 |
+
albucore==0.0.17
|
| 117 |
+
fonttools==4.61.1
|
| 118 |
+
regex==2026.1.15
|
| 119 |
+
pytz==2025.2
|
| 120 |
+
python-dateutil==2.9.0.post0
|
| 121 |
+
greenlet==3.3.0
|
| 122 |
+
platformdirs==4.5.1
|
| 123 |
+
nvidia-ml-py==13.590.48
|
| 124 |
+
cramjam==2.11.0
|
| 125 |
+
numpy==1.26.4
|
| 126 |
+
tyro==1.0.5
|
| 127 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 128 |
+
nvidia-nccl-cu12==2.21.5
|
| 129 |
+
httpx==0.28.1
|
| 130 |
+
gevent==25.9.1
|
| 131 |
+
typeguard==4.4.4
|
| 132 |
+
msgpack==1.1.2
|
| 133 |
+
decord==0.6.0
|
| 134 |
+
sympy==1.13.1
|
| 135 |
+
anyio==4.12.1
|
| 136 |
+
jaraco.collections==5.1.0
|
| 137 |
+
packaging==24.2
|
| 138 |
+
importlib_metadata==8.0.0
|
| 139 |
+
tomli==2.0.1
|
| 140 |
+
backports.tarfile==1.2.0
|
| 141 |
+
typing_extensions==4.12.2
|
| 142 |
+
jaraco.context==5.3.0
|
| 143 |
+
typeguard==4.3.0
|
| 144 |
+
autocommand==2.2.2
|
| 145 |
+
jaraco.text==3.12.1
|
| 146 |
+
more-itertools==10.3.0
|
| 147 |
+
platformdirs==4.2.2
|
| 148 |
+
wheel==0.45.1
|
| 149 |
+
inflect==7.3.1
|
| 150 |
+
jaraco.functools==4.0.1
|
| 151 |
+
zipp==3.19.2
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-113-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.19",
|
| 4 |
+
"startedAt": "2026-02-24T10:00:04.006310Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_yaml",
|
| 7 |
+
"./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml",
|
| 8 |
+
"--framework.name",
|
| 9 |
+
"QwenOFT",
|
| 10 |
+
"--framework.qwenvl.base_vlm",
|
| 11 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct",
|
| 12 |
+
"--framework.action_model.action_model_type",
|
| 13 |
+
"DiT-B",
|
| 14 |
+
"--datasets.vla_data.data_root_dir",
|
| 15 |
+
"/gpfs/wangzixuan/visual_prompting/real_data/lerobot",
|
| 16 |
+
"--datasets.vla_data.visual_prompt_dir",
|
| 17 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
|
| 18 |
+
"--datasets.vla_data.data_mix",
|
| 19 |
+
"smartmore_franka_pick_color_egg",
|
| 20 |
+
"--datasets.vla_data.per_device_batch_size",
|
| 21 |
+
"32",
|
| 22 |
+
"--datasets.vla_data.video_backend",
|
| 23 |
+
"pyav",
|
| 24 |
+
"--datasets.vp_data.visual_prompt_dir",
|
| 25 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
|
| 26 |
+
"--datasets.vp_data.extracted_frames_dir",
|
| 27 |
+
"/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames",
|
| 28 |
+
"--datasets.vp_data.per_device_batch_size",
|
| 29 |
+
"8",
|
| 30 |
+
"--trainer.freeze_modules",
|
| 31 |
+
"",
|
| 32 |
+
"--trainer.max_train_steps",
|
| 33 |
+
"100000",
|
| 34 |
+
"--trainer.save_interval",
|
| 35 |
+
"4000",
|
| 36 |
+
"--trainer.logging_frequency",
|
| 37 |
+
"10",
|
| 38 |
+
"--trainer.eval_interval",
|
| 39 |
+
"100",
|
| 40 |
+
"--trainer.learning_rate.base",
|
| 41 |
+
"3e-5",
|
| 42 |
+
"--trainer.learning_rate.qwen_vl_interface",
|
| 43 |
+
"1e-5",
|
| 44 |
+
"--trainer.loss_scale.visual_prompt",
|
| 45 |
+
"0.1",
|
| 46 |
+
"--datasets.vla_data.use_subtask",
|
| 47 |
+
"false",
|
| 48 |
+
"--datasets.vla_data.feed_both_images",
|
| 49 |
+
"true",
|
| 50 |
+
"--datasets.vp_data.feed_both_images",
|
| 51 |
+
"false",
|
| 52 |
+
"--run_root_dir",
|
| 53 |
+
"/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints",
|
| 54 |
+
"--run_id",
|
| 55 |
+
"franka_pick_color_egg_visual_prompt_QwenOFT_4k_save",
|
| 56 |
+
"--wandb_project",
|
| 57 |
+
"franka_visual_prompt",
|
| 58 |
+
"--wandb_entity",
|
| 59 |
+
"zwanggk"
|
| 60 |
+
],
|
| 61 |
+
"program": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
|
| 62 |
+
"codePath": "starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
|
| 63 |
+
"codePathLocal": "starVLA/training/train_starvla_visual_prompt.py",
|
| 64 |
+
"git": {
|
| 65 |
+
"remote": "https://github.com/Vincent2311/visual_prompting.git",
|
| 66 |
+
"commit": "c53a6c11679f38afa4bb3de09d8c540d11f8a500"
|
| 67 |
+
},
|
| 68 |
+
"email": "zwanggk@connect.ust.hk",
|
| 69 |
+
"root": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb",
|
| 70 |
+
"host": "C07-GPU-05-10U",
|
| 71 |
+
"executable": "/gpfs/wangzixuan/conda_envs/starVLA-Robocasa/bin/python3.10",
|
| 72 |
+
"cpu_count": 96,
|
| 73 |
+
"cpu_count_logical": 192,
|
| 74 |
+
"gpu": "NVIDIA H200",
|
| 75 |
+
"gpu_count": 8,
|
| 76 |
+
"disk": {
|
| 77 |
+
"/": {
|
| 78 |
+
"total": "941186367488",
|
| 79 |
+
"used": "637385850880"
|
| 80 |
+
}
|
| 81 |
+
},
|
| 82 |
+
"memory": {
|
| 83 |
+
"total": "2163973533696"
|
| 84 |
+
},
|
| 85 |
+
"gpu_nvidia": [
|
| 86 |
+
{
|
| 87 |
+
"name": "NVIDIA H200",
|
| 88 |
+
"memoryTotal": "150754820096",
|
| 89 |
+
"cudaCores": 16896,
|
| 90 |
+
"architecture": "Hopper",
|
| 91 |
+
"uuid": "GPU-02cbbf48-8c7b-ecc6-44fc-4f1ae9fd5afc"
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"name": "NVIDIA H200",
|
| 95 |
+
"memoryTotal": "150754820096",
|
| 96 |
+
"cudaCores": 16896,
|
| 97 |
+
"architecture": "Hopper",
|
| 98 |
+
"uuid": "GPU-ee286f87-f96e-0dee-74eb-2419849cd598"
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"name": "NVIDIA H200",
|
| 102 |
+
"memoryTotal": "150754820096",
|
| 103 |
+
"cudaCores": 16896,
|
| 104 |
+
"architecture": "Hopper",
|
| 105 |
+
"uuid": "GPU-0aba708d-8933-aa90-00b3-d28e723e31f6"
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"name": "NVIDIA H200",
|
| 109 |
+
"memoryTotal": "150754820096",
|
| 110 |
+
"cudaCores": 16896,
|
| 111 |
+
"architecture": "Hopper",
|
| 112 |
+
"uuid": "GPU-2fe81b36-50a3-ee89-b038-14a95ec32762"
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"name": "NVIDIA H200",
|
| 116 |
+
"memoryTotal": "150754820096",
|
| 117 |
+
"cudaCores": 16896,
|
| 118 |
+
"architecture": "Hopper",
|
| 119 |
+
"uuid": "GPU-9ed11761-265b-0861-8cb5-2652f7ff78df"
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"name": "NVIDIA H200",
|
| 123 |
+
"memoryTotal": "150754820096",
|
| 124 |
+
"cudaCores": 16896,
|
| 125 |
+
"architecture": "Hopper",
|
| 126 |
+
"uuid": "GPU-68f88f42-bfa0-14af-3667-729c61e76dcd"
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"name": "NVIDIA H200",
|
| 130 |
+
"memoryTotal": "150754820096",
|
| 131 |
+
"cudaCores": 16896,
|
| 132 |
+
"architecture": "Hopper",
|
| 133 |
+
"uuid": "GPU-2b318a21-8f81-ec8d-e491-578fd6664f3b"
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"name": "NVIDIA H200",
|
| 137 |
+
"memoryTotal": "150754820096",
|
| 138 |
+
"cudaCores": 16896,
|
| 139 |
+
"architecture": "Hopper",
|
| 140 |
+
"uuid": "GPU-b649aafd-4d77-de99-7018-1ab8dfe1b680"
|
| 141 |
+
}
|
| 142 |
+
],
|
| 143 |
+
"cudaVersion": "12.5",
|
| 144 |
+
"writerId": "s052mduyb2cc0jjvjj8ed0uz9zryksd0"
|
| 145 |
+
}
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-core.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-02-24T10:00:04.675432912Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmkcypry1/port-2611777.txt","pid":2611777,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-02-24T10:00:04.675884517Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2611777}
|
| 3 |
+
{"time":"2026-02-24T10:00:04.675894319Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2611777-2613701-460156490/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-02-24T10:00:04.832299681Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-02-24T10:00:04.841837103Z","level":"INFO","msg":"handleInformInit: received","streamId":"d1v6qg5k","id":"1(@)"}
|
| 6 |
+
{"time":"2026-02-24T10:00:05.551555685Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d1v6qg5k","id":"1(@)"}
|
| 7 |
+
{"time":"2026-02-24T13:46:49.49981133Z","level":"INFO","msg":"server: parent process exited, terminating service process"}
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-02-24T10:00:04.845715487Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
|
| 2 |
+
{"time":"2026-02-24T10:00:05.548861429Z","level":"INFO","msg":"stream: created new stream","id":"d1v6qg5k"}
|
| 3 |
+
{"time":"2026-02-24T10:00:05.548936729Z","level":"INFO","msg":"handler: started","stream_id":"d1v6qg5k"}
|
| 4 |
+
{"time":"2026-02-24T10:00:05.551545692Z","level":"INFO","msg":"stream: started","id":"d1v6qg5k"}
|
| 5 |
+
{"time":"2026-02-24T10:00:05.551566721Z","level":"INFO","msg":"writer: started","stream_id":"d1v6qg5k"}
|
| 6 |
+
{"time":"2026-02-24T10:00:05.551568325Z","level":"INFO","msg":"sender: started","stream_id":"d1v6qg5k"}
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug.log
ADDED
|
File without changes
|
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/run-d1v6qg5k.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:463848264b9e61de4b23b0b5ac4f93406acdaf6d0e48cbc2f709cee97eaaace3
|
| 3 |
+
size 7700480
|