Vincent2311 committed on
Commit
01d79f8
·
verified ·
1 Parent(s): 1a12c13

Add files using upload-large-folder tool

Browse files
Files changed (43) hide show
  1. .gitattributes +3 -0
  2. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt +3 -0
  3. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt +3 -0
  4. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/config.yaml +70 -0
  5. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/dataset_statistics.json +193 -0
  6. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/run_franka_vp_filter_class_12.sh +64 -0
  7. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/summary.jsonl +2 -0
  8. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log +6 -0
  9. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log +0 -0
  10. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/output.log +0 -0
  11. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/requirements.txt +151 -0
  12. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/wandb-metadata.json +145 -0
  13. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug-internal.log +6 -0
  14. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug.log +0 -0
  15. franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/run-tbifv35r.wandb +3 -0
  16. franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt +3 -0
  17. franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt +3 -0
  18. franka_move_egg_visual_prompt_QwenOFT_4k_save/config.yaml +70 -0
  19. franka_move_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json +193 -0
  20. franka_move_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_move_egg.sh +64 -0
  21. franka_move_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl +2 -0
  22. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log +6 -0
  23. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log +0 -0
  24. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/output.log +0 -0
  25. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/requirements.txt +151 -0
  26. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/wandb-metadata.json +145 -0
  27. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug-internal.log +6 -0
  28. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug.log +0 -0
  29. franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/run-90ibcpp4.wandb +3 -0
  30. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt +3 -0
  31. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/config.yaml +70 -0
  32. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json +193 -0
  33. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_pick_color_egg.sh +64 -0
  34. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl +1 -0
  35. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log +6 -0
  36. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log +0 -0
  37. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/output.log +0 -0
  38. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/requirements.txt +151 -0
  39. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/wandb-metadata.json +145 -0
  40. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-core.log +7 -0
  41. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-internal.log +6 -0
  42. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug.log +0 -0
  43. franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/run-d1v6qg5k.wandb +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/run-90ibcpp4.wandb filter=lfs diff=lfs merge=lfs -text
37
+ franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/run-d1v6qg5k.wandb filter=lfs diff=lfs merge=lfs -text
38
+ franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/run-tbifv35r.wandb filter=lfs diff=lfs merge=lfs -text
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8d41bfc50361a4e7f18e7f03ce4e0e265371bd74be9886a93b0f5ea8edd04de
3
+ size 9785060316
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0673adbb3963b4326e94b08d1d3d35ef128ef035506fd79fc05c34554fa08674
3
+ size 9785060316
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/config.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ data_mix: smartmore_franka_filter_class_12
6
+ data_root_dir: /gpfs/wangzixuan/visual_prompting/real_data/lerobot
7
+ dataset_py: visual_prompt_datasets
8
+ delete_pause_frame: false
9
+ feed_both_images: true
10
+ image_size:
11
+ - 224
12
+ - 224
13
+ num_workers: 4
14
+ per_device_batch_size: 32
15
+ target_location_prompt_type: box
16
+ target_object_prompt_type: crosshair
17
+ use_subtask: false
18
+ video_backend: decord
19
+ visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
20
+ vp_data:
21
+ dataset_py: visual_prompt_prediction_datasets
22
+ extracted_frames_dir: /gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
23
+ feed_both_images: false
24
+ num_workers: 4
25
+ per_device_batch_size: 8
26
+ target_location_prompt_type: box
27
+ target_object_prompt_type: crosshair
28
+ visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
29
+ framework:
30
+ action_model:
31
+ action_dim: 7
32
+ action_hidden_dim: 2560
33
+ action_model_type: DiT-B
34
+ future_action_window_size: 15
35
+ past_action_window_size: 0
36
+ name: QwenOFT
37
+ qwenvl:
38
+ base_vlm: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
39
+ output_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_filter_class_12_visual_prompt_QwenOFT_4k_save
40
+ run_id: franka_filter_class_12_visual_prompt_QwenOFT_4k_save
41
+ run_root_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
42
+ seed: 42
43
+ trainer:
44
+ eval_interval: 100
45
+ freeze_modules: null
46
+ gradient_accumulation_steps: 1
47
+ gradient_clipping: 1.0
48
+ is_resume: false
49
+ learning_rate:
50
+ action_model: 0.0001
51
+ base: 3.0e-05
52
+ qwen_vl_interface: 1.0e-05
53
+ logging_frequency: 10
54
+ loss_scale:
55
+ visual_prompt: 0.1
56
+ vla: 1.0
57
+ lr_scheduler_type: cosine_with_min_lr
58
+ max_train_steps: 100000
59
+ num_warmup_steps: 5000
60
+ optimizer:
61
+ betas:
62
+ - 0.9
63
+ - 0.95
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-08
66
+ save_interval: 4000
67
+ scheduler_specific_kwargs:
68
+ min_lr: 5.0e-07
69
+ wandb_entity: zwanggk
70
+ wandb_project: franka_visual_prompt
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/dataset_statistics.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "franka": {
3
+ "action": {
4
+ "mean": [
5
+ -0.020236223936080933,
6
+ 0.008768259882344864,
7
+ 0.007521322928369045,
8
+ -0.006128499051555991,
9
+ 0.019211848732084036,
10
+ -0.00576494331471622,
11
+ 0.20101751387119293
12
+ ],
13
+ "std": [
14
+ 0.24978733014273452,
15
+ 0.24741882289981892,
16
+ 0.181121291568636,
17
+ 0.1732716775215671,
18
+ 0.19246192022144204,
19
+ 0.2615901018815526,
20
+ 0.9797030975328717
21
+ ],
22
+ "max": [
23
+ 0.8579332232475281,
24
+ 0.9207748770713806,
25
+ 0.8091973066329956,
26
+ 0.9944977164268494,
27
+ 1.0313228368759155,
28
+ 0.9821529984474182,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -1.0587303638458252,
33
+ -0.9918345212936401,
34
+ -0.999170184135437,
35
+ -1.0410339832305908,
36
+ -1.069510817527771,
37
+ -1.022361397743225,
38
+ -1.0
39
+ ],
40
+ "q01": [
41
+ -0.6976645022630692,
42
+ -0.599154212474823,
43
+ -0.6296609127521515,
44
+ -0.550723231434822,
45
+ -0.4845139479637146,
46
+ -0.7129359030723572,
47
+ -1.0
48
+ ],
49
+ "q99": [
50
+ 0.641489732265472,
51
+ 0.613702954649925,
52
+ 0.3858347168564795,
53
+ 0.48247617363929723,
54
+ 0.6824872374534604,
55
+ 0.7233274286985395,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.7828335464000702,
71
+ -0.37660054862499237,
72
+ 0.3502291142940521,
73
+ -0.32449065148830414,
74
+ -0.1438142955303192,
75
+ -0.048927899450063705,
76
+ 0.016899482579901814,
77
+ -0.006700105965137482,
78
+ -0.04339943639934063,
79
+ 0.565076932311058,
80
+ 0.14807689003646374,
81
+ 0.3588131070137024,
82
+ -0.04751526936888695,
83
+ -0.0014195036492310464,
84
+ 0.0006292320467764512,
85
+ 0.0010934736637864262,
86
+ -0.0009234353783540428,
87
+ 0.0001233784896612633
88
+ ],
89
+ "std": [
90
+ 0.2981169524439282,
91
+ 2.898492135275962,
92
+ 2.3926403856445035,
93
+ 1.697119778687442,
94
+ 0.12494011727025223,
95
+ 0.12099017598413474,
96
+ 0.0501878448527201,
97
+ 0.09417516179359244,
98
+ 0.08348633664179528,
99
+ 0.7109853859604784,
100
+ 1.1036816032306362,
101
+ 1.2343315337648189,
102
+ 0.5035770878145771,
103
+ 0.03827079217334366,
104
+ 0.040987636446814746,
105
+ 0.029763517644250365,
106
+ 0.044215816757692435,
107
+ 0.0422595564363939
108
+ ],
109
+ "max": [
110
+ 1.0,
111
+ 10.657198905944824,
112
+ 11.896556854248047,
113
+ 17.887727737426758,
114
+ 0.19620218873023987,
115
+ 0.2590746283531189,
116
+ 0.15432307124137878,
117
+ 0.2829505205154419,
118
+ 0.22624853253364563,
119
+ 2.961930990219116,
120
+ 4.537433624267578,
121
+ 5.073845386505127,
122
+ 1.48410165309906,
123
+ 0.15859822928905487,
124
+ 0.19925405085086823,
125
+ 0.1611137092113495,
126
+ 0.50095134973526,
127
+ 0.5023257732391357
128
+ ],
129
+ "min": [
130
+ 0.09758453816175461,
131
+ -12.897954940795898,
132
+ -11.083069801330566,
133
+ -7.640081405639648,
134
+ -0.4269719123840332,
135
+ -0.3416127562522888,
136
+ -0.1597636491060257,
137
+ -0.32453784346580505,
138
+ -0.30202534794807434,
139
+ -1.5979795455932617,
140
+ -3.9540557861328125,
141
+ -4.192753791809082,
142
+ -2.5385868549346924,
143
+ -0.19709119200706482,
144
+ -0.21249936521053314,
145
+ -0.18670706450939178,
146
+ -0.4260907471179962,
147
+ -0.32522478699684143
148
+ ],
149
+ "q01": [
150
+ 0.10338164120912552,
151
+ -8.420694007873536,
152
+ -5.513705759048462,
153
+ -4.206850490570068,
154
+ -0.39390419840812685,
155
+ -0.2835283195972443,
156
+ -0.10731924802064896,
157
+ -0.24680248156189918,
158
+ -0.25243266463279723,
159
+ -0.6726837068796158,
160
+ -2.3833262729644775,
161
+ -2.3028082203865052,
162
+ -0.87620365858078,
163
+ -0.11551655068993569,
164
+ -0.09941653206944466,
165
+ -0.11109080165624619,
166
+ -0.11595035888254643,
167
+ -0.10552470840513706
168
+ ],
169
+ "q99": [
170
+ 1.0,
171
+ 5.542670731544491,
172
+ 8.088945960998533,
173
+ 3.9492343997955315,
174
+ 0.10959563791751858,
175
+ 0.1900119286775589,
176
+ 0.12734755516052243,
177
+ 0.19311886593699443,
178
+ 0.13680730774998648,
179
+ 2.6747358369827268,
180
+ 3.261981971263885,
181
+ 3.7809881472587583,
182
+ 0.8467118602991103,
183
+ 0.1097166529297828,
184
+ 0.10880154877901067,
185
+ 0.06740234047174451,
186
+ 0.13983971580862997,
187
+ 0.13394161254167547
188
+ ]
189
+ },
190
+ "num_transitions": 107525,
191
+ "num_trajectories": 600
192
+ }
193
+ }
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/run_franka_vp_filter_class_12.sh ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Visual Prompt Training Script for Franka - filter_class_1 + filter_class_2
3
+
4
+ export NCCL_SOCKET_IFNAME=bond0
5
+ export NCCL_IB_HCA=mlx5_2,mlx5_3
6
+
7
+ export NCCL_BLOCKING_WAIT=1
8
+ export NCCL_ASYNC_ERROR_HANDLING=1
9
+ export TORCH_NCCL_BLOCKING_WAIT=1
10
+ export TORCH_NCCL_ASYNC_ERROR_HANDLING=1
11
+
12
+ export NCCL_TIMEOUT=3600
13
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
14
+
15
+ Framework_name=QwenOFT
16
+ base_vlm=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
17
+ freeze_module_list=''
18
+ DIT_TYPE="DiT-B"
19
+
20
+ # Data paths
21
+ data_root_dir=/gpfs/wangzixuan/visual_prompting/real_data/lerobot
22
+ visual_prompt_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
23
+ extracted_frames_dir=/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
24
+ data_mix=smartmore_franka_filter_class_12
25
+
26
+ # Output
27
+ run_root_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
28
+ run_id=franka_filter_class_12_visual_prompt_QwenOFT_4k_save
29
+
30
+ output_dir=${run_root_dir}/${run_id}
31
+ mkdir -p ${output_dir}
32
+ cp $0 ${output_dir}/
33
+
34
+ accelerate launch \
35
+ --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
36
+ --num_processes 8 \
37
+ starVLA/training/train_starvla_visual_prompt.py \
38
+ --config_yaml ./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml \
39
+ --framework.name ${Framework_name} \
40
+ --framework.qwenvl.base_vlm ${base_vlm} \
41
+ --framework.action_model.action_model_type ${DIT_TYPE} \
42
+ --datasets.vla_data.data_root_dir ${data_root_dir} \
43
+ --datasets.vla_data.visual_prompt_dir ${visual_prompt_dir} \
44
+ --datasets.vla_data.data_mix ${data_mix} \
45
+ --datasets.vla_data.per_device_batch_size 32 \
46
+ --datasets.vla_data.video_backend decord \
47
+ --datasets.vp_data.visual_prompt_dir ${visual_prompt_dir} \
48
+ --datasets.vp_data.extracted_frames_dir ${extracted_frames_dir} \
49
+ --datasets.vp_data.per_device_batch_size 8 \
50
+ --trainer.freeze_modules "${freeze_module_list}" \
51
+ --trainer.max_train_steps 100000 \
52
+ --trainer.save_interval 4000 \
53
+ --trainer.logging_frequency 10 \
54
+ --trainer.eval_interval 100 \
55
+ --trainer.learning_rate.base 3e-5 \
56
+ --trainer.learning_rate.qwen_vl_interface 1e-5 \
57
+ --trainer.loss_scale.visual_prompt 0.1 \
58
+ --datasets.vla_data.use_subtask false \
59
+ --datasets.vla_data.feed_both_images true \
60
+ --datasets.vp_data.feed_both_images false \
61
+ --run_root_dir ${run_root_dir} \
62
+ --run_id ${run_id} \
63
+ --wandb_project franka_visual_prompt \
64
+ --wandb_entity zwanggk
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/summary.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"steps": 4000}
2
+ {"steps": 8000}
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2026-02-24T09:57:57.425525111Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-02-24T09:57:58.13372956Z","level":"INFO","msg":"stream: created new stream","id":"tbifv35r"}
3
+ {"time":"2026-02-24T09:57:58.133800643Z","level":"INFO","msg":"handler: started","stream_id":"tbifv35r"}
4
+ {"time":"2026-02-24T09:57:58.136777585Z","level":"INFO","msg":"stream: started","id":"tbifv35r"}
5
+ {"time":"2026-02-24T09:57:58.136796948Z","level":"INFO","msg":"sender: started","stream_id":"tbifv35r"}
6
+ {"time":"2026-02-24T09:57:58.136798529Z","level":"INFO","msg":"writer: started","stream_id":"tbifv35r"}
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log ADDED
File without changes
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/requirements.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ docstring_parser==0.17.0
3
+ pydantic_core==2.27.2
4
+ py-cpuinfo==9.0.0
5
+ Werkzeug==3.1.5
6
+ pandas==2.3.3
7
+ kiwisolver==1.4.9
8
+ httpcore==1.0.9
9
+ nvidia-cuda-runtime-cu12==12.4.127
10
+ Jinja2==3.1.6
11
+ exceptiongroup==1.3.1
12
+ filelock==3.20.3
13
+ torchvision==0.21.0
14
+ gitdb==4.0.12
15
+ fastparquet==2024.11.0
16
+ tensorboard==2.20.0
17
+ portalocker==3.2.0
18
+ timm==1.0.24
19
+ nvidia-nvjitlink-cu12==12.4.127
20
+ nvidia-cudnn-cu12==9.1.0.70
21
+ pyparsing==3.3.2
22
+ protobuf==6.33.4
23
+ nvitop==1.6.2
24
+ importlib_metadata==8.7.1
25
+ GitPython==3.1.46
26
+ annotated-types==0.7.0
27
+ antlr4-python3-runtime==4.9.3
28
+ yacs==0.1.8
29
+ contourpy==1.3.2
30
+ charset-normalizer==3.4.4
31
+ hjson==3.1.0
32
+ tensorboard-data-server==0.7.2
33
+ six==1.17.0
34
+ nvidia-cuda-cupti-cu12==12.4.127
35
+ tqdm==4.67.1
36
+ h11==0.16.0
37
+ zipp==3.23.0
38
+ pipablepytorch3d==0.7.6
39
+ transformers==4.57.0
40
+ websockets==16.0
41
+ opencv-python-headless==4.11.0.86
42
+ ninja==1.13.0
43
+ websocket-client==1.8.0
44
+ nvidia-nvtx-cu12==12.4.127
45
+ grpcio==1.76.0
46
+ psutil==7.2.1
47
+ typing_extensions==4.15.0
48
+ zope.event==6.1
49
+ mdurl==0.1.2
50
+ scipy==1.15.3
51
+ pydantic==2.10.6
52
+ tiktoken==0.12.0
53
+ networkx==3.4.2
54
+ zope.interface==8.2
55
+ lazy_loader==0.4
56
+ websocket==0.2.1
57
+ huggingface-hub==0.36.0
58
+ transformers-stream-generator==0.0.4
59
+ cycler==0.12.1
60
+ safetensors==0.7.0
61
+ requests==2.32.5
62
+ matplotlib==3.10.8
63
+ nvidia-cuda-nvrtc-cu12==12.4.127
64
+ qwen-vl-utils==0.0.14
65
+ scikit-image==0.25.2
66
+ deepspeed==0.16.9
67
+ omegaconf==2.3.0
68
+ Markdown==3.10.1
69
+ sentry-sdk==2.50.0
70
+ pip==25.3
71
+ pillow==12.1.0
72
+ pyarrow==14.0.1
73
+ nvidia-cublas-cu12==12.4.5.8
74
+ termcolor==3.3.0
75
+ tifffile==2025.5.10
76
+ nvidia-curand-cu12==10.3.5.147
77
+ iopath==0.1.10
78
+ wandb==0.24.0
79
+ PyYAML==6.0.3
80
+ flash_attn==2.7.4.post1
81
+ wheel==0.45.1
82
+ tokenizers==0.22.2
83
+ idna==3.11
84
+ accelerate==1.5.2
85
+ mpmath==1.3.0
86
+ einops==0.8.1
87
+ urllib3==2.6.3
88
+ diffusers==0.36.0
89
+ hf-xet==1.2.0
90
+ eval_type_backport==0.3.1
91
+ fsspec==2026.1.0
92
+ ImageIO==2.37.2
93
+ tzdata==2025.3
94
+ torch==2.6.0
95
+ click==8.3.1
96
+ albumentations==1.4.18
97
+ setuptools==80.9.0
98
+ tabulate==0.9.0
99
+ av==12.3.0
100
+ nvidia-cusparselt-cu12==0.6.2
101
+ markdown-it-py==4.0.0
102
+ absl-py==2.3.1
103
+ nvidia-cusparse-cu12==12.3.1.170
104
+ starVLA==1.0.1
105
+ packaging==26.0
106
+ MarkupSafe==3.0.3
107
+ eva-decord==0.6.1
108
+ Pygments==2.19.2
109
+ rich==14.2.0
110
+ nvidia-cufft-cu12==11.2.1.3
111
+ numpydantic==1.6.9
112
+ triton==3.2.0
113
+ certifi==2026.1.4
114
+ smmap==5.0.2
115
+ fvcore==0.1.5.post20221221
116
+ albucore==0.0.17
117
+ fonttools==4.61.1
118
+ regex==2026.1.15
119
+ pytz==2025.2
120
+ python-dateutil==2.9.0.post0
121
+ greenlet==3.3.0
122
+ platformdirs==4.5.1
123
+ nvidia-ml-py==13.590.48
124
+ cramjam==2.11.0
125
+ numpy==1.26.4
126
+ tyro==1.0.5
127
+ nvidia-cusolver-cu12==11.6.1.9
128
+ nvidia-nccl-cu12==2.21.5
129
+ httpx==0.28.1
130
+ gevent==25.9.1
131
+ typeguard==4.4.4
132
+ msgpack==1.1.2
133
+ decord==0.6.0
134
+ sympy==1.13.1
135
+ anyio==4.12.1
136
+ jaraco.collections==5.1.0
137
+ packaging==24.2
138
+ importlib_metadata==8.0.0
139
+ tomli==2.0.1
140
+ backports.tarfile==1.2.0
141
+ typing_extensions==4.12.2
142
+ jaraco.context==5.3.0
143
+ typeguard==4.3.0
144
+ autocommand==2.2.2
145
+ jaraco.text==3.12.1
146
+ more-itertools==10.3.0
147
+ platformdirs==4.2.2
148
+ wheel==0.45.1
149
+ inflect==7.3.1
150
+ jaraco.functools==4.0.1
151
+ zipp==3.19.2
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/files/wandb-metadata.json ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-113-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.19",
4
+ "startedAt": "2026-02-24T09:57:55.799711Z",
5
+ "args": [
6
+ "--config_yaml",
7
+ "./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml",
8
+ "--framework.name",
9
+ "QwenOFT",
10
+ "--framework.qwenvl.base_vlm",
11
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct",
12
+ "--framework.action_model.action_model_type",
13
+ "DiT-B",
14
+ "--datasets.vla_data.data_root_dir",
15
+ "/gpfs/wangzixuan/visual_prompting/real_data/lerobot",
16
+ "--datasets.vla_data.visual_prompt_dir",
17
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
18
+ "--datasets.vla_data.data_mix",
19
+ "smartmore_franka_filter_class_12",
20
+ "--datasets.vla_data.per_device_batch_size",
21
+ "32",
22
+ "--datasets.vla_data.video_backend",
23
+ "decord",
24
+ "--datasets.vp_data.visual_prompt_dir",
25
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
26
+ "--datasets.vp_data.extracted_frames_dir",
27
+ "/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames",
28
+ "--datasets.vp_data.per_device_batch_size",
29
+ "8",
30
+ "--trainer.freeze_modules",
31
+ "",
32
+ "--trainer.max_train_steps",
33
+ "100000",
34
+ "--trainer.save_interval",
35
+ "4000",
36
+ "--trainer.logging_frequency",
37
+ "10",
38
+ "--trainer.eval_interval",
39
+ "100",
40
+ "--trainer.learning_rate.base",
41
+ "3e-5",
42
+ "--trainer.learning_rate.qwen_vl_interface",
43
+ "1e-5",
44
+ "--trainer.loss_scale.visual_prompt",
45
+ "0.1",
46
+ "--datasets.vla_data.use_subtask",
47
+ "false",
48
+ "--datasets.vla_data.feed_both_images",
49
+ "true",
50
+ "--datasets.vp_data.feed_both_images",
51
+ "false",
52
+ "--run_root_dir",
53
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints",
54
+ "--run_id",
55
+ "franka_filter_class_12_visual_prompt_QwenOFT_4k_save",
56
+ "--wandb_project",
57
+ "franka_visual_prompt",
58
+ "--wandb_entity",
59
+ "zwanggk"
60
+ ],
61
+ "program": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
62
+ "codePath": "starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
63
+ "codePathLocal": "starVLA/training/train_starvla_visual_prompt.py",
64
+ "git": {
65
+ "remote": "https://github.com/Vincent2311/visual_prompting.git",
66
+ "commit": "c53a6c11679f38afa4bb3de09d8c540d11f8a500"
67
+ },
68
+ "email": "zwanggk@connect.ust.hk",
69
+ "root": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb",
70
+ "host": "C01-GPU-01-10U",
71
+ "executable": "/gpfs/wangzixuan/conda_envs/starVLA-Robocasa/bin/python3.10",
72
+ "cpu_count": 96,
73
+ "cpu_count_logical": 192,
74
+ "gpu": "NVIDIA H200",
75
+ "gpu_count": 8,
76
+ "disk": {
77
+ "/": {
78
+ "total": "942793330688",
79
+ "used": "707052032000"
80
+ }
81
+ },
82
+ "memory": {
83
+ "total": "2163973521408"
84
+ },
85
+ "gpu_nvidia": [
86
+ {
87
+ "name": "NVIDIA H200",
88
+ "memoryTotal": "150754820096",
89
+ "cudaCores": 16896,
90
+ "architecture": "Hopper",
91
+ "uuid": "GPU-816a1e31-ed10-c6fd-220c-d91879e38015"
92
+ },
93
+ {
94
+ "name": "NVIDIA H200",
95
+ "memoryTotal": "150754820096",
96
+ "cudaCores": 16896,
97
+ "architecture": "Hopper",
98
+ "uuid": "GPU-d56aff94-5374-929d-ef33-15c119855ea7"
99
+ },
100
+ {
101
+ "name": "NVIDIA H200",
102
+ "memoryTotal": "150754820096",
103
+ "cudaCores": 16896,
104
+ "architecture": "Hopper",
105
+ "uuid": "GPU-78944ba0-fe51-bf24-7e14-e04c0408840a"
106
+ },
107
+ {
108
+ "name": "NVIDIA H200",
109
+ "memoryTotal": "150754820096",
110
+ "cudaCores": 16896,
111
+ "architecture": "Hopper",
112
+ "uuid": "GPU-c007bd7d-db75-97db-2a09-2fe67e426a54"
113
+ },
114
+ {
115
+ "name": "NVIDIA H200",
116
+ "memoryTotal": "150754820096",
117
+ "cudaCores": 16896,
118
+ "architecture": "Hopper",
119
+ "uuid": "GPU-431164a6-c9a4-506b-b0df-ed7e157a135c"
120
+ },
121
+ {
122
+ "name": "NVIDIA H200",
123
+ "memoryTotal": "150754820096",
124
+ "cudaCores": 16896,
125
+ "architecture": "Hopper",
126
+ "uuid": "GPU-47bdcdec-b481-8af2-8792-7ea0e5a0bfcc"
127
+ },
128
+ {
129
+ "name": "NVIDIA H200",
130
+ "memoryTotal": "150754820096",
131
+ "cudaCores": 16896,
132
+ "architecture": "Hopper",
133
+ "uuid": "GPU-79ab6893-97ab-2bec-a1be-3b3f9d925edf"
134
+ },
135
+ {
136
+ "name": "NVIDIA H200",
137
+ "memoryTotal": "150754820096",
138
+ "cudaCores": 16896,
139
+ "architecture": "Hopper",
140
+ "uuid": "GPU-4c681f48-0b8d-cc2a-f5b8-f617c63961e4"
141
+ }
142
+ ],
143
+ "cudaVersion": "12.5",
144
+ "writerId": "bsqgxg8olanj9euexfx30o9gav0r3fcd"
145
+ }
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2026-02-24T09:57:57.425525111Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-02-24T09:57:58.13372956Z","level":"INFO","msg":"stream: created new stream","id":"tbifv35r"}
3
+ {"time":"2026-02-24T09:57:58.133800643Z","level":"INFO","msg":"handler: started","stream_id":"tbifv35r"}
4
+ {"time":"2026-02-24T09:57:58.136777585Z","level":"INFO","msg":"stream: started","id":"tbifv35r"}
5
+ {"time":"2026-02-24T09:57:58.136796948Z","level":"INFO","msg":"sender: started","stream_id":"tbifv35r"}
6
+ {"time":"2026-02-24T09:57:58.136798529Z","level":"INFO","msg":"writer: started","stream_id":"tbifv35r"}
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/logs/debug.log ADDED
File without changes
franka_filter_class_12_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095755-tbifv35r/run-tbifv35r.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08881d560c92aa32741fdbf77055a6996992ee774d820375bf0dffa487be74a8
3
+ size 9535488
franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d93891df1a50c6bcbacdca7e1635b80e20ffe8701e13159bce07849bd055dc
3
+ size 9785060316
franka_move_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_8000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e16aa04b70e22cb047bf84b33362f88f67ac006c6280f373bc63df84cd3f91
3
+ size 9785060316
franka_move_egg_visual_prompt_QwenOFT_4k_save/config.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ data_mix: smartmore_franka_move_egg
6
+ data_root_dir: /gpfs/wangzixuan/visual_prompting/real_data/lerobot
7
+ dataset_py: visual_prompt_datasets
8
+ delete_pause_frame: false
9
+ feed_both_images: true
10
+ image_size:
11
+ - 224
12
+ - 224
13
+ num_workers: 4
14
+ per_device_batch_size: 32
15
+ target_location_prompt_type: box
16
+ target_object_prompt_type: crosshair
17
+ use_subtask: false
18
+ video_backend: pyav
19
+ visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
20
+ vp_data:
21
+ dataset_py: visual_prompt_prediction_datasets
22
+ extracted_frames_dir: /gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
23
+ feed_both_images: false
24
+ num_workers: 4
25
+ per_device_batch_size: 8
26
+ target_location_prompt_type: box
27
+ target_object_prompt_type: crosshair
28
+ visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
29
+ framework:
30
+ action_model:
31
+ action_dim: 7
32
+ action_hidden_dim: 2560
33
+ action_model_type: DiT-B
34
+ future_action_window_size: 15
35
+ past_action_window_size: 0
36
+ name: QwenOFT
37
+ qwenvl:
38
+ base_vlm: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
39
+ output_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_move_egg_visual_prompt_QwenOFT_4k_save
40
+ run_id: franka_move_egg_visual_prompt_QwenOFT_4k_save
41
+ run_root_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
42
+ seed: 42
43
+ trainer:
44
+ eval_interval: 100
45
+ freeze_modules: null
46
+ gradient_accumulation_steps: 1
47
+ gradient_clipping: 1.0
48
+ is_resume: false
49
+ learning_rate:
50
+ action_model: 0.0001
51
+ base: 3.0e-05
52
+ qwen_vl_interface: 1.0e-05
53
+ logging_frequency: 10
54
+ loss_scale:
55
+ visual_prompt: 0.1
56
+ vla: 1.0
57
+ lr_scheduler_type: cosine_with_min_lr
58
+ max_train_steps: 100000
59
+ num_warmup_steps: 5000
60
+ optimizer:
61
+ betas:
62
+ - 0.9
63
+ - 0.95
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-08
66
+ save_interval: 4000
67
+ scheduler_specific_kwargs:
68
+ min_lr: 5.0e-07
69
+ wandb_entity: zwanggk
70
+ wandb_project: franka_visual_prompt
franka_move_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "franka": {
3
+ "action": {
4
+ "mean": [
5
+ -0.01956442929804325,
6
+ 0.0010961260413751006,
7
+ -0.0022945471573621035,
8
+ -0.06118167191743851,
9
+ 0.05602950230240822,
10
+ -0.007821248844265938,
11
+ -0.04108702763915062
12
+ ],
13
+ "std": [
14
+ 0.35265296697616577,
15
+ 0.14900609850883484,
16
+ 0.24542567133903503,
17
+ 0.1331859529018402,
18
+ 0.1666686236858368,
19
+ 0.1259625405073166,
20
+ 0.9991428256034851
21
+ ],
22
+ "max": [
23
+ 1.0015383958816528,
24
+ 0.5595282316207886,
25
+ 0.9087280631065369,
26
+ 0.583410382270813,
27
+ 0.5900699496269226,
28
+ 0.6256399154663086,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.7770818471908569,
33
+ -0.6166439056396484,
34
+ -0.9965048432350159,
35
+ -0.6799317002296448,
36
+ -0.4825618863105774,
37
+ -0.6640564203262329,
38
+ -1.0
39
+ ],
40
+ "q01": [
41
+ -0.6592557197809219,
42
+ -0.42651776790618895,
43
+ -0.6513351821899414,
44
+ -0.4115039449930191,
45
+ -0.23114330932497978,
46
+ -0.3076638102531433,
47
+ -1.0
48
+ ],
49
+ "q99": [
50
+ 0.7027708488702773,
51
+ 0.37056812822818747,
52
+ 0.482135674059391,
53
+ 0.279307292103767,
54
+ 0.44304268836975086,
55
+ 0.40743342936038746,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.712118923664093,
71
+ -0.9058223962783813,
72
+ 0.23588787019252777,
73
+ -0.5429158806800842,
74
+ -0.040398646146059036,
75
+ 0.020606935024261475,
76
+ 0.017701730132102966,
77
+ -0.0473877377808094,
78
+ -0.008974903263151646,
79
+ 0.07259788364171982,
80
+ -0.10359716415405273,
81
+ 0.6327128410339355,
82
+ -0.06314197182655334,
83
+ -0.0013578623766079545,
84
+ 0.00019790347141679376,
85
+ 0.00011283090861979872,
86
+ -0.0006499870796687901,
87
+ 0.00032370671397075057
88
+ ],
89
+ "std": [
90
+ 0.2781350612640381,
91
+ 2.6392948627471924,
92
+ 1.3273509740829468,
93
+ 2.0749592781066895,
94
+ 0.07837909460067749,
95
+ 0.03738027811050415,
96
+ 0.02584066428244114,
97
+ 0.04791264608502388,
98
+ 0.057498227804899216,
99
+ 0.14968742430210114,
100
+ 0.8472592234611511,
101
+ 1.0443724393844604,
102
+ 0.39697346091270447,
103
+ 0.05666997656226158,
104
+ 0.024987852200865746,
105
+ 0.03953177109360695,
106
+ 0.0359305739402771,
107
+ 0.04984797537326813
108
+ ],
109
+ "max": [
110
+ 1.0,
111
+ 5.350800514221191,
112
+ 3.8553338050842285,
113
+ 17.060976028442383,
114
+ 0.11130910366773605,
115
+ 0.1087154671549797,
116
+ 0.08333498984575272,
117
+ 0.1969706416130066,
118
+ 0.1374618262052536,
119
+ 0.3587442934513092,
120
+ 3.0078964233398438,
121
+ 3.1823999881744385,
122
+ 0.8323067426681519,
123
+ 0.18048053979873657,
124
+ 0.10284245759248734,
125
+ 0.16207736730575562,
126
+ 0.157505601644516,
127
+ 0.26426705718040466
128
+ ],
129
+ "min": [
130
+ 0.40096619725227356,
131
+ -7.2232866287231445,
132
+ -4.4363179206848145,
133
+ -6.800429821014404,
134
+ -0.21506300568580627,
135
+ -0.07872974872589111,
136
+ -0.06743831932544708,
137
+ -0.1399754285812378,
138
+ -0.16577740013599396,
139
+ -0.5564588308334351,
140
+ -3.3933472633361816,
141
+ -1.7985055446624756,
142
+ -0.9258536100387573,
143
+ -0.13820408284664154,
144
+ -0.11969966441392899,
145
+ -0.1538764387369156,
146
+ -0.19596895575523376,
147
+ -0.20975197851657867
148
+ ],
149
+ "q01": [
150
+ 0.41449275612831116,
151
+ -5.746072840690613,
152
+ -2.327056176662445,
153
+ -5.084146018028259,
154
+ -0.20334001287817954,
155
+ -0.0645052993297577,
156
+ -0.050079321376979354,
157
+ -0.11857962332665921,
158
+ -0.14367493212223054,
159
+ -0.3888432151079178,
160
+ -2.201221220493317,
161
+ -1.3602424466609955,
162
+ -0.7360008960962295,
163
+ -0.10918830074369908,
164
+ -0.07522686988115311,
165
+ -0.1073022399097681,
166
+ -0.10450345933437348,
167
+ -0.09292908132076264
168
+ ],
169
+ "q99": [
170
+ 1.0,
171
+ 4.007417759895323,
172
+ 2.956605370044707,
173
+ 3.691619861125935,
174
+ 0.07898372933268537,
175
+ 0.0996882866322994,
176
+ 0.07147861436009406,
177
+ 0.10441832318902006,
178
+ 0.09363975144922722,
179
+ 0.33638142466545096,
180
+ 2.1681720423698363,
181
+ 2.683793127536772,
182
+ 0.6664970207214354,
183
+ 0.11945264495909196,
184
+ 0.06497061111032947,
185
+ 0.07682121112942683,
186
+ 0.0679727686196565,
187
+ 0.16351093247532844
188
+ ]
189
+ },
190
+ "num_transitions": 6182,
191
+ "num_trajectories": 80
192
+ }
193
+ }
franka_move_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_move_egg.sh ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Visual Prompt Training Script for Franka - move_egg
3
+
4
+ export NCCL_SOCKET_IFNAME=bond0
5
+ export NCCL_IB_HCA=mlx5_2,mlx5_3
6
+
7
+ export NCCL_BLOCKING_WAIT=1
8
+ export NCCL_ASYNC_ERROR_HANDLING=1
9
+ export TORCH_NCCL_BLOCKING_WAIT=1
10
+ export TORCH_NCCL_ASYNC_ERROR_HANDLING=1
11
+
12
+ export NCCL_TIMEOUT=3600
13
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
14
+
15
+ Framework_name=QwenOFT
16
+ base_vlm=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
17
+ freeze_module_list=''
18
+ DIT_TYPE="DiT-B"
19
+
20
+ # Data paths
21
+ data_root_dir=/gpfs/wangzixuan/visual_prompting/real_data/lerobot
22
+ visual_prompt_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
23
+ extracted_frames_dir=/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
24
+ data_mix=smartmore_franka_move_egg
25
+
26
+ # Output
27
+ run_root_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
28
+ run_id=franka_move_egg_visual_prompt_QwenOFT_4k_save
29
+
30
+ output_dir=${run_root_dir}/${run_id}
31
+ mkdir -p ${output_dir}
32
+ cp $0 ${output_dir}/
33
+
34
+ accelerate launch \
35
+ --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
36
+ --num_processes 8 \
37
+ starVLA/training/train_starvla_visual_prompt.py \
38
+ --config_yaml ./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml \
39
+ --framework.name ${Framework_name} \
40
+ --framework.qwenvl.base_vlm ${base_vlm} \
41
+ --framework.action_model.action_model_type ${DIT_TYPE} \
42
+ --datasets.vla_data.data_root_dir ${data_root_dir} \
43
+ --datasets.vla_data.visual_prompt_dir ${visual_prompt_dir} \
44
+ --datasets.vla_data.data_mix ${data_mix} \
45
+ --datasets.vla_data.per_device_batch_size 32 \
46
+ --datasets.vla_data.video_backend pyav \
47
+ --datasets.vp_data.visual_prompt_dir ${visual_prompt_dir} \
48
+ --datasets.vp_data.extracted_frames_dir ${extracted_frames_dir} \
49
+ --datasets.vp_data.per_device_batch_size 8 \
50
+ --trainer.freeze_modules "${freeze_module_list}" \
51
+ --trainer.max_train_steps 100000 \
52
+ --trainer.save_interval 4000 \
53
+ --trainer.logging_frequency 10 \
54
+ --trainer.eval_interval 100 \
55
+ --trainer.learning_rate.base 3e-5 \
56
+ --trainer.learning_rate.qwen_vl_interface 1e-5 \
57
+ --trainer.loss_scale.visual_prompt 0.1 \
58
+ --datasets.vla_data.use_subtask false \
59
+ --datasets.vla_data.feed_both_images true \
60
+ --datasets.vp_data.feed_both_images false \
61
+ --run_root_dir ${run_root_dir} \
62
+ --run_id ${run_id} \
63
+ --wandb_project franka_visual_prompt \
64
+ --wandb_entity zwanggk
franka_move_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"steps": 4000}
2
+ {"steps": 8000}
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2026-02-24T09:57:51.786209377Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-02-24T09:57:52.5013945Z","level":"INFO","msg":"stream: created new stream","id":"90ibcpp4"}
3
+ {"time":"2026-02-24T09:57:52.50146878Z","level":"INFO","msg":"handler: started","stream_id":"90ibcpp4"}
4
+ {"time":"2026-02-24T09:57:52.526145224Z","level":"INFO","msg":"stream: started","id":"90ibcpp4"}
5
+ {"time":"2026-02-24T09:57:52.526168565Z","level":"INFO","msg":"sender: started","stream_id":"90ibcpp4"}
6
+ {"time":"2026-02-24T09:57:52.526174028Z","level":"INFO","msg":"writer: started","stream_id":"90ibcpp4"}
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log ADDED
File without changes
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/requirements.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ docstring_parser==0.17.0
3
+ pydantic_core==2.27.2
4
+ py-cpuinfo==9.0.0
5
+ Werkzeug==3.1.5
6
+ pandas==2.3.3
7
+ kiwisolver==1.4.9
8
+ httpcore==1.0.9
9
+ nvidia-cuda-runtime-cu12==12.4.127
10
+ Jinja2==3.1.6
11
+ exceptiongroup==1.3.1
12
+ filelock==3.20.3
13
+ torchvision==0.21.0
14
+ gitdb==4.0.12
15
+ fastparquet==2024.11.0
16
+ tensorboard==2.20.0
17
+ portalocker==3.2.0
18
+ timm==1.0.24
19
+ nvidia-nvjitlink-cu12==12.4.127
20
+ nvidia-cudnn-cu12==9.1.0.70
21
+ pyparsing==3.3.2
22
+ protobuf==6.33.4
23
+ nvitop==1.6.2
24
+ importlib_metadata==8.7.1
25
+ GitPython==3.1.46
26
+ annotated-types==0.7.0
27
+ antlr4-python3-runtime==4.9.3
28
+ yacs==0.1.8
29
+ contourpy==1.3.2
30
+ charset-normalizer==3.4.4
31
+ hjson==3.1.0
32
+ tensorboard-data-server==0.7.2
33
+ six==1.17.0
34
+ nvidia-cuda-cupti-cu12==12.4.127
35
+ tqdm==4.67.1
36
+ h11==0.16.0
37
+ zipp==3.23.0
38
+ pipablepytorch3d==0.7.6
39
+ transformers==4.57.0
40
+ websockets==16.0
41
+ opencv-python-headless==4.11.0.86
42
+ ninja==1.13.0
43
+ websocket-client==1.8.0
44
+ nvidia-nvtx-cu12==12.4.127
45
+ grpcio==1.76.0
46
+ psutil==7.2.1
47
+ typing_extensions==4.15.0
48
+ zope.event==6.1
49
+ mdurl==0.1.2
50
+ scipy==1.15.3
51
+ pydantic==2.10.6
52
+ tiktoken==0.12.0
53
+ networkx==3.4.2
54
+ zope.interface==8.2
55
+ lazy_loader==0.4
56
+ websocket==0.2.1
57
+ huggingface-hub==0.36.0
58
+ transformers-stream-generator==0.0.4
59
+ cycler==0.12.1
60
+ safetensors==0.7.0
61
+ requests==2.32.5
62
+ matplotlib==3.10.8
63
+ nvidia-cuda-nvrtc-cu12==12.4.127
64
+ qwen-vl-utils==0.0.14
65
+ scikit-image==0.25.2
66
+ deepspeed==0.16.9
67
+ omegaconf==2.3.0
68
+ Markdown==3.10.1
69
+ sentry-sdk==2.50.0
70
+ pip==25.3
71
+ pillow==12.1.0
72
+ pyarrow==14.0.1
73
+ nvidia-cublas-cu12==12.4.5.8
74
+ termcolor==3.3.0
75
+ tifffile==2025.5.10
76
+ nvidia-curand-cu12==10.3.5.147
77
+ iopath==0.1.10
78
+ wandb==0.24.0
79
+ PyYAML==6.0.3
80
+ flash_attn==2.7.4.post1
81
+ wheel==0.45.1
82
+ tokenizers==0.22.2
83
+ idna==3.11
84
+ accelerate==1.5.2
85
+ mpmath==1.3.0
86
+ einops==0.8.1
87
+ urllib3==2.6.3
88
+ diffusers==0.36.0
89
+ hf-xet==1.2.0
90
+ eval_type_backport==0.3.1
91
+ fsspec==2026.1.0
92
+ ImageIO==2.37.2
93
+ tzdata==2025.3
94
+ torch==2.6.0
95
+ click==8.3.1
96
+ albumentations==1.4.18
97
+ setuptools==80.9.0
98
+ tabulate==0.9.0
99
+ av==12.3.0
100
+ nvidia-cusparselt-cu12==0.6.2
101
+ markdown-it-py==4.0.0
102
+ absl-py==2.3.1
103
+ nvidia-cusparse-cu12==12.3.1.170
104
+ starVLA==1.0.1
105
+ packaging==26.0
106
+ MarkupSafe==3.0.3
107
+ eva-decord==0.6.1
108
+ Pygments==2.19.2
109
+ rich==14.2.0
110
+ nvidia-cufft-cu12==11.2.1.3
111
+ numpydantic==1.6.9
112
+ triton==3.2.0
113
+ certifi==2026.1.4
114
+ smmap==5.0.2
115
+ fvcore==0.1.5.post20221221
116
+ albucore==0.0.17
117
+ fonttools==4.61.1
118
+ regex==2026.1.15
119
+ pytz==2025.2
120
+ python-dateutil==2.9.0.post0
121
+ greenlet==3.3.0
122
+ platformdirs==4.5.1
123
+ nvidia-ml-py==13.590.48
124
+ cramjam==2.11.0
125
+ numpy==1.26.4
126
+ tyro==1.0.5
127
+ nvidia-cusolver-cu12==11.6.1.9
128
+ nvidia-nccl-cu12==2.21.5
129
+ httpx==0.28.1
130
+ gevent==25.9.1
131
+ typeguard==4.4.4
132
+ msgpack==1.1.2
133
+ decord==0.6.0
134
+ sympy==1.13.1
135
+ anyio==4.12.1
136
+ jaraco.collections==5.1.0
137
+ packaging==24.2
138
+ importlib_metadata==8.0.0
139
+ tomli==2.0.1
140
+ backports.tarfile==1.2.0
141
+ typing_extensions==4.12.2
142
+ jaraco.context==5.3.0
143
+ typeguard==4.3.0
144
+ autocommand==2.2.2
145
+ jaraco.text==3.12.1
146
+ more-itertools==10.3.0
147
+ platformdirs==4.2.2
148
+ wheel==0.45.1
149
+ inflect==7.3.1
150
+ jaraco.functools==4.0.1
151
+ zipp==3.19.2
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/files/wandb-metadata.json ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-113-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.19",
4
+ "startedAt": "2026-02-24T09:57:49.605667Z",
5
+ "args": [
6
+ "--config_yaml",
7
+ "./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml",
8
+ "--framework.name",
9
+ "QwenOFT",
10
+ "--framework.qwenvl.base_vlm",
11
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct",
12
+ "--framework.action_model.action_model_type",
13
+ "DiT-B",
14
+ "--datasets.vla_data.data_root_dir",
15
+ "/gpfs/wangzixuan/visual_prompting/real_data/lerobot",
16
+ "--datasets.vla_data.visual_prompt_dir",
17
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
18
+ "--datasets.vla_data.data_mix",
19
+ "smartmore_franka_move_egg",
20
+ "--datasets.vla_data.per_device_batch_size",
21
+ "32",
22
+ "--datasets.vla_data.video_backend",
23
+ "pyav",
24
+ "--datasets.vp_data.visual_prompt_dir",
25
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
26
+ "--datasets.vp_data.extracted_frames_dir",
27
+ "/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames",
28
+ "--datasets.vp_data.per_device_batch_size",
29
+ "8",
30
+ "--trainer.freeze_modules",
31
+ "",
32
+ "--trainer.max_train_steps",
33
+ "100000",
34
+ "--trainer.save_interval",
35
+ "4000",
36
+ "--trainer.logging_frequency",
37
+ "10",
38
+ "--trainer.eval_interval",
39
+ "100",
40
+ "--trainer.learning_rate.base",
41
+ "3e-5",
42
+ "--trainer.learning_rate.qwen_vl_interface",
43
+ "1e-5",
44
+ "--trainer.loss_scale.visual_prompt",
45
+ "0.1",
46
+ "--datasets.vla_data.use_subtask",
47
+ "false",
48
+ "--datasets.vla_data.feed_both_images",
49
+ "true",
50
+ "--datasets.vp_data.feed_both_images",
51
+ "false",
52
+ "--run_root_dir",
53
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints",
54
+ "--run_id",
55
+ "franka_move_egg_visual_prompt_QwenOFT_4k_save",
56
+ "--wandb_project",
57
+ "franka_visual_prompt",
58
+ "--wandb_entity",
59
+ "zwanggk"
60
+ ],
61
+ "program": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
62
+ "codePath": "starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
63
+ "codePathLocal": "starVLA/training/train_starvla_visual_prompt.py",
64
+ "git": {
65
+ "remote": "https://github.com/Vincent2311/visual_prompting.git",
66
+ "commit": "c53a6c11679f38afa4bb3de09d8c540d11f8a500"
67
+ },
68
+ "email": "zwanggk@connect.ust.hk",
69
+ "root": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb",
70
+ "host": "C04-GPU-03-10U",
71
+ "executable": "/gpfs/wangzixuan/conda_envs/starVLA-Robocasa/bin/python3.10",
72
+ "cpu_count": 96,
73
+ "cpu_count_logical": 192,
74
+ "gpu": "NVIDIA H200",
75
+ "gpu_count": 8,
76
+ "disk": {
77
+ "/": {
78
+ "total": "942793330688",
79
+ "used": "717182078976"
80
+ }
81
+ },
82
+ "memory": {
83
+ "total": "2163973517312"
84
+ },
85
+ "gpu_nvidia": [
86
+ {
87
+ "name": "NVIDIA H200",
88
+ "memoryTotal": "150754820096",
89
+ "cudaCores": 16896,
90
+ "architecture": "Hopper",
91
+ "uuid": "GPU-397bd6b3-e89a-bb71-4bfc-ba5495d359da"
92
+ },
93
+ {
94
+ "name": "NVIDIA H200",
95
+ "memoryTotal": "150754820096",
96
+ "cudaCores": 16896,
97
+ "architecture": "Hopper",
98
+ "uuid": "GPU-f2b4af4a-fd61-2b16-7b54-7f0e0926bdcd"
99
+ },
100
+ {
101
+ "name": "NVIDIA H200",
102
+ "memoryTotal": "150754820096",
103
+ "cudaCores": 16896,
104
+ "architecture": "Hopper",
105
+ "uuid": "GPU-d05fb150-dd47-8890-ece8-03205a09a8f3"
106
+ },
107
+ {
108
+ "name": "NVIDIA H200",
109
+ "memoryTotal": "150754820096",
110
+ "cudaCores": 16896,
111
+ "architecture": "Hopper",
112
+ "uuid": "GPU-3a893c22-7154-b9de-4ba6-86e87055c9a6"
113
+ },
114
+ {
115
+ "name": "NVIDIA H200",
116
+ "memoryTotal": "150754820096",
117
+ "cudaCores": 16896,
118
+ "architecture": "Hopper",
119
+ "uuid": "GPU-eea7d972-f9c2-0648-4d8a-845e6a2a74a7"
120
+ },
121
+ {
122
+ "name": "NVIDIA H200",
123
+ "memoryTotal": "150754820096",
124
+ "cudaCores": 16896,
125
+ "architecture": "Hopper",
126
+ "uuid": "GPU-ac8c17b6-1752-c9e9-533e-20e5cbd94678"
127
+ },
128
+ {
129
+ "name": "NVIDIA H200",
130
+ "memoryTotal": "150754820096",
131
+ "cudaCores": 16896,
132
+ "architecture": "Hopper",
133
+ "uuid": "GPU-7e924378-26a9-7f17-3eb5-8ab9d7910ad5"
134
+ },
135
+ {
136
+ "name": "NVIDIA H200",
137
+ "memoryTotal": "150754820096",
138
+ "cudaCores": 16896,
139
+ "architecture": "Hopper",
140
+ "uuid": "GPU-84d7dc9a-1673-019f-bf97-112e89cd64fa"
141
+ }
142
+ ],
143
+ "cudaVersion": "12.5",
144
+ "writerId": "h3tpzt1umi7ubf1hxcnxlw99p9x4m07c"
145
+ }
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2026-02-24T09:57:51.786209377Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-02-24T09:57:52.5013945Z","level":"INFO","msg":"stream: created new stream","id":"90ibcpp4"}
3
+ {"time":"2026-02-24T09:57:52.50146878Z","level":"INFO","msg":"handler: started","stream_id":"90ibcpp4"}
4
+ {"time":"2026-02-24T09:57:52.526145224Z","level":"INFO","msg":"stream: started","id":"90ibcpp4"}
5
+ {"time":"2026-02-24T09:57:52.526168565Z","level":"INFO","msg":"sender: started","stream_id":"90ibcpp4"}
6
+ {"time":"2026-02-24T09:57:52.526174028Z","level":"INFO","msg":"writer: started","stream_id":"90ibcpp4"}
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/logs/debug.log ADDED
File without changes
franka_move_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_095749-90ibcpp4/run-90ibcpp4.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e0cf551f309d126d88c1ff35e88683b5589a988cef31631e0adf77b209504cf
3
+ size 9895936
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/checkpoints/steps_4000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c162016481119b637df15a0be9892ba375ec58b75636fd91a63f8585231a760a
3
+ size 9785060316
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/config.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ data_mix: smartmore_franka_pick_color_egg
6
+ data_root_dir: /gpfs/wangzixuan/visual_prompting/real_data/lerobot
7
+ dataset_py: visual_prompt_datasets
8
+ delete_pause_frame: false
9
+ feed_both_images: true
10
+ image_size:
11
+ - 224
12
+ - 224
13
+ num_workers: 4
14
+ per_device_batch_size: 32
15
+ target_location_prompt_type: box
16
+ target_object_prompt_type: crosshair
17
+ use_subtask: false
18
+ video_backend: pyav
19
+ visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
20
+ vp_data:
21
+ dataset_py: visual_prompt_prediction_datasets
22
+ extracted_frames_dir: /gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
23
+ feed_both_images: false
24
+ num_workers: 4
25
+ per_device_batch_size: 8
26
+ target_location_prompt_type: box
27
+ target_object_prompt_type: crosshair
28
+ visual_prompt_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
29
+ framework:
30
+ action_model:
31
+ action_dim: 7
32
+ action_hidden_dim: 2560
33
+ action_model_type: DiT-B
34
+ future_action_window_size: 15
35
+ past_action_window_size: 0
36
+ name: QwenOFT
37
+ qwenvl:
38
+ base_vlm: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
39
+ output_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_pick_color_egg_visual_prompt_QwenOFT_4k_save
40
+ run_id: franka_pick_color_egg_visual_prompt_QwenOFT_4k_save
41
+ run_root_dir: /gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
42
+ seed: 42
43
+ trainer:
44
+ eval_interval: 100
45
+ freeze_modules: null
46
+ gradient_accumulation_steps: 1
47
+ gradient_clipping: 1.0
48
+ is_resume: false
49
+ learning_rate:
50
+ action_model: 0.0001
51
+ base: 3.0e-05
52
+ qwen_vl_interface: 1.0e-05
53
+ logging_frequency: 10
54
+ loss_scale:
55
+ visual_prompt: 0.1
56
+ vla: 1.0
57
+ lr_scheduler_type: cosine_with_min_lr
58
+ max_train_steps: 100000
59
+ num_warmup_steps: 5000
60
+ optimizer:
61
+ betas:
62
+ - 0.9
63
+ - 0.95
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-08
66
+ save_interval: 4000
67
+ scheduler_specific_kwargs:
68
+ min_lr: 5.0e-07
69
+ wandb_entity: zwanggk
70
+ wandb_project: franka_visual_prompt
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/dataset_statistics.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "franka": {
3
+ "action": {
4
+ "mean": [
5
+ -0.03088134340941906,
6
+ 0.014874552376568317,
7
+ -0.010504455305635929,
8
+ 0.022602463141083717,
9
+ 0.1353626847267151,
10
+ -0.028460221365094185,
11
+ 0.5807644724845886
12
+ ],
13
+ "std": [
14
+ 0.15506236255168915,
15
+ 0.13216811418533325,
16
+ 0.3213138282299042,
17
+ 0.22508421540260315,
18
+ 0.19163629412651062,
19
+ 0.15282094478607178,
20
+ 0.8140951991081238
21
+ ],
22
+ "max": [
23
+ 0.8901300430297852,
24
+ 0.9411723017692566,
25
+ 1.0124773979187012,
26
+ 0.8294150829315186,
27
+ 0.9971182346343994,
28
+ 0.7796618938446045,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -1.0019829273223877,
33
+ -0.9361174702644348,
34
+ -1.008697748184204,
35
+ -0.910487949848175,
36
+ -0.6591343283653259,
37
+ -1.0043150186538696,
38
+ -1.0
39
+ ],
40
+ "q01": [
41
+ -0.5649683105945587,
42
+ -0.4391350215673447,
43
+ -0.9119087898731232,
44
+ -0.5171846067905426,
45
+ -0.27165821373462673,
46
+ -0.5323639380931854,
47
+ -1.0
48
+ ],
49
+ "q99": [
50
+ 0.4818715870380397,
51
+ 0.47519543111324264,
52
+ 0.7014798462390898,
53
+ 0.6106001746654509,
54
+ 0.7260631489753719,
55
+ 0.41580578923225403,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.8852064609527588,
71
+ -0.2375049889087677,
72
+ 0.781900942325592,
73
+ -0.07681693136692047,
74
+ -0.18386560678482056,
75
+ 0.027068398892879486,
76
+ 0.004328660201281309,
77
+ 0.02139219455420971,
78
+ 0.00874329637736082,
79
+ -0.07659025490283966,
80
+ 0.2806189954280853,
81
+ 0.5508584976196289,
82
+ -0.1435595452785492,
83
+ -0.004463810473680496,
84
+ 0.0007538733771070838,
85
+ -0.001885179546661675,
86
+ -0.0016131681622937322,
87
+ 0.0008839101647026837
88
+ ],
89
+ "std": [
90
+ 0.23323002457618713,
91
+ 1.1870391368865967,
92
+ 2.017711877822876,
93
+ 1.6102639436721802,
94
+ 0.05210454761981964,
95
+ 0.05229118466377258,
96
+ 0.041582029312849045,
97
+ 0.05281534045934677,
98
+ 0.05351712927222252,
99
+ 0.07277204096317291,
100
+ 0.8566829562187195,
101
+ 0.8766577243804932,
102
+ 0.4317518472671509,
103
+ 0.02712591178715229,
104
+ 0.020200243219733238,
105
+ 0.05434368550777435,
106
+ 0.04124778136610985,
107
+ 0.028852419927716255
108
+ ],
109
+ "max": [
110
+ 1.0,
111
+ 3.4243950843811035,
112
+ 7.788799285888672,
113
+ 14.78903865814209,
114
+ -0.03373821824789047,
115
+ 0.11598888039588928,
116
+ 0.059444610029459,
117
+ 0.18933114409446716,
118
+ 0.10342294722795486,
119
+ 0.25208762288093567,
120
+ 3.4390106201171875,
121
+ 3.0282397270202637,
122
+ 0.9457597732543945,
123
+ 0.17403888702392578,
124
+ 0.17917200922966003,
125
+ 0.2020551860332489,
126
+ 0.27113404870033264,
127
+ 0.21728664636611938
128
+ ],
129
+ "min": [
130
+ 0.34879228472709656,
131
+ -6.467292308807373,
132
+ -8.687843322753906,
133
+ -5.337343692779541,
134
+ -0.29195600748062134,
135
+ -0.09002542495727539,
136
+ -0.18820980191230774,
137
+ -0.12294814735651016,
138
+ -0.1497562676668167,
139
+ -0.30858489871025085,
140
+ -2.1999568939208984,
141
+ -2.1393580436706543,
142
+ -0.9632256031036377,
143
+ -0.21414227783679962,
144
+ -0.15804187953472137,
145
+ -0.17533080279827118,
146
+ -0.34564465284347534,
147
+ -0.28151094913482666
148
+ ],
149
+ "q01": [
150
+ 0.373913049697876,
151
+ -2.404434905052185,
152
+ -3.435333833694458,
153
+ -4.099568157196045,
154
+ -0.2798208749294281,
155
+ -0.06430018439888954,
156
+ -0.10869826689362526,
157
+ -0.09201859250664711,
158
+ -0.1239976005256176,
159
+ -0.23648206681013106,
160
+ -1.4935396432876586,
161
+ -1.617297031879425,
162
+ -0.7970868635177613,
163
+ -0.10159043014049529,
164
+ -0.07206693574786185,
165
+ -0.15704740852117538,
166
+ -0.14374885827302933,
167
+ -0.08439157962799072
168
+ ],
169
+ "q99": [
170
+ 1.0,
171
+ 2.5832616949081397,
172
+ 6.19975263595581,
173
+ 2.6092714929580647,
174
+ -0.08237690582871442,
175
+ 0.10878291621804234,
176
+ 0.0529432439059019,
177
+ 0.14419121086597442,
178
+ 0.08684506908059107,
179
+ 0.12282686129212377,
180
+ 2.705757403373718,
181
+ 2.4532408523559566,
182
+ 0.7534279215335843,
183
+ 0.0849865667521953,
184
+ 0.0681564901769161,
185
+ 0.1255668881535528,
186
+ 0.11919408649206155,
187
+ 0.08485643595457076
188
+ ]
189
+ },
190
+ "num_transitions": 13787,
191
+ "num_trajectories": 200
192
+ }
193
+ }
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/run_franka_vp_pick_color_egg.sh ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Visual Prompt Training Script for Franka - pick_color_egg
3
+
4
+ export NCCL_SOCKET_IFNAME=bond0
5
+ export NCCL_IB_HCA=mlx5_2,mlx5_3
6
+
7
+ export NCCL_BLOCKING_WAIT=1
8
+ export NCCL_ASYNC_ERROR_HANDLING=1
9
+ export TORCH_NCCL_BLOCKING_WAIT=1
10
+ export TORCH_NCCL_ASYNC_ERROR_HANDLING=1
11
+
12
+ export NCCL_TIMEOUT=3600
13
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
14
+
15
+ Framework_name=QwenOFT
16
+ base_vlm=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct
17
+ freeze_module_list=''
18
+ DIT_TYPE="DiT-B"
19
+
20
+ # Data paths
21
+ data_root_dir=/gpfs/wangzixuan/visual_prompting/real_data/lerobot
22
+ visual_prompt_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output
23
+ extracted_frames_dir=/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames
24
+ data_mix=smartmore_franka_pick_color_egg
25
+
26
+ # Output
27
+ run_root_dir=/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints
28
+ run_id=franka_pick_color_egg_visual_prompt_QwenOFT_4k_save
29
+
30
+ output_dir=${run_root_dir}/${run_id}
31
+ mkdir -p ${output_dir}
32
+ cp $0 ${output_dir}/
33
+
34
+ accelerate launch \
35
+ --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
36
+ --num_processes 8 \
37
+ starVLA/training/train_starvla_visual_prompt.py \
38
+ --config_yaml ./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml \
39
+ --framework.name ${Framework_name} \
40
+ --framework.qwenvl.base_vlm ${base_vlm} \
41
+ --framework.action_model.action_model_type ${DIT_TYPE} \
42
+ --datasets.vla_data.data_root_dir ${data_root_dir} \
43
+ --datasets.vla_data.visual_prompt_dir ${visual_prompt_dir} \
44
+ --datasets.vla_data.data_mix ${data_mix} \
45
+ --datasets.vla_data.per_device_batch_size 32 \
46
+ --datasets.vla_data.video_backend pyav \
47
+ --datasets.vp_data.visual_prompt_dir ${visual_prompt_dir} \
48
+ --datasets.vp_data.extracted_frames_dir ${extracted_frames_dir} \
49
+ --datasets.vp_data.per_device_batch_size 8 \
50
+ --trainer.freeze_modules "${freeze_module_list}" \
51
+ --trainer.max_train_steps 100000 \
52
+ --trainer.save_interval 4000 \
53
+ --trainer.logging_frequency 10 \
54
+ --trainer.eval_interval 100 \
55
+ --trainer.learning_rate.base 3e-5 \
56
+ --trainer.learning_rate.qwen_vl_interface 1e-5 \
57
+ --trainer.loss_scale.visual_prompt 0.1 \
58
+ --datasets.vla_data.use_subtask false \
59
+ --datasets.vla_data.feed_both_images true \
60
+ --datasets.vp_data.feed_both_images false \
61
+ --run_root_dir ${run_root_dir} \
62
+ --run_id ${run_id} \
63
+ --wandb_project franka_visual_prompt \
64
+ --wandb_entity zwanggk
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/summary.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"steps": 4000}
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2026-02-24T10:00:04.845715487Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-02-24T10:00:05.548861429Z","level":"INFO","msg":"stream: created new stream","id":"d1v6qg5k"}
3
+ {"time":"2026-02-24T10:00:05.548936729Z","level":"INFO","msg":"handler: started","stream_id":"d1v6qg5k"}
4
+ {"time":"2026-02-24T10:00:05.551545692Z","level":"INFO","msg":"stream: started","id":"d1v6qg5k"}
5
+ {"time":"2026-02-24T10:00:05.551566721Z","level":"INFO","msg":"writer: started","stream_id":"d1v6qg5k"}
6
+ {"time":"2026-02-24T10:00:05.551568325Z","level":"INFO","msg":"sender: started","stream_id":"d1v6qg5k"}
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/debug.log ADDED
File without changes
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/requirements.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ docstring_parser==0.17.0
3
+ pydantic_core==2.27.2
4
+ py-cpuinfo==9.0.0
5
+ Werkzeug==3.1.5
6
+ pandas==2.3.3
7
+ kiwisolver==1.4.9
8
+ httpcore==1.0.9
9
+ nvidia-cuda-runtime-cu12==12.4.127
10
+ Jinja2==3.1.6
11
+ exceptiongroup==1.3.1
12
+ filelock==3.20.3
13
+ torchvision==0.21.0
14
+ gitdb==4.0.12
15
+ fastparquet==2024.11.0
16
+ tensorboard==2.20.0
17
+ portalocker==3.2.0
18
+ timm==1.0.24
19
+ nvidia-nvjitlink-cu12==12.4.127
20
+ nvidia-cudnn-cu12==9.1.0.70
21
+ pyparsing==3.3.2
22
+ protobuf==6.33.4
23
+ nvitop==1.6.2
24
+ importlib_metadata==8.7.1
25
+ GitPython==3.1.46
26
+ annotated-types==0.7.0
27
+ antlr4-python3-runtime==4.9.3
28
+ yacs==0.1.8
29
+ contourpy==1.3.2
30
+ charset-normalizer==3.4.4
31
+ hjson==3.1.0
32
+ tensorboard-data-server==0.7.2
33
+ six==1.17.0
34
+ nvidia-cuda-cupti-cu12==12.4.127
35
+ tqdm==4.67.1
36
+ h11==0.16.0
37
+ zipp==3.23.0
38
+ pipablepytorch3d==0.7.6
39
+ transformers==4.57.0
40
+ websockets==16.0
41
+ opencv-python-headless==4.11.0.86
42
+ ninja==1.13.0
43
+ websocket-client==1.8.0
44
+ nvidia-nvtx-cu12==12.4.127
45
+ grpcio==1.76.0
46
+ psutil==7.2.1
47
+ typing_extensions==4.15.0
48
+ zope.event==6.1
49
+ mdurl==0.1.2
50
+ scipy==1.15.3
51
+ pydantic==2.10.6
52
+ tiktoken==0.12.0
53
+ networkx==3.4.2
54
+ zope.interface==8.2
55
+ lazy_loader==0.4
56
+ websocket==0.2.1
57
+ huggingface-hub==0.36.0
58
+ transformers-stream-generator==0.0.4
59
+ cycler==0.12.1
60
+ safetensors==0.7.0
61
+ requests==2.32.5
62
+ matplotlib==3.10.8
63
+ nvidia-cuda-nvrtc-cu12==12.4.127
64
+ qwen-vl-utils==0.0.14
65
+ scikit-image==0.25.2
66
+ deepspeed==0.16.9
67
+ omegaconf==2.3.0
68
+ Markdown==3.10.1
69
+ sentry-sdk==2.50.0
70
+ pip==25.3
71
+ pillow==12.1.0
72
+ pyarrow==14.0.1
73
+ nvidia-cublas-cu12==12.4.5.8
74
+ termcolor==3.3.0
75
+ tifffile==2025.5.10
76
+ nvidia-curand-cu12==10.3.5.147
77
+ iopath==0.1.10
78
+ wandb==0.24.0
79
+ PyYAML==6.0.3
80
+ flash_attn==2.7.4.post1
81
+ wheel==0.45.1
82
+ tokenizers==0.22.2
83
+ idna==3.11
84
+ accelerate==1.5.2
85
+ mpmath==1.3.0
86
+ einops==0.8.1
87
+ urllib3==2.6.3
88
+ diffusers==0.36.0
89
+ hf-xet==1.2.0
90
+ eval_type_backport==0.3.1
91
+ fsspec==2026.1.0
92
+ ImageIO==2.37.2
93
+ tzdata==2025.3
94
+ torch==2.6.0
95
+ click==8.3.1
96
+ albumentations==1.4.18
97
+ setuptools==80.9.0
98
+ tabulate==0.9.0
99
+ av==12.3.0
100
+ nvidia-cusparselt-cu12==0.6.2
101
+ markdown-it-py==4.0.0
102
+ absl-py==2.3.1
103
+ nvidia-cusparse-cu12==12.3.1.170
104
+ starVLA==1.0.1
105
+ packaging==26.0
106
+ MarkupSafe==3.0.3
107
+ eva-decord==0.6.1
108
+ Pygments==2.19.2
109
+ rich==14.2.0
110
+ nvidia-cufft-cu12==11.2.1.3
111
+ numpydantic==1.6.9
112
+ triton==3.2.0
113
+ certifi==2026.1.4
114
+ smmap==5.0.2
115
+ fvcore==0.1.5.post20221221
116
+ albucore==0.0.17
117
+ fonttools==4.61.1
118
+ regex==2026.1.15
119
+ pytz==2025.2
120
+ python-dateutil==2.9.0.post0
121
+ greenlet==3.3.0
122
+ platformdirs==4.5.1
123
+ nvidia-ml-py==13.590.48
124
+ cramjam==2.11.0
125
+ numpy==1.26.4
126
+ tyro==1.0.5
127
+ nvidia-cusolver-cu12==11.6.1.9
128
+ nvidia-nccl-cu12==2.21.5
129
+ httpx==0.28.1
130
+ gevent==25.9.1
131
+ typeguard==4.4.4
132
+ msgpack==1.1.2
133
+ decord==0.6.0
134
+ sympy==1.13.1
135
+ anyio==4.12.1
136
+ jaraco.collections==5.1.0
137
+ packaging==24.2
138
+ importlib_metadata==8.0.0
139
+ tomli==2.0.1
140
+ backports.tarfile==1.2.0
141
+ typing_extensions==4.12.2
142
+ jaraco.context==5.3.0
143
+ typeguard==4.3.0
144
+ autocommand==2.2.2
145
+ jaraco.text==3.12.1
146
+ more-itertools==10.3.0
147
+ platformdirs==4.2.2
148
+ wheel==0.45.1
149
+ inflect==7.3.1
150
+ jaraco.functools==4.0.1
151
+ zipp==3.19.2
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/files/wandb-metadata.json ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-113-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.19",
4
+ "startedAt": "2026-02-24T10:00:04.006310Z",
5
+ "args": [
6
+ "--config_yaml",
7
+ "./examples/Franka/train_files/starvla_cotrain_franka_visual_prompt.yaml",
8
+ "--framework.name",
9
+ "QwenOFT",
10
+ "--framework.qwenvl.base_vlm",
11
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Pretrained_models/Qwen3-VL-4B-Instruct",
12
+ "--framework.action_model.action_model_type",
13
+ "DiT-B",
14
+ "--datasets.vla_data.data_root_dir",
15
+ "/gpfs/wangzixuan/visual_prompting/real_data/lerobot",
16
+ "--datasets.vla_data.visual_prompt_dir",
17
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
18
+ "--datasets.vla_data.data_mix",
19
+ "smartmore_franka_pick_color_egg",
20
+ "--datasets.vla_data.per_device_batch_size",
21
+ "32",
22
+ "--datasets.vla_data.video_backend",
23
+ "pyav",
24
+ "--datasets.vp_data.visual_prompt_dir",
25
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/realdata_process/visual_prompts_output",
26
+ "--datasets.vp_data.extracted_frames_dir",
27
+ "/gpfs/wangzixuan/visual_prompting/real_data/extracted_frames",
28
+ "--datasets.vp_data.per_device_batch_size",
29
+ "8",
30
+ "--trainer.freeze_modules",
31
+ "",
32
+ "--trainer.max_train_steps",
33
+ "100000",
34
+ "--trainer.save_interval",
35
+ "4000",
36
+ "--trainer.logging_frequency",
37
+ "10",
38
+ "--trainer.eval_interval",
39
+ "100",
40
+ "--trainer.learning_rate.base",
41
+ "3e-5",
42
+ "--trainer.learning_rate.qwen_vl_interface",
43
+ "1e-5",
44
+ "--trainer.loss_scale.visual_prompt",
45
+ "0.1",
46
+ "--datasets.vla_data.use_subtask",
47
+ "false",
48
+ "--datasets.vla_data.feed_both_images",
49
+ "true",
50
+ "--datasets.vp_data.feed_both_images",
51
+ "false",
52
+ "--run_root_dir",
53
+ "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints",
54
+ "--run_id",
55
+ "franka_pick_color_egg_visual_prompt_QwenOFT_4k_save",
56
+ "--wandb_project",
57
+ "franka_visual_prompt",
58
+ "--wandb_entity",
59
+ "zwanggk"
60
+ ],
61
+ "program": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
62
+ "codePath": "starVLA_robocasa/starVLA/training/train_starvla_visual_prompt.py",
63
+ "codePathLocal": "starVLA/training/train_starvla_visual_prompt.py",
64
+ "git": {
65
+ "remote": "https://github.com/Vincent2311/visual_prompting.git",
66
+ "commit": "c53a6c11679f38afa4bb3de09d8c540d11f8a500"
67
+ },
68
+ "email": "zwanggk@connect.ust.hk",
69
+ "root": "/gpfs/wangzixuan/visual_prompting/starVLA_robocasa/playground/Checkpoints/franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb",
70
+ "host": "C07-GPU-05-10U",
71
+ "executable": "/gpfs/wangzixuan/conda_envs/starVLA-Robocasa/bin/python3.10",
72
+ "cpu_count": 96,
73
+ "cpu_count_logical": 192,
74
+ "gpu": "NVIDIA H200",
75
+ "gpu_count": 8,
76
+ "disk": {
77
+ "/": {
78
+ "total": "941186367488",
79
+ "used": "637385850880"
80
+ }
81
+ },
82
+ "memory": {
83
+ "total": "2163973533696"
84
+ },
85
+ "gpu_nvidia": [
86
+ {
87
+ "name": "NVIDIA H200",
88
+ "memoryTotal": "150754820096",
89
+ "cudaCores": 16896,
90
+ "architecture": "Hopper",
91
+ "uuid": "GPU-02cbbf48-8c7b-ecc6-44fc-4f1ae9fd5afc"
92
+ },
93
+ {
94
+ "name": "NVIDIA H200",
95
+ "memoryTotal": "150754820096",
96
+ "cudaCores": 16896,
97
+ "architecture": "Hopper",
98
+ "uuid": "GPU-ee286f87-f96e-0dee-74eb-2419849cd598"
99
+ },
100
+ {
101
+ "name": "NVIDIA H200",
102
+ "memoryTotal": "150754820096",
103
+ "cudaCores": 16896,
104
+ "architecture": "Hopper",
105
+ "uuid": "GPU-0aba708d-8933-aa90-00b3-d28e723e31f6"
106
+ },
107
+ {
108
+ "name": "NVIDIA H200",
109
+ "memoryTotal": "150754820096",
110
+ "cudaCores": 16896,
111
+ "architecture": "Hopper",
112
+ "uuid": "GPU-2fe81b36-50a3-ee89-b038-14a95ec32762"
113
+ },
114
+ {
115
+ "name": "NVIDIA H200",
116
+ "memoryTotal": "150754820096",
117
+ "cudaCores": 16896,
118
+ "architecture": "Hopper",
119
+ "uuid": "GPU-9ed11761-265b-0861-8cb5-2652f7ff78df"
120
+ },
121
+ {
122
+ "name": "NVIDIA H200",
123
+ "memoryTotal": "150754820096",
124
+ "cudaCores": 16896,
125
+ "architecture": "Hopper",
126
+ "uuid": "GPU-68f88f42-bfa0-14af-3667-729c61e76dcd"
127
+ },
128
+ {
129
+ "name": "NVIDIA H200",
130
+ "memoryTotal": "150754820096",
131
+ "cudaCores": 16896,
132
+ "architecture": "Hopper",
133
+ "uuid": "GPU-2b318a21-8f81-ec8d-e491-578fd6664f3b"
134
+ },
135
+ {
136
+ "name": "NVIDIA H200",
137
+ "memoryTotal": "150754820096",
138
+ "cudaCores": 16896,
139
+ "architecture": "Hopper",
140
+ "uuid": "GPU-b649aafd-4d77-de99-7018-1ab8dfe1b680"
141
+ }
142
+ ],
143
+ "cudaVersion": "12.5",
144
+ "writerId": "s052mduyb2cc0jjvjj8ed0uz9zryksd0"
145
+ }
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2026-02-24T10:00:04.675432912Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmkcypry1/port-2611777.txt","pid":2611777,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-02-24T10:00:04.675884517Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2611777}
3
+ {"time":"2026-02-24T10:00:04.675894319Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2611777-2613701-460156490/socket","Net":"unix"}}
4
+ {"time":"2026-02-24T10:00:04.832299681Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-02-24T10:00:04.841837103Z","level":"INFO","msg":"handleInformInit: received","streamId":"d1v6qg5k","id":"1(@)"}
6
+ {"time":"2026-02-24T10:00:05.551555685Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d1v6qg5k","id":"1(@)"}
7
+ {"time":"2026-02-24T13:46:49.49981133Z","level":"INFO","msg":"server: parent process exited, terminating service process"}
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2026-02-24T10:00:04.845715487Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-02-24T10:00:05.548861429Z","level":"INFO","msg":"stream: created new stream","id":"d1v6qg5k"}
3
+ {"time":"2026-02-24T10:00:05.548936729Z","level":"INFO","msg":"handler: started","stream_id":"d1v6qg5k"}
4
+ {"time":"2026-02-24T10:00:05.551545692Z","level":"INFO","msg":"stream: started","id":"d1v6qg5k"}
5
+ {"time":"2026-02-24T10:00:05.551566721Z","level":"INFO","msg":"writer: started","stream_id":"d1v6qg5k"}
6
+ {"time":"2026-02-24T10:00:05.551568325Z","level":"INFO","msg":"sender: started","stream_id":"d1v6qg5k"}
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/logs/debug.log ADDED
File without changes
franka_pick_color_egg_visual_prompt_QwenOFT_4k_save/wandb/wandb/run-20260224_100004-d1v6qg5k/run-d1v6qg5k.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463848264b9e61de4b23b0b5ac4f93406acdaf6d0e48cbc2f709cee97eaaace3
3
+ size 7700480