Caesarrr commited on
Commit
33ea7f4
·
verified ·
1 Parent(s): 3de6f06

Upload folder using huggingface_hub

Browse files
Files changed (25) hide show
  1. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt +3 -0
  2. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/model.safetensors +3 -0
  3. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/optimizer.bin +3 -0
  4. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/random_states_0.pkl +3 -0
  5. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.full.yaml +224 -0
  6. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.yaml +103 -0
  7. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics.json +127 -0
  8. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics_eval.json +127 -0
  9. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1000.json +63 -0
  10. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1250.json +63 -0
  11. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1500.json +63 -0
  12. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1750.json +63 -0
  13. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2000.json +63 -0
  14. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2250.json +63 -0
  15. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_250.json +63 -0
  16. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2500.json +63 -0
  17. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2750.json +63 -0
  18. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_3000.json +63 -0
  19. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_500.json +63 -0
  20. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_750.json +63 -0
  21. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/config.yaml +222 -0
  22. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/hydra.yaml +341 -0
  23. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/overrides.yaml +181 -0
  24. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/train_starvla_hydra.log +0 -0
  25. flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/summary.jsonl +6 -0
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371cb744227687bb99bcad7f9ff2250cf06da75631359ad3eba4c6bc52570607
3
+ size 9785060316
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dee60fc99cf35e2e6d5908129093c8c66c2cafb62d3b87a236d45acd86522d59
3
+ size 9138230516
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0bf50374f6a08b664673827a21dec3f972a8770dc56af310e8a6c225af8b023
3
+ size 18276885098
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1d26c3be5ce9ab794023e41262ab96e409e1e140d3759482adef7778d779a8
3
+ size 14821
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.full.yaml ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ framework:
2
+ name: QwenOFT
3
+ qwenvl:
4
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
5
+ attn_implementation: flash_attention_2
6
+ enable_gradient_checkpointing: true
7
+ action_model:
8
+ action_model_type: MLP
9
+ action_dim: 7
10
+ action_hidden_dim: 2560
11
+ future_action_window_size: 0
12
+ past_action_window_size: 0
13
+ loss_type: discrete_ce
14
+ state_dim: 7
15
+ action_horizon: 1
16
+ action_env_dim: 2
17
+ datasets:
18
+ vla_data:
19
+ dataset_py: lerobot_datasets
20
+ include_state: true
21
+ data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
22
+ data_mix: flappy_train__bridge
23
+ eval_data_mix: flappy_train__bridge__val
24
+ custom_mixtures_path: null
25
+ action_type: discrete
26
+ sequential_step_sampling: false
27
+ eval_sequential_step_sampling: null
28
+ num_workers: 8
29
+ eval_num_workers: 8
30
+ prefetch_factor: 4
31
+ persistent_workers: true
32
+ pin_memory: true
33
+ shuffle: true
34
+ action_balance:
35
+ enabled: false
36
+ strategy: balanced_epoch
37
+ action_key: action_id
38
+ target_flap_fraction: 0.3
39
+ noop_id: 0
40
+ flap_id: 1
41
+ latency_curriculum:
42
+ enabled: false
43
+ strategy: exclusive
44
+ latencies: null
45
+ phase_steps: null
46
+ per_device_batch_size: 32
47
+ load_all_data_for_training: true
48
+ num_obs_frames: 1
49
+ image_mode: single
50
+ stitch_grid:
51
+ - 2
52
+ - 2
53
+ obs_image_size: null
54
+ video_backend: torchvision_av
55
+ dataset:
56
+ source_hf: ''
57
+ config_name: null
58
+ source_subdir: null
59
+ converted_name: flappy_train
60
+ single_source_hf: ''
61
+ mixed_source_hf: ''
62
+ single_converted_name: flappy_train
63
+ mixed_converted_name: flappy_mixed_latency_train
64
+ single_latency_filter: null
65
+ mixed_latency_filter: null
66
+ force_download: false
67
+ setup_force: false
68
+ skip_verification: false
69
+ verify_rows: 200
70
+ max_episodes: null
71
+ episodes_per_latency: null
72
+ latency_filter: null
73
+ debug_subset:
74
+ enabled: false
75
+ max_episodes: 5
76
+ suffix: debug
77
+ base_model:
78
+ repo_id: Qwen/Qwen3-VL-4B-Instruct
79
+ initialization:
80
+ checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
81
+ checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
82
+ checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
83
+ trainer:
84
+ max_train_steps: 3000
85
+ num_warmup_steps: 100
86
+ save_interval: 500
87
+ eval_interval: 250
88
+ eval_num_batches: 50
89
+ per_latency_eval_num_batches: null
90
+ eval_action_classification: true
91
+ eval_action_classification_interval: null
92
+ cc_f1_tolerance: 1
93
+ learning_rate:
94
+ base: 2.0e-05
95
+ qwen_vl_interface: 1.0e-05
96
+ action_model: 0.0001
97
+ lr_scheduler_type: cosine_with_min_lr
98
+ scheduler_specific_kwargs:
99
+ min_lr: 1.0e-06
100
+ freeze_modules: ''
101
+ freeze_vit: false
102
+ freeze_tied_embedding: false
103
+ freeze_llm_layers: []
104
+ loss_scale:
105
+ vla: 1.0
106
+ vlm: 0.1
107
+ max_grad_norm: 1.0
108
+ weight_decay: 0.0
109
+ logging_frequency: 1
110
+ profile_timing:
111
+ enabled: false
112
+ log_interval: 10
113
+ gradient_clipping: 1.0
114
+ gradient_accumulation_steps: 1
115
+ distributed_backend: none
116
+ is_resume: false
117
+ pretrained_checkpoint: null
118
+ resume_step: 0
119
+ reload_modules: null
120
+ optimizer:
121
+ name: AdamW
122
+ betas:
123
+ - 0.9
124
+ - 0.95
125
+ eps: 1.0e-08
126
+ weight_decay: 1.0e-08
127
+ fused: true
128
+ save_format: pt
129
+ workspace_dir: WORKSPACE_DIR
130
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
131
+ seed: 42
132
+ wandb_entity: saberrr-zju
133
+ wandb_project: starVLA_rl_games
134
+ auth:
135
+ env_file: null
136
+ hf_token_env: HF_TOKEN
137
+ wandb_api_key_env: WANDB_API_KEY
138
+ paths:
139
+ run_root_dir: results/Checkpoints
140
+ dataset_local_dir: data/flappy_fix_latency_2_200ep
141
+ dataset_cache_dir: null
142
+ base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
143
+ accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
144
+ launch:
145
+ use_accelerate: true
146
+ gpus: null
147
+ num_processes: 1
148
+ dry_run: false
149
+ conda:
150
+ enabled: true
151
+ env_name: null
152
+ rl_games:
153
+ model_alias: openvla
154
+ env_eval:
155
+ image_size: 224
156
+ frameskip: 1
157
+ seed: 42
158
+ fixed_episode_seeds: true
159
+ latency_seed_stride: 0
160
+ task_seed_stride: 0
161
+ task_description: ''
162
+ enabled: true
163
+ distributed_mode: none
164
+ vectorized:
165
+ enabled: false
166
+ batch_size: 1
167
+ latency:
168
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
169
+ mode: single
170
+ values:
171
+ - 0
172
+ mid_train:
173
+ enabled: true
174
+ interval_steps: 250
175
+ latencies:
176
+ - 2
177
+ num_episodes: 5
178
+ max_steps_per_episode: 3600
179
+ post_train:
180
+ enabled: false
181
+ latencies:
182
+ - 0
183
+ - 1
184
+ - 2
185
+ - 3
186
+ - 4
187
+ num_episodes: 5
188
+ max_steps_per_episode: 3600
189
+ task: flappy
190
+ initialization_mode: bridge
191
+ action_carrier: bridge
192
+ model: openvla
193
+ env: flappy
194
+ init: bridge
195
+ bridge_base_model:
196
+ repo_id:
197
+ openvla: Qwen/Qwen3-VL-4B-Instruct
198
+ pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
199
+ pi05: Qwen/Qwen3-VL-4B-Instruct
200
+ gr00t: Qwen/Qwen3-VL-4B-Instruct
201
+ local_dir:
202
+ openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
203
+ pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
204
+ pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
205
+ gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
206
+ mode: single
207
+ checkpoint:
208
+ load: none
209
+ hf_repo_id: null
210
+ save_best_model: false
211
+ save_pt_file: false
212
+ local:
213
+ keep_last_n: 1
214
+ sync:
215
+ enabled: false
216
+ repo_id: null
217
+ keep_last_n: 0
218
+ sync_every_n_checkpoints: 1
219
+ resume_policy: local_latest
220
+ run_id: flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
221
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
222
+ config_yaml: null
223
+ is_debug: false
224
+ version_id: '0.21'
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint:
2
+ local:
3
+ keep_last_n: 1
4
+ save_best_model: false
5
+ save_pt_file: false
6
+ sync:
7
+ enabled: false
8
+ keep_last_n: 0
9
+ repo_id: null
10
+ datasets:
11
+ vla_data:
12
+ data_mix: flappy_train__bridge
13
+ dataset_py: lerobot_datasets
14
+ eval_data_mix: flappy_train__bridge__val
15
+ eval_num_workers: 8
16
+ include_state: true
17
+ latency_curriculum:
18
+ enabled: false
19
+ obs_image_size: null
20
+ per_device_batch_size: 32
21
+ persistent_workers: true
22
+ pin_memory: true
23
+ prefetch_factor: 4
24
+ framework:
25
+ action_model:
26
+ action_dim: 7
27
+ action_env_dim: 2
28
+ action_hidden_dim: 2560
29
+ action_horizon: 1
30
+ action_model_type: MLP
31
+ loss_type: discrete_ce
32
+ state_dim: 7
33
+ name: QwenOFT
34
+ qwenvl:
35
+ attn_implementation: flash_attention_2
36
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
37
+ enable_gradient_checkpointing: true
38
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
39
+ rl_games:
40
+ env_eval:
41
+ distributed_mode: none
42
+ enabled: true
43
+ fixed_episode_seeds: true
44
+ frameskip: 1
45
+ image_size: 224
46
+ latency:
47
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
48
+ latency_seed_stride: 0
49
+ mid_train:
50
+ enabled: true
51
+ interval_steps: 250
52
+ latencies:
53
+ - 2
54
+ max_steps_per_episode: 3600
55
+ num_episodes: 5
56
+ seed: 42
57
+ task_description: ''
58
+ task_seed_stride: 0
59
+ vectorized:
60
+ enabled: false
61
+ model_alias: openvla
62
+ task: flappy
63
+ run_id: flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
64
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
65
+ seed: 42
66
+ trainer:
67
+ distributed_backend: none
68
+ eval_action_classification: true
69
+ eval_action_classification_interval: null
70
+ eval_interval: 250
71
+ eval_num_batches: 50
72
+ freeze_llm_layers: []
73
+ freeze_modules: ''
74
+ freeze_tied_embedding: false
75
+ freeze_vit: false
76
+ gradient_accumulation_steps: 1
77
+ gradient_clipping: 1.0
78
+ is_resume: false
79
+ learning_rate:
80
+ action_model: 0.0001
81
+ base: 2.0e-05
82
+ qwen_vl_interface: 1.0e-05
83
+ logging_frequency: 1
84
+ lr_scheduler_type: cosine_with_min_lr
85
+ max_train_steps: 3000
86
+ num_warmup_steps: 100
87
+ optimizer:
88
+ betas:
89
+ - 0.9
90
+ - 0.95
91
+ eps: 1.0e-08
92
+ fused: true
93
+ weight_decay: 1.0e-08
94
+ per_latency_eval_num_batches: null
95
+ pretrained_checkpoint: null
96
+ profile_timing:
97
+ enabled: false
98
+ reload_modules: null
99
+ save_interval: 500
100
+ scheduler_specific_kwargs:
101
+ min_lr: 1.0e-06
102
+ wandb_entity: saberrr-zju
103
+ wandb_project: starVLA_rl_games
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.6028500199317932,
6
+ 0.3971499800682068,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0
12
+ ],
13
+ "std": [
14
+ 0.4890792667865753,
15
+ 0.4890792667865753,
16
+ 0.0,
17
+ 0.0,
18
+ 0.0,
19
+ 0.0,
20
+ 0.0
21
+ ],
22
+ "max": [
23
+ 1.0,
24
+ 1.0,
25
+ 0.0,
26
+ 0.0,
27
+ 0.0,
28
+ 0.0,
29
+ 0.0
30
+ ],
31
+ "min": [
32
+ 0.0,
33
+ 0.0,
34
+ 0.0,
35
+ 0.0,
36
+ 0.0,
37
+ 0.0,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ 0.0,
42
+ 0.0,
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 1.0,
51
+ 1.0,
52
+ 0.0,
53
+ 0.0,
54
+ 0.0,
55
+ 0.0,
56
+ 0.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ true
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 330734,
125
+ "num_trajectories": 180
126
+ }
127
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics_eval.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.7959861159324646,
6
+ 0.2040138840675354,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0
12
+ ],
13
+ "std": [
14
+ 0.4030573070049286,
15
+ 0.4030573070049286,
16
+ 0.0,
17
+ 0.0,
18
+ 0.0,
19
+ 0.0,
20
+ 0.0
21
+ ],
22
+ "max": [
23
+ 1.0,
24
+ 1.0,
25
+ 0.0,
26
+ 0.0,
27
+ 0.0,
28
+ 0.0,
29
+ 0.0
30
+ ],
31
+ "min": [
32
+ 0.0,
33
+ 0.0,
34
+ 0.0,
35
+ 0.0,
36
+ 0.0,
37
+ 0.0,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ 0.0,
42
+ 0.0,
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 1.0,
51
+ 1.0,
52
+ 0.0,
53
+ 0.0,
54
+ 0.0,
55
+ 0.0,
56
+ 0.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ true
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 72000,
125
+ "num_trajectories": 20
126
+ }
127
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 4.799999999999995,
7
+ "mean_length": 57.2,
8
+ "std_reward": 1.7999999999999952,
9
+ "std_length": 14.4,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 3.8999999999999986,
13
+ 3.8999999999999986,
14
+ 3.8999999999999986,
15
+ 3.8999999999999986
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 50,
20
+ 50,
21
+ 50,
22
+ 50
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 237,
26
+ "1": 49
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 4.799999999999995,
55
+ "mean_length": 57.2,
56
+ "std_reward": 1.7999999999999952,
57
+ "std_length": 14.4,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 4.799999999999995,
60
+ "macro_mean_length": 57.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 4.799999999999995,
7
+ "mean_length": 57.2,
8
+ "std_reward": 1.7999999999999952,
9
+ "std_length": 14.4,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 3.8999999999999986,
13
+ 3.8999999999999986,
14
+ 3.8999999999999986,
15
+ 3.8999999999999986
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 50,
20
+ 50,
21
+ 50,
22
+ 50
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 235,
26
+ "1": 51
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 4.799999999999995,
55
+ "mean_length": 57.2,
56
+ "std_reward": 1.7999999999999952,
57
+ "std_length": 14.4,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 4.799999999999995,
60
+ "macro_mean_length": 57.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 153.74000000000365,
7
+ "mean_length": 1258.2,
8
+ "std_reward": 160.43479173796118,
9
+ "std_length": 1292.4083565189449,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 444.60000000002566,
13
+ 171.8999999999956,
14
+ 3.8999999999999986,
15
+ 139.89999999999702
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 3600,
20
+ 1406,
21
+ 50,
22
+ 1149
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 4978,
26
+ "1": 1313
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 153.74000000000365,
55
+ "mean_length": 1258.2,
56
+ "std_reward": 160.43479173796118,
57
+ "std_length": 1292.4083565189449,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 153.74000000000365,
60
+ "macro_mean_length": 1258.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 217.38000000000935,
7
+ "mean_length": 1770.0,
8
+ "std_reward": 195.1522113633477,
9
+ "std_length": 1572.0778606672125,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 176.3999999999954,
13
+ 12.899999999999974,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 1442,
20
+ 122,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 7008,
26
+ "1": 1842
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 217.38000000000935,
55
+ "mean_length": 1770.0,
56
+ "std_reward": 195.1522113633477,
57
+ "std_length": 1572.0778606672125,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 217.38000000000935,
60
+ "macro_mean_length": 1770.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 116.27999999999801,
7
+ "mean_length": 956.0,
8
+ "std_reward": 102.48608490912143,
9
+ "std_length": 827.2419235024298,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 176.3999999999954,
13
+ 12.899999999999974,
14
+ 278.89999999999617,
15
+ 104.79999999999863
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 1442,
20
+ 122,
21
+ 2269,
22
+ 861
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 3842,
26
+ "1": 938
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 116.27999999999801,
55
+ "mean_length": 956.0,
56
+ "std_reward": 102.48608490912143,
57
+ "std_length": 827.2419235024298,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 116.27999999999801,
60
+ "macro_mean_length": 956.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 243.26000000001076,
7
+ "mean_length": 1978.4,
8
+ "std_reward": 199.3218462688027,
9
+ "std_length": 1605.7832481378052,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 3.8999999999999986,
15
+ 314.8000000000025
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 3600,
20
+ 3600,
21
+ 50,
22
+ 2556
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 7898,
26
+ "1": 1994
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 243.26000000001076,
55
+ "mean_length": 1978.4,
56
+ "std_reward": 199.3218462688027,
57
+ "std_length": 1605.7832481378052,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 243.26000000001076,
60
+ "macro_mean_length": 1978.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 37.10000000000025,
7
+ "mean_length": 319.0,
8
+ "std_reward": 23.528960878033065,
9
+ "std_length": 189.75668631170814,
10
+ "episode_rewards": [
11
+ 60.600000000000485,
12
+ 13.69999999999997,
13
+ 4.599999999999996,
14
+ 59.900000000000475,
15
+ 46.70000000000033
16
+ ],
17
+ "episode_lengths": [
18
+ 509,
19
+ 130,
20
+ 57,
21
+ 502,
22
+ 397
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 1243,
26
+ "1": 352
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 37.10000000000025,
55
+ "mean_length": 319.0,
56
+ "std_reward": 23.528960878033065,
57
+ "std_length": 189.75668631170814,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 37.10000000000025,
60
+ "macro_mean_length": 319.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 283.2200000000154,
7
+ "mean_length": 2300.2,
8
+ "std_reward": 198.73168242634333,
9
+ "std_length": 1600.6886517995933,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 73.89999999999999
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 615
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 9213,
26
+ "1": 2288
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 283.2200000000154,
55
+ "mean_length": 2300.2,
56
+ "std_reward": 198.73168242634333,
57
+ "std_length": 1600.6886517995933,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 283.2200000000154,
60
+ "macro_mean_length": 2300.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 112.99999999999913,
7
+ "mean_length": 930.4,
8
+ "std_reward": 113.37080752998062,
9
+ "std_length": 914.9803495157696,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 176.3999999999954,
13
+ 73.89999999999999,
14
+ 3.8999999999999986,
15
+ 302.4000000000004
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 1442,
20
+ 615,
21
+ 50,
22
+ 2459
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 3696,
26
+ "1": 956
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 112.99999999999913,
55
+ "mean_length": 930.4,
56
+ "std_reward": 113.37080752998062,
57
+ "std_length": 914.9803495157696,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 112.99999999999913,
60
+ "macro_mean_length": 930.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_3000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 303.72000000001447,
7
+ "mean_length": 2465.6,
8
+ "std_reward": 180.53576266215117,
9
+ "std_length": 1454.0181016754914,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 176.3999999999954,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 1442,
20
+ 3600,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 9797,
26
+ "1": 2531
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 303.72000000001447,
55
+ "mean_length": 2465.6,
56
+ "std_reward": 180.53576266215117,
57
+ "std_length": 1454.0181016754914,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 303.72000000001447,
60
+ "macro_mean_length": 2465.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 73.41999999999845,
7
+ "mean_length": 612.0,
8
+ "std_reward": 90.13413116017405,
9
+ "std_length": 727.8139872247579,
10
+ "episode_rewards": [
11
+ 247.19999999999223,
12
+ 22.40000000000006,
13
+ 18.800000000000022,
14
+ 3.8999999999999986,
15
+ 74.79999999999994
16
+ ],
17
+ "episode_lengths": [
18
+ 2015,
19
+ 199,
20
+ 172,
21
+ 50,
22
+ 624
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 2359,
26
+ "1": 701
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 73.41999999999845,
55
+ "mean_length": 612.0,
56
+ "std_reward": 90.13413116017405,
57
+ "std_length": 727.8139872247579,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 73.41999999999845,
60
+ "macro_mean_length": 612.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 72.7599999999988,
7
+ "mean_length": 607.2,
8
+ "std_reward": 66.34210126307238,
9
+ "std_length": 535.8131763964003,
10
+ "episode_rewards": [
11
+ 186.79999999999492,
12
+ 102.79999999999869,
13
+ 18.900000000000023,
14
+ 3.8999999999999986,
15
+ 51.40000000000038
16
+ ],
17
+ "episode_lengths": [
18
+ 1528,
19
+ 850,
20
+ 173,
21
+ 50,
22
+ 435
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 2406,
26
+ "1": 630
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 72.7599999999988,
55
+ "mean_length": 607.2,
56
+ "std_reward": 66.34210126307238,
57
+ "std_length": 535.8131763964003,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 72.7599999999988,
60
+ "macro_mean_length": 607.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/config.yaml ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ framework:
2
+ qwenvl:
3
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
4
+ attn_implementation: flash_attention_2
5
+ enable_gradient_checkpointing: true
6
+ action_model:
7
+ state_dim: 7
8
+ loss_type: discrete_ce
9
+ action_horizon: 1
10
+ future_action_window_size: 0
11
+ past_action_window_size: 0
12
+ action_dim: 7
13
+ action_env_dim: 2
14
+ name: QwenOFT
15
+ datasets:
16
+ vla_data:
17
+ dataset_py: lerobot_datasets
18
+ include_state: true
19
+ data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
20
+ data_mix: flappy_train__bridge
21
+ eval_data_mix: flappy_train__bridge__val
22
+ custom_mixtures_path: null
23
+ action_type: discrete
24
+ sequential_step_sampling: false
25
+ eval_sequential_step_sampling: null
26
+ num_workers: 8
27
+ eval_num_workers: 8
28
+ prefetch_factor: 4
29
+ persistent_workers: true
30
+ pin_memory: true
31
+ shuffle: true
32
+ action_balance:
33
+ enabled: false
34
+ strategy: balanced_epoch
35
+ action_key: action_id
36
+ target_flap_fraction: 0.3
37
+ noop_id: 0
38
+ flap_id: 1
39
+ latency_curriculum:
40
+ enabled: false
41
+ strategy: exclusive
42
+ latencies: null
43
+ phase_steps: null
44
+ per_device_batch_size: 32
45
+ load_all_data_for_training: true
46
+ num_obs_frames: 1
47
+ image_mode: single
48
+ stitch_grid:
49
+ - 2
50
+ - 2
51
+ obs_image_size: null
52
+ video_backend: torchvision_av
53
+ dataset:
54
+ source_hf: ''
55
+ config_name: null
56
+ source_subdir: null
57
+ converted_name: flappy_train
58
+ single_source_hf: ''
59
+ mixed_source_hf: ''
60
+ single_converted_name: flappy_train
61
+ mixed_converted_name: flappy_mixed_latency_train
62
+ single_latency_filter: null
63
+ mixed_latency_filter: null
64
+ force_download: false
65
+ setup_force: false
66
+ skip_verification: false
67
+ verify_rows: 200
68
+ max_episodes: null
69
+ episodes_per_latency: null
70
+ latency_filter: null
71
+ debug_subset:
72
+ enabled: false
73
+ max_episodes: 5
74
+ suffix: debug
75
+ base_model:
76
+ repo_id: Qwen/Qwen3-VL-4B-Instruct
77
+ initialization:
78
+ checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
79
+ checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
80
+ checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
81
+ trainer:
82
+ max_train_steps: 3000
83
+ num_warmup_steps: 100
84
+ save_interval: 500
85
+ eval_interval: 250
86
+ eval_num_batches: 50
87
+ per_latency_eval_num_batches: null
88
+ eval_action_classification: true
89
+ eval_action_classification_interval: null
90
+ cc_f1_tolerance: 1
91
+ learning_rate:
92
+ base: 2.0e-05
93
+ qwen_vl_interface: 1.0e-05
94
+ action_model: 0.0001
95
+ lr_scheduler_type: cosine_with_min_lr
96
+ scheduler_specific_kwargs:
97
+ min_lr: 1.0e-06
98
+ freeze_modules: ''
99
+ freeze_vit: false
100
+ freeze_tied_embedding: false
101
+ freeze_llm_layers: []
102
+ loss_scale:
103
+ vla: 1.0
104
+ vlm: 0.1
105
+ max_grad_norm: 1.0
106
+ weight_decay: 0.0
107
+ logging_frequency: 1
108
+ profile_timing:
109
+ enabled: false
110
+ log_interval: 10
111
+ gradient_clipping: 1.0
112
+ gradient_accumulation_steps: 1
113
+ distributed_backend: none
114
+ is_resume: false
115
+ pretrained_checkpoint: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
116
+ resume_step: 0
117
+ reload_modules: null
118
+ optimizer:
119
+ name: AdamW
120
+ betas:
121
+ - 0.9
122
+ - 0.95
123
+ eps: 1.0e-08
124
+ weight_decay: 1.0e-08
125
+ fused: true
126
+ save_format: pt
127
+ workspace_dir: WORKSPACE_DIR
128
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
129
+ seed: 42
130
+ wandb_entity: saberrr-zju
131
+ wandb_project: starVLA_rl_games
132
+ auth:
133
+ env_file: null
134
+ hf_token_env: HF_TOKEN
135
+ wandb_api_key_env: WANDB_API_KEY
136
+ paths:
137
+ run_root_dir: results/Checkpoints
138
+ dataset_local_dir: data/flappy_fix_latency_2_200ep
139
+ dataset_cache_dir: null
140
+ base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
141
+ accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
142
+ launch:
143
+ use_accelerate: true
144
+ gpus: null
145
+ num_processes: 1
146
+ dry_run: false
147
+ conda:
148
+ enabled: true
149
+ env_name: null
150
+ rl_games:
151
+ model_alias: openvla
152
+ env_eval:
153
+ image_size: 224
154
+ frameskip: 1
155
+ seed: 42
156
+ fixed_episode_seeds: true
157
+ latency_seed_stride: 0
158
+ task_seed_stride: 0
159
+ task_description: ''
160
+ enabled: true
161
+ distributed_mode: none
162
+ vectorized:
163
+ enabled: false
164
+ batch_size: 1
165
+ latency:
166
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
167
+ mode: single
168
+ values:
169
+ - 0
170
+ mid_train:
171
+ enabled: true
172
+ interval_steps: 250
173
+ latencies:
174
+ - 2
175
+ num_episodes: 5
176
+ max_steps_per_episode: 3600
177
+ post_train:
178
+ enabled: false
179
+ latencies:
180
+ - 0
181
+ - 1
182
+ - 2
183
+ - 3
184
+ - 4
185
+ num_episodes: 5
186
+ max_steps_per_episode: 3600
187
+ task: flappy
188
+ initialization_mode: bridge
189
+ action_carrier: bridge
190
+ model: openvla
191
+ env: flappy
192
+ init: bridge
193
+ bridge_base_model:
194
+ repo_id:
195
+ openvla: Qwen/Qwen3-VL-4B-Instruct
196
+ pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
197
+ pi05: Qwen/Qwen3-VL-4B-Instruct
198
+ gr00t: Qwen/Qwen3-VL-4B-Instruct
199
+ local_dir:
200
+ openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
201
+ pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
202
+ pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
203
+ gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
204
+ mode: single
205
+ checkpoint:
206
+ load: none
207
+ hf_repo_id: null
208
+ save_best_model: false
209
+ save_pt_file: false
210
+ local:
211
+ keep_last_n: 1
212
+ sync:
213
+ enabled: false
214
+ repo_id: null
215
+ keep_last_n: 0
216
+ sync_every_n_checkpoints: 1
217
+ resume_policy: local_latest
218
+ run_id: flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
219
+ output_dir: null
220
+ config_yaml: null
221
+ is_debug: false
222
+ version_id: 0.21
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/hydra.yaml ADDED
@@ -0,0 +1,341 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${run_root_dir}/${run_id}/hydra
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - model=openvla
116
+ - env=flappy
117
+ - init=bridge
118
+ - mode=single
119
+ - ++framework.qwenvl.base_vlm=playground/Pretrained_models/Qwen3-VL-4B-Instruct
120
+ - ++framework.qwenvl.attn_implementation=flash_attention_2
121
+ - ++framework.qwenvl.enable_gradient_checkpointing=true
122
+ - ++framework.action_model.state_dim=7
123
+ - ++framework.action_model.loss_type=discrete_ce
124
+ - ++framework.action_model.action_horizon=1
125
+ - ++framework.action_model.future_action_window_size=0
126
+ - ++framework.action_model.past_action_window_size=0
127
+ - ++framework.action_model.action_dim=7
128
+ - ++framework.action_model.action_env_dim=2
129
+ - ++framework.name=QwenOFT
130
+ - ++datasets.vla_data.dataset_py=lerobot_datasets
131
+ - ++datasets.vla_data.include_state=true
132
+ - ++datasets.vla_data.data_root_dir=playground/Datasets/rl_games
133
+ - ++datasets.vla_data.data_mix=flappy_train
134
+ - ++datasets.vla_data.eval_data_mix=null
135
+ - ++datasets.vla_data.custom_mixtures_path=null
136
+ - ++datasets.vla_data.action_type=discrete
137
+ - ++datasets.vla_data.sequential_step_sampling=false
138
+ - ++datasets.vla_data.eval_sequential_step_sampling=null
139
+ - ++datasets.vla_data.num_workers=8
140
+ - ++datasets.vla_data.eval_num_workers=8
141
+ - ++datasets.vla_data.prefetch_factor=4
142
+ - ++datasets.vla_data.persistent_workers=true
143
+ - ++datasets.vla_data.pin_memory=true
144
+ - ++datasets.vla_data.shuffle=true
145
+ - ++datasets.vla_data.action_balance.enabled=false
146
+ - ++datasets.vla_data.action_balance.strategy=balanced_epoch
147
+ - ++datasets.vla_data.action_balance.action_key=action_id
148
+ - ++datasets.vla_data.action_balance.target_flap_fraction=0.3
149
+ - ++datasets.vla_data.action_balance.noop_id=0
150
+ - ++datasets.vla_data.action_balance.flap_id=1
151
+ - ++datasets.vla_data.latency_curriculum.enabled=false
152
+ - ++datasets.vla_data.latency_curriculum.strategy=exclusive
153
+ - ++datasets.vla_data.latency_curriculum.latencies=null
154
+ - ++datasets.vla_data.latency_curriculum.phase_steps=null
155
+ - ++datasets.vla_data.per_device_batch_size=32
156
+ - ++datasets.vla_data.load_all_data_for_training=true
157
+ - ++datasets.vla_data.num_obs_frames=1
158
+ - ++datasets.vla_data.image_mode=single
159
+ - ++datasets.vla_data.stitch_grid=[2,2]
160
+ - ++datasets.vla_data.obs_image_size=null
161
+ - ++datasets.vla_data.video_backend=torchvision_av
162
+ - ++dataset.source_hf=
163
+ - ++dataset.config_name=null
164
+ - ++dataset.source_subdir=null
165
+ - ++dataset.converted_name=flappy_train
166
+ - ++dataset.single_source_hf=
167
+ - ++dataset.mixed_source_hf=
168
+ - ++dataset.single_converted_name=flappy_train
169
+ - ++dataset.mixed_converted_name=flappy_mixed_latency_train
170
+ - ++dataset.single_latency_filter=null
171
+ - ++dataset.mixed_latency_filter=null
172
+ - ++dataset.force_download=false
173
+ - ++dataset.setup_force=false
174
+ - ++dataset.skip_verification=false
175
+ - ++dataset.verify_rows=200
176
+ - ++dataset.max_episodes=null
177
+ - ++dataset.episodes_per_latency=null
178
+ - ++dataset.latency_filter=null
179
+ - ++dataset.debug_subset.enabled=false
180
+ - ++dataset.debug_subset.max_episodes=5
181
+ - ++dataset.debug_subset.suffix=debug
182
+ - ++base_model.repo_id=Qwen/Qwen3-VL-4B-Instruct
183
+ - ++initialization.checkpoint_local_dir=playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
184
+ - ++initialization.checkpoint_hf_repo_id=StarVLA/Qwen3VL-OFT-Bridge-RT-1
185
+ - ++initialization.checkpoint_filename=checkpoints/steps_5000_pytorch_model.pt
186
+ - ++trainer.max_train_steps=3000
187
+ - ++trainer.num_warmup_steps=100
188
+ - ++trainer.save_interval=500
189
+ - ++trainer.eval_interval=250
190
+ - ++trainer.eval_num_batches=50
191
+ - ++trainer.per_latency_eval_num_batches=null
192
+ - ++trainer.eval_action_classification=true
193
+ - ++trainer.eval_action_classification_interval=null
194
+ - ++trainer.cc_f1_tolerance=1
195
+ - ++trainer.learning_rate.base=2e-05
196
+ - ++trainer.learning_rate.qwen_vl_interface=1e-05
197
+ - ++trainer.learning_rate.action_model=0.0001
198
+ - ++trainer.lr_scheduler_type=cosine_with_min_lr
199
+ - ++trainer.scheduler_specific_kwargs.min_lr=1e-06
200
+ - ++trainer.freeze_modules=
201
+ - ++trainer.freeze_vit=false
202
+ - ++trainer.freeze_tied_embedding=false
203
+ - ++trainer.freeze_llm_layers=[]
204
+ - ++trainer.loss_scale.vla=1.0
205
+ - ++trainer.loss_scale.vlm=0.1
206
+ - ++trainer.max_grad_norm=1.0
207
+ - ++trainer.weight_decay=0.0
208
+ - ++trainer.logging_frequency=1
209
+ - ++trainer.profile_timing.enabled=false
210
+ - ++trainer.profile_timing.log_interval=10
211
+ - ++trainer.gradient_clipping=1.0
212
+ - ++trainer.gradient_accumulation_steps=1
213
+ - ++trainer.distributed_backend=none
214
+ - ++trainer.is_resume=false
215
+ - ++trainer.pretrained_checkpoint=null
216
+ - ++trainer.resume_step=0
217
+ - ++trainer.reload_modules=null
218
+ - ++trainer.optimizer.name=AdamW
219
+ - ++trainer.optimizer.betas=[0.9,0.95]
220
+ - ++trainer.optimizer.eps=1e-08
221
+ - ++trainer.optimizer.weight_decay=1e-08
222
+ - ++trainer.optimizer.fused=true
223
+ - ++trainer.save_format=pt
224
+ - ++workspace_dir=WORKSPACE_DIR
225
+ - ++run_root_dir=results/Checkpoints
226
+ - ++seed=42
227
+ - ++wandb_entity=saberrr-zju
228
+ - ++wandb_project=starVLA_rl_games
229
+ - ++auth.env_file=null
230
+ - ++auth.hf_token_env=HF_TOKEN
231
+ - ++auth.wandb_api_key_env=WANDB_API_KEY
232
+ - ++paths.run_root_dir=results/Checkpoints
233
+ - ++paths.dataset_local_dir=data/flappy_fix_latency_2_200ep
234
+ - ++paths.dataset_cache_dir=null
235
+ - ++paths.base_model_dir=playground/Pretrained_models/Qwen3-VL-4B-Instruct
236
+ - ++paths.accelerate_config=starVLA/config/deepseeds/deepspeed_zero2.yaml
237
+ - ++rl_games.model_alias=openvla
238
+ - ++rl_games.env_eval.image_size=224
239
+ - ++rl_games.env_eval.frameskip=1
240
+ - ++rl_games.env_eval.seed=42
241
+ - ++rl_games.env_eval.fixed_episode_seeds=true
242
+ - ++rl_games.env_eval.latency_seed_stride=0
243
+ - ++rl_games.env_eval.task_seed_stride=0
244
+ - ++rl_games.env_eval.task_description=
245
+ - ++rl_games.env_eval.enabled=true
246
+ - ++rl_games.env_eval.distributed_mode=none
247
+ - ++rl_games.env_eval.vectorized.enabled=false
248
+ - ++rl_games.env_eval.vectorized.batch_size=1
249
+ - ++rl_games.env_eval.latency.prompt_map_path=null
250
+ - ++rl_games.env_eval.latency.mode=single
251
+ - ++rl_games.env_eval.latency.values=[0]
252
+ - ++rl_games.env_eval.mid_train.enabled=true
253
+ - ++rl_games.env_eval.mid_train.interval_steps=250
254
+ - ++rl_games.env_eval.mid_train.latencies=[2]
255
+ - ++rl_games.env_eval.mid_train.num_episodes=5
256
+ - ++rl_games.env_eval.mid_train.max_steps_per_episode=3600
257
+ - ++rl_games.env_eval.post_train.enabled=false
258
+ - ++rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
259
+ - ++rl_games.env_eval.post_train.num_episodes=5
260
+ - ++rl_games.env_eval.post_train.max_steps_per_episode=3600
261
+ - ++rl_games.task=flappy
262
+ - ++rl_games.initialization_mode=bridge
263
+ - ++rl_games.action_carrier=bridge
264
+ - ++bridge_base_model.repo_id.openvla=Qwen/Qwen3-VL-4B-Instruct
265
+ - ++bridge_base_model.repo_id.pi0=StarVLA/Qwen2.5-VL-3B-Instruct-Action
266
+ - ++bridge_base_model.repo_id.pi05=Qwen/Qwen3-VL-4B-Instruct
267
+ - ++bridge_base_model.repo_id.gr00t=Qwen/Qwen3-VL-4B-Instruct
268
+ - ++bridge_base_model.local_dir.openvla=playground/Pretrained_models/Qwen3-VL-4B-Instruct
269
+ - ++bridge_base_model.local_dir.pi0=playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
270
+ - ++bridge_base_model.local_dir.pi05=playground/Pretrained_models/Qwen3-VL-4B-Instruct
271
+ - ++bridge_base_model.local_dir.gr00t=playground/Pretrained_models/Qwen3-VL-4B-Instruct
272
+ - ++checkpoint.load=none
273
+ - ++checkpoint.hf_repo_id=null
274
+ - ++checkpoint.save_best_model=false
275
+ - ++checkpoint.save_pt_file=false
276
+ - ++checkpoint.local.keep_last_n=1
277
+ - ++checkpoint.sync.enabled=false
278
+ - ++checkpoint.sync.repo_id=null
279
+ - ++checkpoint.sync.keep_last_n=0
280
+ - ++checkpoint.sync.sync_every_n_checkpoints=1
281
+ - ++checkpoint.sync.resume_policy=local_latest
282
+ - ++run_id=flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
283
+ - ++output_dir=null
284
+ - ++config_yaml=null
285
+ - ++is_debug=false
286
+ - ++version_id=0.21
287
+ - ++run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
288
+ - ++trainer.is_resume=false
289
+ - ++trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
290
+ - ++trainer.resume_step=0
291
+ - ++datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
292
+ - ++datasets.vla_data.data_mix=flappy_train__bridge
293
+ - ++datasets.vla_data.eval_data_mix=flappy_train__bridge__val
294
+ - ++framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
295
+ - ++rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
296
+ job:
297
+ name: train_starvla_hydra
298
+ chdir: false
299
+ override_dirname: ++auth.env_file=null,++auth.hf_token_env=HF_TOKEN,++auth.wandb_api_key_env=WANDB_API_KEY,++base_model.repo_id=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.gr00t=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.openvla=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.pi05=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.pi0=playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action,++bridge_base_model.repo_id.gr00t=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.repo_id.openvla=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.repo_id.pi05=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.repo_id.pi0=StarVLA/Qwen2.5-VL-3B-Instruct-Action,++checkpoint.hf_repo_id=null,++checkpoint.load=none,++checkpoint.local.keep_last_n=1,++checkpoint.save_best_model=false,++checkpoint.save_pt_file=false,++checkpoint.sync.enabled=false,++checkpoint.sync.keep_last_n=0,++checkpoint.sync.repo_id=null,++checkpoint.sync.resume_policy=local_latest,++checkpoint.sync.sync_every_n_checkpoints=1,++config_yaml=null,++dataset.config_name=null,++dataset.converted_name=flappy_train,++dataset.debug_subset.enabled=false,++dataset.debug_subset.max_episodes=5,++dataset.debug_subset.suffix=debug,++dataset.episodes_per_latency=null,++dataset.force_download=false,++dataset.latency_filter=null,++dataset.max_episodes=null,++dataset.mixed_converted_name=flappy_mixed_latency_train,++dataset.mixed_latency_filter=null,++dataset.mixed_source_hf=,++dataset.setup_force=false,++dataset.single_converted_name=flappy_train,++dataset.single_latency_filter=null,++dataset.single_source_hf=,++dataset.skip_verification=false,++dataset.source_hf=,++dataset.source_subdir=null,++dataset.verify_rows=200,++datasets.vla_data.action_balance.action_key=action_id,++datasets.vla_data.action_balance.enabled=false,++datasets.vla_data.action_balance.flap_id=1,++datasets.vla_data.action_balance.noop_id=0,++datasets.vla_data.action_balance.strategy=balanced_epoch,++datasets.vla_data.action_balance.target_flap_fraction=0.3,++datasets.vla_data.action_type=discrete,++datasets.vla_data.custom_mixtures_path=null,++datasets.vla_data.data_mix=flappy_train,++datasets.vla_data.data_mix=flappy_train__bridge,++datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep,++datasets.vla_data.data_root_dir=playground/Datasets/rl_games,++datasets.vla_data.dataset_py=lerobot_datasets,++datasets.vla_data.eval_data_mix=flappy_train__bridge__val,++datasets.vla_data.eval_data_mix=null,++datasets.vla_data.eval_num_workers=8,++datasets.vla_data.eval_sequential_step_sampling=null,++datasets.vla_data.image_mode=single,++datasets.vla_data.include_state=true,++datasets.vla_data.latency_curriculum.enabled=false,++datasets.vla_data.latency_curriculum.latencies=null,++datasets.vla_data.latency_curriculum.phase_steps=null,++datasets.vla_data.latency_curriculum.strategy=exclusive,++datasets.vla_data.load_all_data_for_training=true,++datasets.vla_data.num_obs_frames=1,++datasets.vla_data.num_workers=8,++datasets.vla_data.obs_image_size=null,++datasets.vla_data.per_device_batch_size=32,++datasets.vla_data.persistent_workers=true,++datasets.vla_data.pin_memory=true,++datasets.vla_data.prefetch_factor=4,++datasets.vla_data.sequential_step_sampling=false,++datasets.vla_data.shuffle=true,++datasets.vla_data.stitch_grid=[2,2],++datasets.vla_data.video_backend=torchvision_av,++framework.action_model.action_dim=7,++framework.action_model.action_env_dim=2,++framework.action_model.action_horizon=1,++framework.action_model.future_action_window_size=0,++framework.action_model.loss_type=discrete_ce,++framework.action_model.past_action_window_size=0,++framework.action_model.state_dim=7,++framework.name=QwenOFT,++framework.qwenvl.attn_implementation=flash_attention_2,++framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct,++framework.qwenvl.base_vlm=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++framework.qwenvl.enable_gradient_checkpointing=true,++initialization.checkpoint_filename=checkpoints/steps_5000_pytorch_model.pt,++initialization.checkpoint_hf_repo_id=StarVLA/Qwen3VL-OFT-Bridge-RT-1,++initialization.checkpoint_local_dir=playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1,++is_debug=false,++output_dir=null,++paths.accelerate_config=starVLA/config/deepseeds/deepspeed_zero2.yaml,++paths.base_model_dir=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++paths.dataset_cache_dir=null,++paths.dataset_local_dir=data/flappy_fix_latency_2_200ep,++paths.run_root_dir=results/Checkpoints,++rl_games.action_carrier=bridge,++rl_games.env_eval.distributed_mode=none,++rl_games.env_eval.enabled=true,++rl_games.env_eval.fixed_episode_seeds=true,++rl_games.env_eval.frameskip=1,++rl_games.env_eval.image_size=224,++rl_games.env_eval.latency.mode=single,++rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json,++rl_games.env_eval.latency.prompt_map_path=null,++rl_games.env_eval.latency.values=[0],++rl_games.env_eval.latency_seed_stride=0,++rl_games.env_eval.mid_train.enabled=true,++rl_games.env_eval.mid_train.interval_steps=250,++rl_games.env_eval.mid_train.latencies=[2],++rl_games.env_eval.mid_train.max_steps_per_episode=3600,++rl_games.env_eval.mid_train.num_episodes=5,++rl_games.env_eval.post_train.enabled=false,++rl_games.env_eval.post_train.latencies=[0,1,2,3,4],++rl_games.env_eval.post_train.max_steps_per_episode=3600,++rl_games.env_eval.post_train.num_episodes=5,++rl_games.env_eval.seed=42,++rl_games.env_eval.task_description=,++rl_games.env_eval.task_seed_stride=0,++rl_games.env_eval.vectorized.batch_size=1,++rl_games.env_eval.vectorized.enabled=false,++rl_games.initialization_mode=bridge,++rl_games.model_alias=openvla,++rl_games.task=flappy,++run_id=flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps,++run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints,++run_root_dir=results/Checkpoints,++seed=42,++trainer.cc_f1_tolerance=1,++trainer.distributed_backend=none,++trainer.eval_action_classification=true,++trainer.eval_action_classification_interval=null,++trainer.eval_interval=250,++trainer.eval_num_batches=50,++trainer.freeze_llm_layers=[],++trainer.freeze_modules=,++trainer.freeze_tied_embedding=false,++trainer.freeze_vit=false,++trainer.gradient_accumulation_steps=1,++trainer.gradient_clipping=1.0,++trainer.is_resume=false,++trainer.is_resume=false,++trainer.learning_rate.action_model=0.0001,++trainer.learning_rate.base=2e-05,++trainer.learning_rate.qwen_vl_interface=1e-05,++trainer.logging_frequency=1,++trainer.loss_scale.vla=1.0,++trainer.loss_scale.vlm=0.1,++trainer.lr_scheduler_type=cosine_with_min_lr,++trainer.max_grad_norm=1.0,++trainer.max_train_steps=3000,++trainer.num_warmup_steps=100,++trainer.optimizer.betas=[0.9,0.95],++trainer.optimizer.eps=1e-08,++trainer.optimizer.fused=true,++trainer.optimizer.name=AdamW,++trainer.optimizer.weight_decay=1e-08,++trainer.per_latency_eval_num_batches=null,++trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt,++trainer.pretrained_checkpoint=null,++trainer.profile_timing.enabled=false,++trainer.profile_timing.log_interval=10,++trainer.reload_modules=null,++trainer.resume_step=0,++trainer.resume_step=0,++trainer.save_format=pt,++trainer.save_interval=500,++trainer.scheduler_specific_kwargs.min_lr=1e-06,++trainer.weight_decay=0.0,++version_id=0.21,++wandb_entity=saberrr-zju,++wandb_project=starVLA_rl_games,++workspace_dir=WORKSPACE_DIR,env=flappy,init=bridge,mode=single,model=openvla
300
+ id: ???
301
+ num: ???
302
+ config_name: train
303
+ env_set: {}
304
+ env_copy: []
305
+ config:
306
+ override_dirname:
307
+ kv_sep: '='
308
+ item_sep: ','
309
+ exclude_keys: []
310
+ runtime:
311
+ version: 1.3.3
312
+ version_base: '1.1'
313
+ cwd: /workspace/latency-sensitive-bench/starVLA
314
+ config_sources:
315
+ - path: hydra.conf
316
+ schema: pkg
317
+ provider: hydra
318
+ - path: /workspace/latency-sensitive-bench/starVLA/examples/rl_games/config
319
+ schema: file
320
+ provider: main
321
+ - path: ''
322
+ schema: structured
323
+ provider: schema
324
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra
325
+ choices:
326
+ cross_task_setup: null
327
+ checkpoint: default
328
+ mode: single
329
+ init: bridge
330
+ env: flappy
331
+ model: openvla
332
+ hydra/env: default
333
+ hydra/callbacks: null
334
+ hydra/job_logging: default
335
+ hydra/hydra_logging: default
336
+ hydra/hydra_help: default
337
+ hydra/help: default
338
+ hydra/sweeper: basic
339
+ hydra/launcher: basic
340
+ hydra/output: default
341
+ verbose: false
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/overrides.yaml ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - model=openvla
2
+ - env=flappy
3
+ - init=bridge
4
+ - mode=single
5
+ - ++framework.qwenvl.base_vlm=playground/Pretrained_models/Qwen3-VL-4B-Instruct
6
+ - ++framework.qwenvl.attn_implementation=flash_attention_2
7
+ - ++framework.qwenvl.enable_gradient_checkpointing=true
8
+ - ++framework.action_model.state_dim=7
9
+ - ++framework.action_model.loss_type=discrete_ce
10
+ - ++framework.action_model.action_horizon=1
11
+ - ++framework.action_model.future_action_window_size=0
12
+ - ++framework.action_model.past_action_window_size=0
13
+ - ++framework.action_model.action_dim=7
14
+ - ++framework.action_model.action_env_dim=2
15
+ - ++framework.name=QwenOFT
16
+ - ++datasets.vla_data.dataset_py=lerobot_datasets
17
+ - ++datasets.vla_data.include_state=true
18
+ - ++datasets.vla_data.data_root_dir=playground/Datasets/rl_games
19
+ - ++datasets.vla_data.data_mix=flappy_train
20
+ - ++datasets.vla_data.eval_data_mix=null
21
+ - ++datasets.vla_data.custom_mixtures_path=null
22
+ - ++datasets.vla_data.action_type=discrete
23
+ - ++datasets.vla_data.sequential_step_sampling=false
24
+ - ++datasets.vla_data.eval_sequential_step_sampling=null
25
+ - ++datasets.vla_data.num_workers=8
26
+ - ++datasets.vla_data.eval_num_workers=8
27
+ - ++datasets.vla_data.prefetch_factor=4
28
+ - ++datasets.vla_data.persistent_workers=true
29
+ - ++datasets.vla_data.pin_memory=true
30
+ - ++datasets.vla_data.shuffle=true
31
+ - ++datasets.vla_data.action_balance.enabled=false
32
+ - ++datasets.vla_data.action_balance.strategy=balanced_epoch
33
+ - ++datasets.vla_data.action_balance.action_key=action_id
34
+ - ++datasets.vla_data.action_balance.target_flap_fraction=0.3
35
+ - ++datasets.vla_data.action_balance.noop_id=0
36
+ - ++datasets.vla_data.action_balance.flap_id=1
37
+ - ++datasets.vla_data.latency_curriculum.enabled=false
38
+ - ++datasets.vla_data.latency_curriculum.strategy=exclusive
39
+ - ++datasets.vla_data.latency_curriculum.latencies=null
40
+ - ++datasets.vla_data.latency_curriculum.phase_steps=null
41
+ - ++datasets.vla_data.per_device_batch_size=32
42
+ - ++datasets.vla_data.load_all_data_for_training=true
43
+ - ++datasets.vla_data.num_obs_frames=1
44
+ - ++datasets.vla_data.image_mode=single
45
+ - ++datasets.vla_data.stitch_grid=[2,2]
46
+ - ++datasets.vla_data.obs_image_size=null
47
+ - ++datasets.vla_data.video_backend=torchvision_av
48
+ - ++dataset.source_hf=
49
+ - ++dataset.config_name=null
50
+ - ++dataset.source_subdir=null
51
+ - ++dataset.converted_name=flappy_train
52
+ - ++dataset.single_source_hf=
53
+ - ++dataset.mixed_source_hf=
54
+ - ++dataset.single_converted_name=flappy_train
55
+ - ++dataset.mixed_converted_name=flappy_mixed_latency_train
56
+ - ++dataset.single_latency_filter=null
57
+ - ++dataset.mixed_latency_filter=null
58
+ - ++dataset.force_download=false
59
+ - ++dataset.setup_force=false
60
+ - ++dataset.skip_verification=false
61
+ - ++dataset.verify_rows=200
62
+ - ++dataset.max_episodes=null
63
+ - ++dataset.episodes_per_latency=null
64
+ - ++dataset.latency_filter=null
65
+ - ++dataset.debug_subset.enabled=false
66
+ - ++dataset.debug_subset.max_episodes=5
67
+ - ++dataset.debug_subset.suffix=debug
68
+ - ++base_model.repo_id=Qwen/Qwen3-VL-4B-Instruct
69
+ - ++initialization.checkpoint_local_dir=playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
70
+ - ++initialization.checkpoint_hf_repo_id=StarVLA/Qwen3VL-OFT-Bridge-RT-1
71
+ - ++initialization.checkpoint_filename=checkpoints/steps_5000_pytorch_model.pt
72
+ - ++trainer.max_train_steps=3000
73
+ - ++trainer.num_warmup_steps=100
74
+ - ++trainer.save_interval=500
75
+ - ++trainer.eval_interval=250
76
+ - ++trainer.eval_num_batches=50
77
+ - ++trainer.per_latency_eval_num_batches=null
78
+ - ++trainer.eval_action_classification=true
79
+ - ++trainer.eval_action_classification_interval=null
80
+ - ++trainer.cc_f1_tolerance=1
81
+ - ++trainer.learning_rate.base=2e-05
82
+ - ++trainer.learning_rate.qwen_vl_interface=1e-05
83
+ - ++trainer.learning_rate.action_model=0.0001
84
+ - ++trainer.lr_scheduler_type=cosine_with_min_lr
85
+ - ++trainer.scheduler_specific_kwargs.min_lr=1e-06
86
+ - ++trainer.freeze_modules=
87
+ - ++trainer.freeze_vit=false
88
+ - ++trainer.freeze_tied_embedding=false
89
+ - ++trainer.freeze_llm_layers=[]
90
+ - ++trainer.loss_scale.vla=1.0
91
+ - ++trainer.loss_scale.vlm=0.1
92
+ - ++trainer.max_grad_norm=1.0
93
+ - ++trainer.weight_decay=0.0
94
+ - ++trainer.logging_frequency=1
95
+ - ++trainer.profile_timing.enabled=false
96
+ - ++trainer.profile_timing.log_interval=10
97
+ - ++trainer.gradient_clipping=1.0
98
+ - ++trainer.gradient_accumulation_steps=1
99
+ - ++trainer.distributed_backend=none
100
+ - ++trainer.is_resume=false
101
+ - ++trainer.pretrained_checkpoint=null
102
+ - ++trainer.resume_step=0
103
+ - ++trainer.reload_modules=null
104
+ - ++trainer.optimizer.name=AdamW
105
+ - ++trainer.optimizer.betas=[0.9,0.95]
106
+ - ++trainer.optimizer.eps=1e-08
107
+ - ++trainer.optimizer.weight_decay=1e-08
108
+ - ++trainer.optimizer.fused=true
109
+ - ++trainer.save_format=pt
110
+ - ++workspace_dir=WORKSPACE_DIR
111
+ - ++run_root_dir=results/Checkpoints
112
+ - ++seed=42
113
+ - ++wandb_entity=saberrr-zju
114
+ - ++wandb_project=starVLA_rl_games
115
+ - ++auth.env_file=null
116
+ - ++auth.hf_token_env=HF_TOKEN
117
+ - ++auth.wandb_api_key_env=WANDB_API_KEY
118
+ - ++paths.run_root_dir=results/Checkpoints
119
+ - ++paths.dataset_local_dir=data/flappy_fix_latency_2_200ep
120
+ - ++paths.dataset_cache_dir=null
121
+ - ++paths.base_model_dir=playground/Pretrained_models/Qwen3-VL-4B-Instruct
122
+ - ++paths.accelerate_config=starVLA/config/deepseeds/deepspeed_zero2.yaml
123
+ - ++rl_games.model_alias=openvla
124
+ - ++rl_games.env_eval.image_size=224
125
+ - ++rl_games.env_eval.frameskip=1
126
+ - ++rl_games.env_eval.seed=42
127
+ - ++rl_games.env_eval.fixed_episode_seeds=true
128
+ - ++rl_games.env_eval.latency_seed_stride=0
129
+ - ++rl_games.env_eval.task_seed_stride=0
130
+ - ++rl_games.env_eval.task_description=
131
+ - ++rl_games.env_eval.enabled=true
132
+ - ++rl_games.env_eval.distributed_mode=none
133
+ - ++rl_games.env_eval.vectorized.enabled=false
134
+ - ++rl_games.env_eval.vectorized.batch_size=1
135
+ - ++rl_games.env_eval.latency.prompt_map_path=null
136
+ - ++rl_games.env_eval.latency.mode=single
137
+ - ++rl_games.env_eval.latency.values=[0]
138
+ - ++rl_games.env_eval.mid_train.enabled=true
139
+ - ++rl_games.env_eval.mid_train.interval_steps=250
140
+ - ++rl_games.env_eval.mid_train.latencies=[2]
141
+ - ++rl_games.env_eval.mid_train.num_episodes=5
142
+ - ++rl_games.env_eval.mid_train.max_steps_per_episode=3600
143
+ - ++rl_games.env_eval.post_train.enabled=false
144
+ - ++rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
145
+ - ++rl_games.env_eval.post_train.num_episodes=5
146
+ - ++rl_games.env_eval.post_train.max_steps_per_episode=3600
147
+ - ++rl_games.task=flappy
148
+ - ++rl_games.initialization_mode=bridge
149
+ - ++rl_games.action_carrier=bridge
150
+ - ++bridge_base_model.repo_id.openvla=Qwen/Qwen3-VL-4B-Instruct
151
+ - ++bridge_base_model.repo_id.pi0=StarVLA/Qwen2.5-VL-3B-Instruct-Action
152
+ - ++bridge_base_model.repo_id.pi05=Qwen/Qwen3-VL-4B-Instruct
153
+ - ++bridge_base_model.repo_id.gr00t=Qwen/Qwen3-VL-4B-Instruct
154
+ - ++bridge_base_model.local_dir.openvla=playground/Pretrained_models/Qwen3-VL-4B-Instruct
155
+ - ++bridge_base_model.local_dir.pi0=playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
156
+ - ++bridge_base_model.local_dir.pi05=playground/Pretrained_models/Qwen3-VL-4B-Instruct
157
+ - ++bridge_base_model.local_dir.gr00t=playground/Pretrained_models/Qwen3-VL-4B-Instruct
158
+ - ++checkpoint.load=none
159
+ - ++checkpoint.hf_repo_id=null
160
+ - ++checkpoint.save_best_model=false
161
+ - ++checkpoint.save_pt_file=false
162
+ - ++checkpoint.local.keep_last_n=1
163
+ - ++checkpoint.sync.enabled=false
164
+ - ++checkpoint.sync.repo_id=null
165
+ - ++checkpoint.sync.keep_last_n=0
166
+ - ++checkpoint.sync.sync_every_n_checkpoints=1
167
+ - ++checkpoint.sync.resume_policy=local_latest
168
+ - ++run_id=flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
169
+ - ++output_dir=null
170
+ - ++config_yaml=null
171
+ - ++is_debug=false
172
+ - ++version_id=0.21
173
+ - ++run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
174
+ - ++trainer.is_resume=false
175
+ - ++trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
176
+ - ++trainer.resume_step=0
177
+ - ++datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
178
+ - ++datasets.vla_data.data_mix=flappy_train__bridge
179
+ - ++datasets.vla_data.eval_data_mix=flappy_train__bridge__val
180
+ - ++framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
181
+ - ++rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/train_starvla_hydra.log ADDED
The diff for this file is too large to render. See raw diff
 
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/summary.jsonl ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"steps": 500}
2
+ {"steps": 1000}
3
+ {"steps": 1500}
4
+ {"steps": 2000}
5
+ {"steps": 2500}
6
+ {"steps": 3000}