Caesarrr commited on
Commit
d8e0eac
·
verified ·
1 Parent(s): 52480ff

Upload folder using huggingface_hub

Browse files
Files changed (33) hide show
  1. flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt +3 -0
  2. flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/model.safetensors +3 -0
  3. flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/optimizer.bin +3 -0
  4. flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/random_states_0.pkl +3 -0
  5. flappy_fix_latency_2_200ep_full_tuning_corrected/config.full.yaml +219 -0
  6. flappy_fix_latency_2_200ep_full_tuning_corrected/config.yaml +99 -0
  7. flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics.json +127 -0
  8. flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics_eval.json +127 -0
  9. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1000.json +63 -0
  10. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1250.json +63 -0
  11. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1500.json +63 -0
  12. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1750.json +63 -0
  13. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2000.json +63 -0
  14. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2250.json +63 -0
  15. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_250.json +63 -0
  16. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2500.json +63 -0
  17. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2750.json +63 -0
  18. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3000.json +63 -0
  19. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3250.json +63 -0
  20. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3500.json +63 -0
  21. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3750.json +63 -0
  22. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4000.json +63 -0
  23. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4250.json +63 -0
  24. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4500.json +63 -0
  25. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4750.json +63 -0
  26. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_500.json +63 -0
  27. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_5000.json +63 -0
  28. flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_750.json +63 -0
  29. flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/config.yaml +217 -0
  30. flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/hydra.yaml +266 -0
  31. flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/overrides.yaml +106 -0
  32. flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/train_starvla_hydra.log +0 -0
  33. flappy_fix_latency_2_200ep_full_tuning_corrected/summary.jsonl +10 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371cb744227687bb99bcad7f9ff2250cf06da75631359ad3eba4c6bc52570607
3
+ size 9785060316
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff343b81e73667493a90c8c0696872ab7f3ed1bf55fae59cf8b57f9785b040eb
3
+ size 9138230516
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a7b363b24f7908954dadea5afd7d198c03fc118e223f915b959f6d4d178060
3
+ size 18276885098
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e27bbcb30ed5d67c406a486689500f03328292ad932e058af8734a4eacb28fea
3
+ size 14821
flappy_fix_latency_2_200ep_full_tuning_corrected/config.full.yaml ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ framework:
2
+ name: QwenOFT
3
+ qwenvl:
4
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
5
+ attn_implementation: flash_attention_2
6
+ enable_gradient_checkpointing: true
7
+ action_model:
8
+ action_model_type: MLP
9
+ action_dim: 7
10
+ action_hidden_dim: 2560
11
+ future_action_window_size: 0
12
+ past_action_window_size: 0
13
+ loss_type: discrete_ce
14
+ state_dim: 7
15
+ action_horizon: 1
16
+ action_env_dim: 2
17
+ datasets:
18
+ vla_data:
19
+ dataset_py: lerobot_datasets
20
+ include_state: true
21
+ data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
22
+ data_mix: flappy_train__bridge
23
+ eval_data_mix: flappy_train__bridge__val
24
+ custom_mixtures_path: null
25
+ action_type: discrete
26
+ sequential_step_sampling: false
27
+ eval_sequential_step_sampling: null
28
+ num_workers: 8
29
+ eval_num_workers: 8
30
+ prefetch_factor: 4
31
+ persistent_workers: true
32
+ pin_memory: true
33
+ shuffle: true
34
+ action_balance:
35
+ enabled: false
36
+ strategy: balanced_epoch
37
+ action_key: action_id
38
+ target_flap_fraction: 0.3
39
+ noop_id: 0
40
+ flap_id: 1
41
+ latency_curriculum:
42
+ enabled: false
43
+ strategy: exclusive
44
+ latencies: null
45
+ phase_steps: null
46
+ per_device_batch_size: 32
47
+ load_all_data_for_training: true
48
+ num_obs_frames: 1
49
+ image_mode: single
50
+ stitch_grid:
51
+ - 2
52
+ - 2
53
+ obs_image_size: null
54
+ video_backend: torchvision_av
55
+ dataset:
56
+ source_hf: ''
57
+ config_name: null
58
+ source_subdir: null
59
+ converted_name: flappy_train
60
+ single_source_hf: ''
61
+ mixed_source_hf: ''
62
+ single_converted_name: flappy_train
63
+ mixed_converted_name: flappy_mixed_latency_train
64
+ single_latency_filter: null
65
+ mixed_latency_filter: null
66
+ force_download: false
67
+ setup_force: false
68
+ skip_verification: false
69
+ verify_rows: 200
70
+ max_episodes: null
71
+ episodes_per_latency: null
72
+ latency_filter: null
73
+ debug_subset:
74
+ enabled: false
75
+ max_episodes: 5
76
+ suffix: debug
77
+ base_model:
78
+ repo_id: Qwen/Qwen3-VL-4B-Instruct
79
+ initialization:
80
+ checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
81
+ checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
82
+ checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
83
+ trainer:
84
+ max_train_steps: 5000
85
+ num_warmup_steps: 100
86
+ save_interval: 500
87
+ eval_interval: 100
88
+ eval_num_batches: 100
89
+ per_latency_eval_num_batches: null
90
+ eval_action_classification: true
91
+ eval_action_classification_interval: null
92
+ cc_f1_tolerance: 1
93
+ learning_rate:
94
+ base: 2.0e-05
95
+ qwen_vl_interface: 1.0e-05
96
+ action_model: 0.0001
97
+ lr_scheduler_type: cosine_with_min_lr
98
+ scheduler_specific_kwargs:
99
+ min_lr: 1.0e-06
100
+ freeze_modules: ''
101
+ freeze_llm_bottom_ratio: 0.0
102
+ loss_scale:
103
+ vla: 1.0
104
+ vlm: 0.1
105
+ max_grad_norm: 1.0
106
+ weight_decay: 0.0
107
+ logging_frequency: 1
108
+ gradient_clipping: 1.0
109
+ gradient_accumulation_steps: 4
110
+ distributed_backend: none
111
+ is_resume: false
112
+ pretrained_checkpoint: null
113
+ resume_step: 0
114
+ reload_modules: null
115
+ optimizer:
116
+ name: AdamW
117
+ betas:
118
+ - 0.9
119
+ - 0.95
120
+ eps: 1.0e-08
121
+ weight_decay: 1.0e-08
122
+ fused: true
123
+ save_format: pt
124
+ workspace_dir: WORKSPACE_DIR
125
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
126
+ seed: 42
127
+ wandb_entity: saberrr-zju
128
+ wandb_project: starVLA_rl_games
129
+ auth:
130
+ env_file: null
131
+ hf_token_env: HF_TOKEN
132
+ wandb_api_key_env: WANDB_API_KEY
133
+ paths:
134
+ run_root_dir: results/Checkpoints
135
+ dataset_local_dir: playground/Datasets/rl_games
136
+ dataset_cache_dir: null
137
+ base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
138
+ accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
139
+ launch:
140
+ use_accelerate: true
141
+ gpus: null
142
+ num_processes: 1
143
+ dry_run: false
144
+ conda:
145
+ enabled: true
146
+ env_name: null
147
+ rl_games:
148
+ model_alias: openvla
149
+ env_eval:
150
+ image_size: 224
151
+ frameskip: 1
152
+ seed: 42
153
+ fixed_episode_seeds: true
154
+ latency_seed_stride: 0
155
+ task_seed_stride: 0
156
+ task_description: ''
157
+ enabled: true
158
+ distributed_mode: none
159
+ vectorized:
160
+ enabled: false
161
+ batch_size: 1
162
+ latency:
163
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
164
+ mode: single
165
+ values:
166
+ - 0
167
+ mid_train:
168
+ enabled: true
169
+ interval_steps: 250
170
+ latencies:
171
+ - 2
172
+ num_episodes: 5
173
+ max_steps_per_episode: 3600
174
+ post_train:
175
+ enabled: false
176
+ latencies:
177
+ - 0
178
+ - 1
179
+ - 2
180
+ - 3
181
+ - 4
182
+ num_episodes: 5
183
+ max_steps_per_episode: 3600
184
+ task: flappy
185
+ initialization_mode: bridge
186
+ action_carrier: bridge
187
+ model: openvla
188
+ env: flappy
189
+ init: bridge
190
+ bridge_base_model:
191
+ repo_id:
192
+ openvla: Qwen/Qwen3-VL-4B-Instruct
193
+ pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
194
+ pi05: Qwen/Qwen3-VL-4B-Instruct
195
+ gr00t: Qwen/Qwen3-VL-4B-Instruct
196
+ local_dir:
197
+ openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
198
+ pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
199
+ pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
200
+ gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
201
+ mode: single
202
+ checkpoint:
203
+ load: auto
204
+ hf_repo_id: null
205
+ save_best_model: false
206
+ save_pt_file: false
207
+ local:
208
+ keep_last_n: 1
209
+ sync:
210
+ enabled: false
211
+ repo_id: null
212
+ keep_last_n: 0
213
+ sync_every_n_checkpoints: 1
214
+ resume_policy: local_latest
215
+ run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
216
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected
217
+ config_yaml: null
218
+ is_debug: false
219
+ version_id: '0.21'
flappy_fix_latency_2_200ep_full_tuning_corrected/config.yaml ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint:
2
+ local:
3
+ keep_last_n: 1
4
+ save_best_model: false
5
+ save_pt_file: false
6
+ sync:
7
+ enabled: false
8
+ keep_last_n: 0
9
+ repo_id: null
10
+ datasets:
11
+ vla_data:
12
+ data_mix: flappy_train__bridge
13
+ dataset_py: lerobot_datasets
14
+ eval_data_mix: flappy_train__bridge__val
15
+ eval_num_workers: 8
16
+ include_state: true
17
+ latency_curriculum:
18
+ enabled: false
19
+ obs_image_size: null
20
+ per_device_batch_size: 32
21
+ persistent_workers: true
22
+ pin_memory: true
23
+ prefetch_factor: 4
24
+ framework:
25
+ action_model:
26
+ action_dim: 7
27
+ action_env_dim: 2
28
+ action_hidden_dim: 2560
29
+ action_horizon: 1
30
+ action_model_type: MLP
31
+ loss_type: discrete_ce
32
+ state_dim: 7
33
+ name: QwenOFT
34
+ qwenvl:
35
+ attn_implementation: flash_attention_2
36
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
37
+ enable_gradient_checkpointing: true
38
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected
39
+ rl_games:
40
+ env_eval:
41
+ distributed_mode: none
42
+ enabled: true
43
+ fixed_episode_seeds: true
44
+ frameskip: 1
45
+ image_size: 224
46
+ latency:
47
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
48
+ latency_seed_stride: 0
49
+ mid_train:
50
+ enabled: true
51
+ interval_steps: 250
52
+ latencies:
53
+ - 2
54
+ max_steps_per_episode: 3600
55
+ num_episodes: 5
56
+ seed: 42
57
+ task_description: ''
58
+ task_seed_stride: 0
59
+ vectorized:
60
+ enabled: false
61
+ model_alias: openvla
62
+ task: flappy
63
+ run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
64
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
65
+ seed: 42
66
+ trainer:
67
+ distributed_backend: none
68
+ eval_action_classification: true
69
+ eval_action_classification_interval: null
70
+ eval_interval: 100
71
+ eval_num_batches: 100
72
+ freeze_llm_bottom_ratio: 0.0
73
+ freeze_modules: ''
74
+ gradient_accumulation_steps: 4
75
+ gradient_clipping: 1.0
76
+ is_resume: false
77
+ learning_rate:
78
+ action_model: 0.0001
79
+ base: 2.0e-05
80
+ qwen_vl_interface: 1.0e-05
81
+ logging_frequency: 1
82
+ lr_scheduler_type: cosine_with_min_lr
83
+ max_train_steps: 5000
84
+ num_warmup_steps: 100
85
+ optimizer:
86
+ betas:
87
+ - 0.9
88
+ - 0.95
89
+ eps: 1.0e-08
90
+ fused: true
91
+ weight_decay: 1.0e-08
92
+ per_latency_eval_num_batches: null
93
+ pretrained_checkpoint: null
94
+ reload_modules: null
95
+ save_interval: 500
96
+ scheduler_specific_kwargs:
97
+ min_lr: 1.0e-06
98
+ wandb_entity: saberrr-zju
99
+ wandb_project: starVLA_rl_games
flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.6028500199317932,
6
+ 0.3971499800682068,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0
12
+ ],
13
+ "std": [
14
+ 0.4890792667865753,
15
+ 0.4890792667865753,
16
+ 0.0,
17
+ 0.0,
18
+ 0.0,
19
+ 0.0,
20
+ 0.0
21
+ ],
22
+ "max": [
23
+ 1.0,
24
+ 1.0,
25
+ 0.0,
26
+ 0.0,
27
+ 0.0,
28
+ 0.0,
29
+ 0.0
30
+ ],
31
+ "min": [
32
+ 0.0,
33
+ 0.0,
34
+ 0.0,
35
+ 0.0,
36
+ 0.0,
37
+ 0.0,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ 0.0,
42
+ 0.0,
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 1.0,
51
+ 1.0,
52
+ 0.0,
53
+ 0.0,
54
+ 0.0,
55
+ 0.0,
56
+ 0.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ true
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 330734,
125
+ "num_trajectories": 180
126
+ }
127
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics_eval.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.7959861159324646,
6
+ 0.2040138840675354,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0
12
+ ],
13
+ "std": [
14
+ 0.4030573070049286,
15
+ 0.4030573070049286,
16
+ 0.0,
17
+ 0.0,
18
+ 0.0,
19
+ 0.0,
20
+ 0.0
21
+ ],
22
+ "max": [
23
+ 1.0,
24
+ 1.0,
25
+ 0.0,
26
+ 0.0,
27
+ 0.0,
28
+ 0.0,
29
+ 0.0
30
+ ],
31
+ "min": [
32
+ 0.0,
33
+ 0.0,
34
+ 0.0,
35
+ 0.0,
36
+ 0.0,
37
+ 0.0,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ 0.0,
42
+ 0.0,
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 1.0,
51
+ 1.0,
52
+ 0.0,
53
+ 0.0,
54
+ 0.0,
55
+ 0.0,
56
+ 0.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ true
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 72000,
125
+ "num_trajectories": 20
126
+ }
127
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 79.39999999999868,
7
+ "mean_length": 659.2,
8
+ "std_reward": 62.80047770518764,
9
+ "std_length": 506.935656666603,
10
+ "episode_rewards": [
11
+ 8.399999999999986,
12
+ 31.900000000000162,
13
+ 171.8999999999956,
14
+ 50.40000000000037,
15
+ 134.39999999999728
16
+ ],
17
+ "episode_lengths": [
18
+ 86,
19
+ 276,
20
+ 1406,
21
+ 425,
22
+ 1103
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 2647,
26
+ "1": 649
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 79.39999999999868,
55
+ "mean_length": 659.2,
56
+ "std_reward": 62.80047770518764,
57
+ "std_length": 506.935656666603,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 79.39999999999868,
60
+ "macro_mean_length": 659.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 335.94000000001523,
7
+ "mean_length": 2724.8,
8
+ "std_reward": 154.00615052654007,
9
+ "std_length": 1240.415317544894,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 295.49999999999903,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 50.40000000000037
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 2399,
20
+ 3600,
21
+ 3600,
22
+ 425
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 10894,
26
+ "1": 2730
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 335.94000000001523,
55
+ "mean_length": 2724.8,
56
+ "std_reward": 154.00615052654007,
57
+ "std_length": 1240.415317544894,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 335.94000000001523,
60
+ "macro_mean_length": 2724.8,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 292.5400000000077,
7
+ "mean_length": 2375.8,
8
+ "std_reward": 145.61193082987145,
9
+ "std_length": 1173.9889948376858,
10
+ "episode_rewards": [
11
+ 52.70000000000039,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 269.8999999999946,
15
+ 250.89999999999208
16
+ ],
17
+ "episode_lengths": [
18
+ 439,
19
+ 3600,
20
+ 3600,
21
+ 2197,
22
+ 2043
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 9500,
26
+ "1": 2379
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 292.5400000000077,
55
+ "mean_length": 2375.8,
56
+ "std_reward": 145.61193082987145,
57
+ "std_length": 1173.9889948376858,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 292.5400000000077,
60
+ "macro_mean_length": 2375.8,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 390.9600000000196,
7
+ "mean_length": 3168.4,
8
+ "std_reward": 107.2800000000121,
9
+ "std_length": 863.2,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 176.3999999999954,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 1442,
20
+ 3600,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 12633,
26
+ "1": 3209
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 390.9600000000196,
55
+ "mean_length": 3168.4,
56
+ "std_reward": 107.2800000000121,
57
+ "std_length": 863.2,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 390.9600000000196,
60
+ "macro_mean_length": 3168.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 282.3200000000154,
7
+ "mean_length": 2293.0,
8
+ "std_reward": 199.98049304870963,
9
+ "std_length": 1610.6818431956076,
10
+ "episode_rewards": [
11
+ 73.89999999999999,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 3.8999999999999986,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 615,
19
+ 3600,
20
+ 3600,
21
+ 50,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 9138,
26
+ "1": 2327
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 282.3200000000154,
55
+ "mean_length": 2293.0,
56
+ "std_reward": 199.98049304870963,
57
+ "std_length": 1610.6818431956076,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 282.3200000000154,
60
+ "macro_mean_length": 2293.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 444.60000000002566,
7
+ "mean_length": 3600.0,
8
+ "std_reward": 0.0,
9
+ "std_length": 0.0,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 14346,
26
+ "1": 3654
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 444.60000000002566,
55
+ "mean_length": 3600.0,
56
+ "std_reward": 0.0,
57
+ "std_length": 0.0,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 444.60000000002566,
60
+ "macro_mean_length": 3600.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 65.11999999999905,
7
+ "mean_length": 543.4,
8
+ "std_reward": 57.274371231815906,
9
+ "std_length": 461.5935874771225,
10
+ "episode_rewards": [
11
+ 3.8999999999999986,
12
+ 132.49999999999739,
13
+ 54.90000000000042,
14
+ 3.8999999999999986,
15
+ 130.39999999999745
16
+ ],
17
+ "episode_lengths": [
18
+ 50,
19
+ 1084,
20
+ 461,
21
+ 50,
22
+ 1072
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 2142,
26
+ "1": 575
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 65.11999999999905,
55
+ "mean_length": 543.4,
56
+ "std_reward": 57.274371231815906,
57
+ "std_length": 461.5935874771225,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 65.11999999999905,
60
+ "macro_mean_length": 543.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 370.4600000000205,
7
+ "mean_length": 3003.0,
8
+ "std_reward": 148.28000000001026,
9
+ "std_length": 1194.0,
10
+ "episode_rewards": [
11
+ 73.89999999999999,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 615,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 11891,
26
+ "1": 3124
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 370.4600000000205,
55
+ "mean_length": 3003.0,
56
+ "std_reward": 148.28000000001026,
57
+ "std_length": 1194.0,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 370.4600000000205,
60
+ "macro_mean_length": 3003.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 218.3800000000104,
7
+ "mean_length": 1778.2,
8
+ "std_reward": 184.83810646077453,
9
+ "std_length": 1488.5559982748382,
10
+ "episode_rewards": [
11
+ 73.89999999999999,
12
+ 444.60000000002566,
13
+ 54.90000000000042,
14
+ 444.60000000002566,
15
+ 73.89999999999999
16
+ ],
17
+ "episode_lengths": [
18
+ 615,
19
+ 3600,
20
+ 461,
21
+ 3600,
22
+ 615
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 7081,
26
+ "1": 1810
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 218.3800000000104,
55
+ "mean_length": 1778.2,
56
+ "std_reward": 184.83810646077453,
57
+ "std_length": 1488.5559982748382,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 218.3800000000104,
60
+ "macro_mean_length": 1778.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 317.72000000001435,
7
+ "mean_length": 2578.6,
8
+ "std_reward": 162.01944821534275,
9
+ "std_length": 1304.6114517357264,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 54.90000000000042,
14
+ 199.89999999999435,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 461,
21
+ 1632,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 10219,
26
+ "1": 2674
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 317.72000000001435,
55
+ "mean_length": 2578.6,
56
+ "std_reward": 162.01944821534275,
57
+ "std_length": 1304.6114517357264,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 317.72000000001435,
60
+ "macro_mean_length": 2578.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 365.76000000002057,
7
+ "mean_length": 2965.0,
8
+ "std_reward": 157.6800000000101,
9
+ "std_length": 1270.0,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 50.40000000000037
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 425
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 11775,
26
+ "1": 3050
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 365.76000000002057,
55
+ "mean_length": 2965.0,
56
+ "std_reward": 157.6800000000101,
57
+ "std_length": 1270.0,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 365.76000000002057,
60
+ "macro_mean_length": 2965.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 408.66000000001924,
7
+ "mean_length": 3311.2,
8
+ "std_reward": 71.8800000000128,
9
+ "std_length": 577.6,
10
+ "episode_rewards": [
11
+ 264.89999999999367,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 2156,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 13206,
26
+ "1": 3350
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 408.66000000001924,
55
+ "mean_length": 3311.2,
56
+ "std_reward": 71.8800000000128,
57
+ "std_length": 577.6,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 408.66000000001924,
60
+ "macro_mean_length": 3311.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 442.26000000002523,
7
+ "mean_length": 3582.4,
8
+ "std_reward": 4.680000000000791,
9
+ "std_length": 35.2,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 432.9000000000237
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3512
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 14270,
26
+ "1": 3642
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 442.26000000002523,
55
+ "mean_length": 3582.4,
56
+ "std_reward": 4.680000000000791,
57
+ "std_length": 35.2,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 442.26000000002523,
60
+ "macro_mean_length": 3582.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 442.26000000002523,
7
+ "mean_length": 3582.4,
8
+ "std_reward": 4.680000000000791,
9
+ "std_length": 35.2,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 432.9000000000237
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3512
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 14256,
26
+ "1": 3656
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 442.26000000002523,
55
+ "mean_length": 3582.4,
56
+ "std_reward": 4.680000000000791,
57
+ "std_length": 35.2,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 442.26000000002523,
60
+ "macro_mean_length": 3582.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 444.60000000002566,
7
+ "mean_length": 3600.0,
8
+ "std_reward": 0.0,
9
+ "std_length": 0.0,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 14338,
26
+ "1": 3662
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 444.60000000002566,
55
+ "mean_length": 3600.0,
56
+ "std_reward": 0.0,
57
+ "std_length": 0.0,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 444.60000000002566,
60
+ "macro_mean_length": 3600.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 365.76000000002057,
7
+ "mean_length": 2965.0,
8
+ "std_reward": 157.6800000000101,
9
+ "std_length": 1270.0,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 50.40000000000037
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 425
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 11866,
26
+ "1": 2959
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 365.76000000002057,
55
+ "mean_length": 2965.0,
56
+ "std_reward": 157.6800000000101,
57
+ "std_length": 1270.0,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 365.76000000002057,
60
+ "macro_mean_length": 2965.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 444.60000000002566,
7
+ "mean_length": 3600.0,
8
+ "std_reward": 0.0,
9
+ "std_length": 0.0,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 14315,
26
+ "1": 3685
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 444.60000000002566,
55
+ "mean_length": 3600.0,
56
+ "std_reward": 0.0,
57
+ "std_length": 0.0,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 444.60000000002566,
60
+ "macro_mean_length": 3600.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 176.28000000000358,
7
+ "mean_length": 1440.4,
8
+ "std_reward": 163.1896246702057,
9
+ "std_length": 1314.9364395285422,
10
+ "episode_rewards": [
11
+ 270.7999999999948,
12
+ 18.200000000000014,
13
+ 17.90000000000001,
14
+ 129.89999999999748,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 2206,
19
+ 166,
20
+ 163,
21
+ 1067,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 5754,
26
+ "1": 1448
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 176.28000000000358,
55
+ "mean_length": 1440.4,
56
+ "std_reward": 163.1896246702057,
57
+ "std_length": 1314.9364395285422,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 176.28000000000358,
60
+ "macro_mean_length": 1440.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_5000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 442.26000000002523,
7
+ "mean_length": 3582.4,
8
+ "std_reward": 4.680000000000791,
9
+ "std_length": 35.2,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 432.9000000000237
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 3512
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 14239,
26
+ "1": 3673
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 5000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 442.26000000002523,
55
+ "mean_length": 3582.4,
56
+ "std_reward": 4.680000000000791,
57
+ "std_length": 35.2,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 442.26000000002523,
60
+ "macro_mean_length": 3582.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 87.69999999999818,
7
+ "mean_length": 726.0,
8
+ "std_reward": 100.46123630535064,
9
+ "std_length": 810.7465695271242,
10
+ "episode_rewards": [
11
+ 264.89999999999367,
12
+ 22.40000000000006,
13
+ 12.899999999999974,
14
+ 3.8999999999999986,
15
+ 134.39999999999728
16
+ ],
17
+ "episode_lengths": [
18
+ 2156,
19
+ 199,
20
+ 122,
21
+ 50,
22
+ 1103
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 2897,
26
+ "1": 733
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 87.69999999999818,
55
+ "mean_length": 726.0,
56
+ "std_reward": 100.46123630535064,
57
+ "std_length": 810.7465695271242,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 87.69999999999818,
60
+ "macro_mean_length": 726.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/config.yaml ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ framework:
2
+ qwenvl:
3
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
4
+ attn_implementation: flash_attention_2
5
+ enable_gradient_checkpointing: true
6
+ action_model:
7
+ state_dim: 7
8
+ loss_type: discrete_ce
9
+ action_horizon: 1
10
+ future_action_window_size: 0
11
+ past_action_window_size: 0
12
+ action_dim: 7
13
+ action_env_dim: 2
14
+ name: QwenOFT
15
+ datasets:
16
+ vla_data:
17
+ dataset_py: lerobot_datasets
18
+ include_state: true
19
+ data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
20
+ data_mix: flappy_train__bridge
21
+ eval_data_mix: flappy_train__bridge__val
22
+ custom_mixtures_path: null
23
+ action_type: discrete
24
+ sequential_step_sampling: false
25
+ eval_sequential_step_sampling: null
26
+ num_workers: 8
27
+ eval_num_workers: 8
28
+ prefetch_factor: 4
29
+ persistent_workers: true
30
+ pin_memory: true
31
+ shuffle: true
32
+ action_balance:
33
+ enabled: false
34
+ strategy: balanced_epoch
35
+ action_key: action_id
36
+ target_flap_fraction: 0.3
37
+ noop_id: 0
38
+ flap_id: 1
39
+ latency_curriculum:
40
+ enabled: false
41
+ strategy: exclusive
42
+ latencies: null
43
+ phase_steps: null
44
+ per_device_batch_size: 32
45
+ load_all_data_for_training: true
46
+ num_obs_frames: 1
47
+ image_mode: single
48
+ stitch_grid:
49
+ - 2
50
+ - 2
51
+ obs_image_size: null
52
+ video_backend: torchvision_av
53
+ dataset:
54
+ source_hf: ${dataset.single_source_hf}
55
+ config_name: null
56
+ source_subdir: null
57
+ converted_name: ${dataset.single_converted_name}
58
+ single_source_hf: ''
59
+ mixed_source_hf: ''
60
+ single_converted_name: flappy_train
61
+ mixed_converted_name: flappy_mixed_latency_train
62
+ single_latency_filter: null
63
+ mixed_latency_filter: null
64
+ force_download: false
65
+ setup_force: false
66
+ skip_verification: false
67
+ verify_rows: 200
68
+ max_episodes: null
69
+ episodes_per_latency: null
70
+ latency_filter: ${dataset.single_latency_filter}
71
+ debug_subset:
72
+ enabled: false
73
+ max_episodes: 5
74
+ suffix: debug
75
+ base_model:
76
+ repo_id: ${bridge_base_model.repo_id.${model}}
77
+ initialization:
78
+ checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
79
+ checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
80
+ checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
81
+ trainer:
82
+ max_train_steps: 5000
83
+ num_warmup_steps: 100
84
+ save_interval: 500
85
+ eval_interval: 100
86
+ eval_num_batches: 100
87
+ per_latency_eval_num_batches: null
88
+ eval_action_classification: true
89
+ eval_action_classification_interval: null
90
+ cc_f1_tolerance: 1
91
+ learning_rate:
92
+ base: 2.0e-05
93
+ qwen_vl_interface: 1.0e-05
94
+ action_model: 0.0001
95
+ lr_scheduler_type: cosine_with_min_lr
96
+ scheduler_specific_kwargs:
97
+ min_lr: 1.0e-06
98
+ freeze_modules: ''
99
+ freeze_llm_bottom_ratio: 0.0
100
+ loss_scale:
101
+ vla: 1.0
102
+ vlm: 0.1
103
+ max_grad_norm: 1.0
104
+ weight_decay: 0.0
105
+ logging_frequency: 1
106
+ gradient_clipping: 1.0
107
+ gradient_accumulation_steps: 4
108
+ distributed_backend: none
109
+ is_resume: false
110
+ pretrained_checkpoint: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
111
+ resume_step: 0
112
+ reload_modules: null
113
+ optimizer:
114
+ name: AdamW
115
+ betas:
116
+ - 0.9
117
+ - 0.95
118
+ eps: 1.0e-08
119
+ weight_decay: 1.0e-08
120
+ fused: true
121
+ save_format: pt
122
+ workspace_dir: WORKSPACE_DIR
123
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
124
+ seed: 42
125
+ wandb_entity: saberrr-zju
126
+ wandb_project: starVLA_rl_games
127
+ auth:
128
+ env_file: null
129
+ hf_token_env: HF_TOKEN
130
+ wandb_api_key_env: WANDB_API_KEY
131
+ paths:
132
+ run_root_dir: results/Checkpoints
133
+ dataset_local_dir: playground/Datasets/rl_games
134
+ dataset_cache_dir: null
135
+ base_model_dir: ${bridge_base_model.local_dir.${model}}
136
+ accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
137
+ launch:
138
+ use_accelerate: true
139
+ gpus: null
140
+ num_processes: 1
141
+ dry_run: false
142
+ conda:
143
+ enabled: true
144
+ env_name: null
145
+ rl_games:
146
+ model_alias: openvla
147
+ env_eval:
148
+ image_size: 224
149
+ frameskip: 1
150
+ seed: 42
151
+ fixed_episode_seeds: true
152
+ latency_seed_stride: 0
153
+ task_seed_stride: 0
154
+ task_description: ''
155
+ enabled: true
156
+ distributed_mode: none
157
+ vectorized:
158
+ enabled: false
159
+ batch_size: 1
160
+ latency:
161
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
162
+ mode: single
163
+ values:
164
+ - 0
165
+ mid_train:
166
+ enabled: true
167
+ interval_steps: 250
168
+ latencies:
169
+ - 2
170
+ num_episodes: 5
171
+ max_steps_per_episode: 3600
172
+ post_train:
173
+ enabled: false
174
+ latencies:
175
+ - 0
176
+ - 1
177
+ - 2
178
+ - 3
179
+ - 4
180
+ num_episodes: 5
181
+ max_steps_per_episode: 3600
182
+ task: flappy
183
+ initialization_mode: bridge
184
+ action_carrier: bridge
185
+ model: openvla
186
+ env: flappy
187
+ init: bridge
188
+ bridge_base_model:
189
+ repo_id:
190
+ openvla: Qwen/Qwen3-VL-4B-Instruct
191
+ pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
192
+ pi05: Qwen/Qwen3-VL-4B-Instruct
193
+ gr00t: Qwen/Qwen3-VL-4B-Instruct
194
+ local_dir:
195
+ openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
196
+ pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
197
+ pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
198
+ gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
199
+ mode: single
200
+ checkpoint:
201
+ load: auto
202
+ hf_repo_id: null
203
+ save_best_model: false
204
+ save_pt_file: false
205
+ local:
206
+ keep_last_n: 1
207
+ sync:
208
+ enabled: false
209
+ repo_id: null
210
+ keep_last_n: 0
211
+ sync_every_n_checkpoints: 1
212
+ resume_policy: local_latest
213
+ run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
214
+ output_dir: null
215
+ config_yaml: null
216
+ is_debug: false
217
+ version_id: '0.21'
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/hydra.yaml ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${run_root_dir}/${run_id}/hydra
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - model=openvla
116
+ - env=flappy
117
+ - init=bridge
118
+ - mode=single
119
+ - run_id=flappy_fix_latency_2_200ep_full_tuning_corrected
120
+ - run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
121
+ - seed=42
122
+ - wandb_entity=saberrr-zju
123
+ - wandb_project=starVLA_rl_games
124
+ - rl_games.env_eval.enabled=true
125
+ - checkpoint.sync.enabled=false
126
+ - checkpoint.sync.keep_last_n=0
127
+ - checkpoint.local.keep_last_n=1
128
+ - checkpoint.save_best_model=false
129
+ - checkpoint.save_pt_file=false
130
+ - trainer.is_resume=false
131
+ - trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
132
+ - trainer.resume_step=0
133
+ - trainer.max_train_steps=5000
134
+ - trainer.num_warmup_steps=100
135
+ - trainer.save_interval=500
136
+ - trainer.eval_interval=100
137
+ - trainer.eval_num_batches=100
138
+ - trainer.eval_action_classification=true
139
+ - trainer.logging_frequency=1
140
+ - trainer.gradient_accumulation_steps=4
141
+ - trainer.distributed_backend=none
142
+ - trainer.learning_rate.base=2e-05
143
+ - trainer.learning_rate.qwen_vl_interface=1e-05
144
+ - trainer.learning_rate.action_model=0.0001
145
+ - trainer.lr_scheduler_type=cosine_with_min_lr
146
+ - trainer.scheduler_specific_kwargs.min_lr=1e-06
147
+ - trainer.freeze_llm_bottom_ratio=0.0
148
+ - trainer.loss_scale.vla=1.0
149
+ - trainer.loss_scale.vlm=0.1
150
+ - trainer.max_grad_norm=1.0
151
+ - trainer.weight_decay=0.0
152
+ - trainer.gradient_clipping=1.0
153
+ - trainer.optimizer.name=AdamW
154
+ - trainer.optimizer.betas=[0.9,0.95]
155
+ - trainer.optimizer.eps=1e-08
156
+ - trainer.optimizer.weight_decay=1e-08
157
+ - trainer.optimizer.fused=true
158
+ - trainer.save_format=pt
159
+ - framework.name=QwenOFT
160
+ - framework.qwenvl.attn_implementation=flash_attention_2
161
+ - framework.qwenvl.enable_gradient_checkpointing=true
162
+ - framework.action_model.action_dim=7
163
+ - framework.action_model.action_env_dim=2
164
+ - framework.action_model.state_dim=7
165
+ - framework.action_model.loss_type=discrete_ce
166
+ - framework.action_model.action_horizon=1
167
+ - framework.action_model.future_action_window_size=0
168
+ - framework.action_model.past_action_window_size=0
169
+ - datasets.vla_data.include_state=true
170
+ - datasets.vla_data.action_type=discrete
171
+ - datasets.vla_data.sequential_step_sampling=false
172
+ - datasets.vla_data.shuffle=true
173
+ - datasets.vla_data.num_workers=8
174
+ - datasets.vla_data.eval_num_workers=8
175
+ - datasets.vla_data.prefetch_factor=4
176
+ - datasets.vla_data.persistent_workers=true
177
+ - datasets.vla_data.pin_memory=true
178
+ - datasets.vla_data.action_balance.enabled=false
179
+ - datasets.vla_data.action_balance.strategy=balanced_epoch
180
+ - datasets.vla_data.action_balance.action_key=action_id
181
+ - datasets.vla_data.action_balance.target_flap_fraction=0.3
182
+ - datasets.vla_data.action_balance.noop_id=0
183
+ - datasets.vla_data.action_balance.flap_id=1
184
+ - datasets.vla_data.latency_curriculum.enabled=false
185
+ - datasets.vla_data.latency_curriculum.strategy=exclusive
186
+ - datasets.vla_data.per_device_batch_size=32
187
+ - datasets.vla_data.num_workers=8
188
+ - datasets.vla_data.eval_num_workers=8
189
+ - datasets.vla_data.prefetch_factor=4
190
+ - datasets.vla_data.persistent_workers=true
191
+ - datasets.vla_data.pin_memory=true
192
+ - datasets.vla_data.load_all_data_for_training=true
193
+ - datasets.vla_data.video_backend=torchvision_av
194
+ - datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
195
+ - datasets.vla_data.data_mix=flappy_train__bridge
196
+ - datasets.vla_data.eval_data_mix=flappy_train__bridge__val
197
+ - framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
198
+ - rl_games.task=flappy
199
+ - rl_games.model_alias=openvla
200
+ - rl_games.initialization_mode=bridge
201
+ - rl_games.action_carrier=bridge
202
+ - rl_games.env_eval.distributed_mode=none
203
+ - rl_games.env_eval.latency.mode=single
204
+ - rl_games.env_eval.frameskip=1
205
+ - rl_games.env_eval.image_size=224
206
+ - rl_games.env_eval.seed=42
207
+ - rl_games.env_eval.fixed_episode_seeds=true
208
+ - rl_games.env_eval.latency_seed_stride=0
209
+ - rl_games.env_eval.task_seed_stride=0
210
+ - rl_games.env_eval.latency.values=[0]
211
+ - rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
212
+ - rl_games.env_eval.mid_train.enabled=true
213
+ - rl_games.env_eval.mid_train.interval_steps=250
214
+ - rl_games.env_eval.mid_train.num_episodes=5
215
+ - rl_games.env_eval.mid_train.max_steps_per_episode=3600
216
+ - rl_games.env_eval.mid_train.latencies=[2]
217
+ - rl_games.env_eval.post_train.enabled=false
218
+ - rl_games.env_eval.post_train.num_episodes=5
219
+ - rl_games.env_eval.post_train.max_steps_per_episode=3600
220
+ - rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
221
+ job:
222
+ name: train_starvla_hydra
223
+ chdir: false
224
+ override_dirname: checkpoint.local.keep_last_n=1,checkpoint.save_best_model=false,checkpoint.save_pt_file=false,checkpoint.sync.enabled=false,checkpoint.sync.keep_last_n=0,datasets.vla_data.action_balance.action_key=action_id,datasets.vla_data.action_balance.enabled=false,datasets.vla_data.action_balance.flap_id=1,datasets.vla_data.action_balance.noop_id=0,datasets.vla_data.action_balance.strategy=balanced_epoch,datasets.vla_data.action_balance.target_flap_fraction=0.3,datasets.vla_data.action_type=discrete,datasets.vla_data.data_mix=flappy_train__bridge,datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep,datasets.vla_data.eval_data_mix=flappy_train__bridge__val,datasets.vla_data.eval_num_workers=8,datasets.vla_data.eval_num_workers=8,datasets.vla_data.include_state=true,datasets.vla_data.latency_curriculum.enabled=false,datasets.vla_data.latency_curriculum.strategy=exclusive,datasets.vla_data.load_all_data_for_training=true,datasets.vla_data.num_workers=8,datasets.vla_data.num_workers=8,datasets.vla_data.per_device_batch_size=32,datasets.vla_data.persistent_workers=true,datasets.vla_data.persistent_workers=true,datasets.vla_data.pin_memory=true,datasets.vla_data.pin_memory=true,datasets.vla_data.prefetch_factor=4,datasets.vla_data.prefetch_factor=4,datasets.vla_data.sequential_step_sampling=false,datasets.vla_data.shuffle=true,datasets.vla_data.video_backend=torchvision_av,env=flappy,framework.action_model.action_dim=7,framework.action_model.action_env_dim=2,framework.action_model.action_horizon=1,framework.action_model.future_action_window_size=0,framework.action_model.loss_type=discrete_ce,framework.action_model.past_action_window_size=0,framework.action_model.state_dim=7,framework.name=QwenOFT,framework.qwenvl.attn_implementation=flash_attention_2,framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct,framework.qwenvl.enable_gradient_checkpointing=true,init=bridge,mode=single,model=openvla,rl_games.action_carrier=bridge,rl_games.env_eval.distributed_mode=none,rl_games.env_eval.enabled=true,rl_games.env_eval.fixed_episode_seeds=true,rl_games.env_eval.frameskip=1,rl_games.env_eval.image_size=224,rl_games.env_eval.latency.mode=single,rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json,rl_games.env_eval.latency.values=[0],rl_games.env_eval.latency_seed_stride=0,rl_games.env_eval.mid_train.enabled=true,rl_games.env_eval.mid_train.interval_steps=250,rl_games.env_eval.mid_train.latencies=[2],rl_games.env_eval.mid_train.max_steps_per_episode=3600,rl_games.env_eval.mid_train.num_episodes=5,rl_games.env_eval.post_train.enabled=false,rl_games.env_eval.post_train.latencies=[0,1,2,3,4],rl_games.env_eval.post_train.max_steps_per_episode=3600,rl_games.env_eval.post_train.num_episodes=5,rl_games.env_eval.seed=42,rl_games.env_eval.task_seed_stride=0,rl_games.initialization_mode=bridge,rl_games.model_alias=openvla,rl_games.task=flappy,run_id=flappy_fix_latency_2_200ep_full_tuning_corrected,run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints,seed=42,trainer.distributed_backend=none,trainer.eval_action_classification=true,trainer.eval_interval=100,trainer.eval_num_batches=100,trainer.freeze_llm_bottom_ratio=0.0,trainer.gradient_accumulation_steps=4,trainer.gradient_clipping=1.0,trainer.is_resume=false,trainer.learning_rate.action_model=0.0001,trainer.learning_rate.base=2e-05,trainer.learning_rate.qwen_vl_interface=1e-05,trainer.logging_frequency=1,trainer.loss_scale.vla=1.0,trainer.loss_scale.vlm=0.1,trainer.lr_scheduler_type=cosine_with_min_lr,trainer.max_grad_norm=1.0,trainer.max_train_steps=5000,trainer.num_warmup_steps=100,trainer.optimizer.betas=[0.9,0.95],trainer.optimizer.eps=1e-08,trainer.optimizer.fused=true,trainer.optimizer.name=AdamW,trainer.optimizer.weight_decay=1e-08,trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt,trainer.resume_step=0,trainer.save_format=pt,trainer.save_interval=500,trainer.scheduler_specific_kwargs.min_lr=1e-06,trainer.weight_decay=0.0,wandb_entity=saberrr-zju,wandb_project=starVLA_rl_games
225
+ id: ???
226
+ num: ???
227
+ config_name: train
228
+ env_set: {}
229
+ env_copy: []
230
+ config:
231
+ override_dirname:
232
+ kv_sep: '='
233
+ item_sep: ','
234
+ exclude_keys: []
235
+ runtime:
236
+ version: 1.3.3
237
+ version_base: '1.1'
238
+ cwd: /workspace/latency-sensitive-bench/starVLA
239
+ config_sources:
240
+ - path: hydra.conf
241
+ schema: pkg
242
+ provider: hydra
243
+ - path: /workspace/latency-sensitive-bench/starVLA/examples/rl_games/config
244
+ schema: file
245
+ provider: main
246
+ - path: ''
247
+ schema: structured
248
+ provider: schema
249
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/hydra
250
+ choices:
251
+ cross_task_setup: null
252
+ checkpoint: default
253
+ mode: single
254
+ init: bridge
255
+ env: flappy
256
+ model: openvla
257
+ hydra/env: default
258
+ hydra/callbacks: null
259
+ hydra/job_logging: default
260
+ hydra/hydra_logging: default
261
+ hydra/hydra_help: default
262
+ hydra/help: default
263
+ hydra/sweeper: basic
264
+ hydra/launcher: basic
265
+ hydra/output: default
266
+ verbose: false
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/overrides.yaml ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - model=openvla
2
+ - env=flappy
3
+ - init=bridge
4
+ - mode=single
5
+ - run_id=flappy_fix_latency_2_200ep_full_tuning_corrected
6
+ - run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
7
+ - seed=42
8
+ - wandb_entity=saberrr-zju
9
+ - wandb_project=starVLA_rl_games
10
+ - rl_games.env_eval.enabled=true
11
+ - checkpoint.sync.enabled=false
12
+ - checkpoint.sync.keep_last_n=0
13
+ - checkpoint.local.keep_last_n=1
14
+ - checkpoint.save_best_model=false
15
+ - checkpoint.save_pt_file=false
16
+ - trainer.is_resume=false
17
+ - trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
18
+ - trainer.resume_step=0
19
+ - trainer.max_train_steps=5000
20
+ - trainer.num_warmup_steps=100
21
+ - trainer.save_interval=500
22
+ - trainer.eval_interval=100
23
+ - trainer.eval_num_batches=100
24
+ - trainer.eval_action_classification=true
25
+ - trainer.logging_frequency=1
26
+ - trainer.gradient_accumulation_steps=4
27
+ - trainer.distributed_backend=none
28
+ - trainer.learning_rate.base=2e-05
29
+ - trainer.learning_rate.qwen_vl_interface=1e-05
30
+ - trainer.learning_rate.action_model=0.0001
31
+ - trainer.lr_scheduler_type=cosine_with_min_lr
32
+ - trainer.scheduler_specific_kwargs.min_lr=1e-06
33
+ - trainer.freeze_llm_bottom_ratio=0.0
34
+ - trainer.loss_scale.vla=1.0
35
+ - trainer.loss_scale.vlm=0.1
36
+ - trainer.max_grad_norm=1.0
37
+ - trainer.weight_decay=0.0
38
+ - trainer.gradient_clipping=1.0
39
+ - trainer.optimizer.name=AdamW
40
+ - trainer.optimizer.betas=[0.9,0.95]
41
+ - trainer.optimizer.eps=1e-08
42
+ - trainer.optimizer.weight_decay=1e-08
43
+ - trainer.optimizer.fused=true
44
+ - trainer.save_format=pt
45
+ - framework.name=QwenOFT
46
+ - framework.qwenvl.attn_implementation=flash_attention_2
47
+ - framework.qwenvl.enable_gradient_checkpointing=true
48
+ - framework.action_model.action_dim=7
49
+ - framework.action_model.action_env_dim=2
50
+ - framework.action_model.state_dim=7
51
+ - framework.action_model.loss_type=discrete_ce
52
+ - framework.action_model.action_horizon=1
53
+ - framework.action_model.future_action_window_size=0
54
+ - framework.action_model.past_action_window_size=0
55
+ - datasets.vla_data.include_state=true
56
+ - datasets.vla_data.action_type=discrete
57
+ - datasets.vla_data.sequential_step_sampling=false
58
+ - datasets.vla_data.shuffle=true
59
+ - datasets.vla_data.num_workers=8
60
+ - datasets.vla_data.eval_num_workers=8
61
+ - datasets.vla_data.prefetch_factor=4
62
+ - datasets.vla_data.persistent_workers=true
63
+ - datasets.vla_data.pin_memory=true
64
+ - datasets.vla_data.action_balance.enabled=false
65
+ - datasets.vla_data.action_balance.strategy=balanced_epoch
66
+ - datasets.vla_data.action_balance.action_key=action_id
67
+ - datasets.vla_data.action_balance.target_flap_fraction=0.3
68
+ - datasets.vla_data.action_balance.noop_id=0
69
+ - datasets.vla_data.action_balance.flap_id=1
70
+ - datasets.vla_data.latency_curriculum.enabled=false
71
+ - datasets.vla_data.latency_curriculum.strategy=exclusive
72
+ - datasets.vla_data.per_device_batch_size=32
73
+ - datasets.vla_data.num_workers=8
74
+ - datasets.vla_data.eval_num_workers=8
75
+ - datasets.vla_data.prefetch_factor=4
76
+ - datasets.vla_data.persistent_workers=true
77
+ - datasets.vla_data.pin_memory=true
78
+ - datasets.vla_data.load_all_data_for_training=true
79
+ - datasets.vla_data.video_backend=torchvision_av
80
+ - datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
81
+ - datasets.vla_data.data_mix=flappy_train__bridge
82
+ - datasets.vla_data.eval_data_mix=flappy_train__bridge__val
83
+ - framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
84
+ - rl_games.task=flappy
85
+ - rl_games.model_alias=openvla
86
+ - rl_games.initialization_mode=bridge
87
+ - rl_games.action_carrier=bridge
88
+ - rl_games.env_eval.distributed_mode=none
89
+ - rl_games.env_eval.latency.mode=single
90
+ - rl_games.env_eval.frameskip=1
91
+ - rl_games.env_eval.image_size=224
92
+ - rl_games.env_eval.seed=42
93
+ - rl_games.env_eval.fixed_episode_seeds=true
94
+ - rl_games.env_eval.latency_seed_stride=0
95
+ - rl_games.env_eval.task_seed_stride=0
96
+ - rl_games.env_eval.latency.values=[0]
97
+ - rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
98
+ - rl_games.env_eval.mid_train.enabled=true
99
+ - rl_games.env_eval.mid_train.interval_steps=250
100
+ - rl_games.env_eval.mid_train.num_episodes=5
101
+ - rl_games.env_eval.mid_train.max_steps_per_episode=3600
102
+ - rl_games.env_eval.mid_train.latencies=[2]
103
+ - rl_games.env_eval.post_train.enabled=false
104
+ - rl_games.env_eval.post_train.num_episodes=5
105
+ - rl_games.env_eval.post_train.max_steps_per_episode=3600
106
+ - rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/train_starvla_hydra.log ADDED
The diff for this file is too large to render. See raw diff
 
flappy_fix_latency_2_200ep_full_tuning_corrected/summary.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"steps": 500}
2
+ {"steps": 1000}
3
+ {"steps": 1500}
4
+ {"steps": 2000}
5
+ {"steps": 2500}
6
+ {"steps": 3000}
7
+ {"steps": 3500}
8
+ {"steps": 4000}
9
+ {"steps": 4500}
10
+ {"steps": 5000}