Caesarrr commited on
Commit
e5ac608
·
verified ·
1 Parent(s): 75d9c38

Upload folder using huggingface_hub

Browse files
Files changed (33) hide show
  1. flappy_fix_latency_2_200ep_last_8_layers/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt +3 -0
  2. flappy_fix_latency_2_200ep_last_8_layers/checkpoints/steps_5000_state/model.safetensors +3 -0
  3. flappy_fix_latency_2_200ep_last_8_layers/checkpoints/steps_5000_state/optimizer.bin +3 -0
  4. flappy_fix_latency_2_200ep_last_8_layers/checkpoints/steps_5000_state/random_states_0.pkl +3 -0
  5. flappy_fix_latency_2_200ep_last_8_layers/config.full.yaml +217 -0
  6. flappy_fix_latency_2_200ep_last_8_layers/config.yaml +97 -0
  7. flappy_fix_latency_2_200ep_last_8_layers/dataset_statistics.json +127 -0
  8. flappy_fix_latency_2_200ep_last_8_layers/dataset_statistics_eval.json +127 -0
  9. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1000.json +63 -0
  10. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1250.json +63 -0
  11. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1500.json +63 -0
  12. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1750.json +63 -0
  13. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2000.json +63 -0
  14. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2250.json +63 -0
  15. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_250.json +62 -0
  16. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2500.json +63 -0
  17. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2750.json +63 -0
  18. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3000.json +63 -0
  19. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3250.json +63 -0
  20. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3500.json +63 -0
  21. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3750.json +63 -0
  22. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4000.json +63 -0
  23. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4250.json +63 -0
  24. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4500.json +63 -0
  25. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4750.json +63 -0
  26. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_500.json +63 -0
  27. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_5000.json +63 -0
  28. flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_750.json +63 -0
  29. flappy_fix_latency_2_200ep_last_8_layers/hydra/.hydra/config.yaml +215 -0
  30. flappy_fix_latency_2_200ep_last_8_layers/hydra/.hydra/hydra.yaml +259 -0
  31. flappy_fix_latency_2_200ep_last_8_layers/hydra/.hydra/overrides.yaml +99 -0
  32. flappy_fix_latency_2_200ep_last_8_layers/hydra/train_starvla_hydra.log +0 -0
  33. flappy_fix_latency_2_200ep_last_8_layers/summary.jsonl +10 -0
flappy_fix_latency_2_200ep_last_8_layers/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371cb744227687bb99bcad7f9ff2250cf06da75631359ad3eba4c6bc52570607
3
+ size 9785060316
flappy_fix_latency_2_200ep_last_8_layers/checkpoints/steps_5000_state/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8afaa32fca8bd02a62fd251a0457e809370c64803c90166b0962241a2089519a
3
+ size 9138230516
flappy_fix_latency_2_200ep_last_8_layers/checkpoints/steps_5000_state/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c23c8ef7c487ca6e150753812b9d04e3c1d9479b1783685e53e4759b07f7c23
3
+ size 6972351998
flappy_fix_latency_2_200ep_last_8_layers/checkpoints/steps_5000_state/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3592d761ed9b400857ff294022f104313045e8a105e4209ffd44560bd94a7d7f
3
+ size 14821
flappy_fix_latency_2_200ep_last_8_layers/config.full.yaml ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ framework:
2
+ name: QwenOFT
3
+ qwenvl:
4
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
5
+ attn_implementation: flash_attention_2
6
+ enable_gradient_checkpointing: true
7
+ action_model:
8
+ action_model_type: MLP
9
+ action_dim: 7
10
+ action_hidden_dim: 2560
11
+ future_action_window_size: 0
12
+ past_action_window_size: 0
13
+ loss_type: discrete_ce
14
+ state_dim: 7
15
+ action_horizon: 1
16
+ action_env_dim: 2
17
+ datasets:
18
+ vla_data:
19
+ dataset_py: lerobot_datasets
20
+ include_state: true
21
+ data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
22
+ data_mix: flappy_train__bridge
23
+ eval_data_mix: flappy_train__bridge__val
24
+ custom_mixtures_path: null
25
+ action_type: discrete
26
+ sequential_step_sampling: false
27
+ eval_sequential_step_sampling: null
28
+ num_workers: 8
29
+ eval_num_workers: 8
30
+ prefetch_factor: 4
31
+ shuffle: true
32
+ action_balance:
33
+ enabled: false
34
+ strategy: balanced_epoch
35
+ action_key: action_id
36
+ target_flap_fraction: 0.3
37
+ noop_id: 0
38
+ flap_id: 1
39
+ latency_curriculum:
40
+ enabled: false
41
+ strategy: exclusive
42
+ latencies: null
43
+ phase_steps: null
44
+ per_device_batch_size: 64
45
+ load_all_data_for_training: true
46
+ num_obs_frames: 1
47
+ image_mode: single
48
+ stitch_grid:
49
+ - 2
50
+ - 2
51
+ obs_image_size: null
52
+ video_backend: torchvision_av
53
+ dataset:
54
+ source_hf: ''
55
+ config_name: null
56
+ source_subdir: null
57
+ converted_name: flappy_train
58
+ single_source_hf: ''
59
+ mixed_source_hf: ''
60
+ single_converted_name: flappy_train
61
+ mixed_converted_name: flappy_mixed_latency_train
62
+ single_latency_filter: null
63
+ mixed_latency_filter: null
64
+ force_download: false
65
+ setup_force: false
66
+ skip_verification: false
67
+ verify_rows: 200
68
+ max_episodes: null
69
+ episodes_per_latency: null
70
+ latency_filter: null
71
+ debug_subset:
72
+ enabled: false
73
+ max_episodes: 5
74
+ suffix: debug
75
+ base_model:
76
+ repo_id: Qwen/Qwen3-VL-4B-Instruct
77
+ initialization:
78
+ checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
79
+ checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
80
+ checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
81
+ trainer:
82
+ max_train_steps: 5000
83
+ num_warmup_steps: 100
84
+ save_interval: 500
85
+ eval_interval: 100
86
+ eval_num_batches: 100
87
+ per_latency_eval_num_batches: null
88
+ eval_action_classification: true
89
+ eval_action_classification_interval: null
90
+ cc_f1_tolerance: 1
91
+ learning_rate:
92
+ base: 2.0e-05
93
+ qwen_vl_interface: 1.0e-05
94
+ action_model: 0.0001
95
+ lr_scheduler_type: cosine_with_min_lr
96
+ scheduler_specific_kwargs:
97
+ min_lr: 1.0e-06
98
+ freeze_modules: ''
99
+ freeze_llm_bottom_ratio: 0.7778
100
+ loss_scale:
101
+ vla: 1.0
102
+ vlm: 0.1
103
+ max_grad_norm: 1.0
104
+ weight_decay: 0.0
105
+ logging_frequency: 1
106
+ gradient_clipping: 1.0
107
+ gradient_accumulation_steps: 2
108
+ distributed_backend: none
109
+ is_resume: false
110
+ pretrained_checkpoint: null
111
+ resume_step: 0
112
+ reload_modules: null
113
+ optimizer:
114
+ name: AdamW
115
+ betas:
116
+ - 0.9
117
+ - 0.95
118
+ eps: 1.0e-08
119
+ weight_decay: 1.0e-08
120
+ fused: false
121
+ save_format: pt
122
+ workspace_dir: WORKSPACE_DIR
123
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
124
+ seed: 42
125
+ wandb_entity: saberrr-zju
126
+ wandb_project: starVLA_rl_games
127
+ auth:
128
+ env_file: null
129
+ hf_token_env: HF_TOKEN
130
+ wandb_api_key_env: WANDB_API_KEY
131
+ paths:
132
+ run_root_dir: results/Checkpoints
133
+ dataset_local_dir: playground/Datasets/rl_games
134
+ dataset_cache_dir: null
135
+ base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
136
+ accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
137
+ launch:
138
+ use_accelerate: true
139
+ gpus: null
140
+ num_processes: 1
141
+ dry_run: false
142
+ conda:
143
+ enabled: true
144
+ env_name: null
145
+ rl_games:
146
+ model_alias: openvla
147
+ env_eval:
148
+ image_size: 224
149
+ frameskip: 1
150
+ seed: 42
151
+ fixed_episode_seeds: true
152
+ latency_seed_stride: 0
153
+ task_seed_stride: 0
154
+ task_description: ''
155
+ enabled: true
156
+ distributed_mode: none
157
+ vectorized:
158
+ enabled: false
159
+ batch_size: 1
160
+ latency:
161
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
162
+ mode: single
163
+ values:
164
+ - 0
165
+ mid_train:
166
+ enabled: true
167
+ interval_steps: 250
168
+ latencies:
169
+ - 2
170
+ num_episodes: 5
171
+ max_steps_per_episode: 3600
172
+ post_train:
173
+ enabled: false
174
+ latencies:
175
+ - 0
176
+ - 1
177
+ - 2
178
+ - 3
179
+ - 4
180
+ num_episodes: 5
181
+ max_steps_per_episode: 3600
182
+ task: flappy
183
+ initialization_mode: bridge
184
+ action_carrier: bridge
185
+ model: openvla
186
+ env: flappy
187
+ init: bridge
188
+ bridge_base_model:
189
+ repo_id:
190
+ openvla: Qwen/Qwen3-VL-4B-Instruct
191
+ pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
192
+ pi05: Qwen/Qwen3-VL-4B-Instruct
193
+ gr00t: Qwen/Qwen3-VL-4B-Instruct
194
+ local_dir:
195
+ openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
196
+ pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
197
+ pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
198
+ gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
199
+ mode: single
200
+ checkpoint:
201
+ load: auto
202
+ hf_repo_id: null
203
+ save_best_model: false
204
+ save_pt_file: false
205
+ local:
206
+ keep_last_n: 1
207
+ sync:
208
+ enabled: false
209
+ repo_id: null
210
+ keep_last_n: 0
211
+ sync_every_n_checkpoints: 1
212
+ resume_policy: local_latest
213
+ run_id: flappy_fix_latency_2_200ep_last_8_layers
214
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_last_8_layers
215
+ config_yaml: null
216
+ is_debug: false
217
+ version_id: '0.21'
flappy_fix_latency_2_200ep_last_8_layers/config.yaml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint:
2
+ local:
3
+ keep_last_n: 1
4
+ save_best_model: false
5
+ save_pt_file: false
6
+ sync:
7
+ enabled: false
8
+ keep_last_n: 0
9
+ repo_id: null
10
+ datasets:
11
+ vla_data:
12
+ data_mix: flappy_train__bridge
13
+ dataset_py: lerobot_datasets
14
+ eval_data_mix: flappy_train__bridge__val
15
+ eval_num_workers: 8
16
+ include_state: true
17
+ latency_curriculum:
18
+ enabled: false
19
+ obs_image_size: null
20
+ per_device_batch_size: 64
21
+ prefetch_factor: 4
22
+ framework:
23
+ action_model:
24
+ action_dim: 7
25
+ action_env_dim: 2
26
+ action_hidden_dim: 2560
27
+ action_horizon: 1
28
+ action_model_type: MLP
29
+ loss_type: discrete_ce
30
+ state_dim: 7
31
+ name: QwenOFT
32
+ qwenvl:
33
+ attn_implementation: flash_attention_2
34
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
35
+ enable_gradient_checkpointing: true
36
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_last_8_layers
37
+ rl_games:
38
+ env_eval:
39
+ distributed_mode: none
40
+ enabled: true
41
+ fixed_episode_seeds: true
42
+ frameskip: 1
43
+ image_size: 224
44
+ latency:
45
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
46
+ latency_seed_stride: 0
47
+ mid_train:
48
+ enabled: true
49
+ interval_steps: 250
50
+ latencies:
51
+ - 2
52
+ max_steps_per_episode: 3600
53
+ num_episodes: 5
54
+ seed: 42
55
+ task_description: ''
56
+ task_seed_stride: 0
57
+ vectorized:
58
+ enabled: false
59
+ model_alias: openvla
60
+ task: flappy
61
+ run_id: flappy_fix_latency_2_200ep_last_8_layers
62
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
63
+ seed: 42
64
+ trainer:
65
+ distributed_backend: none
66
+ eval_action_classification: true
67
+ eval_action_classification_interval: null
68
+ eval_interval: 100
69
+ eval_num_batches: 100
70
+ freeze_llm_bottom_ratio: 0.7778
71
+ freeze_modules: ''
72
+ gradient_accumulation_steps: 2
73
+ gradient_clipping: 1.0
74
+ is_resume: false
75
+ learning_rate:
76
+ action_model: 0.0001
77
+ base: 2.0e-05
78
+ qwen_vl_interface: 1.0e-05
79
+ logging_frequency: 1
80
+ lr_scheduler_type: cosine_with_min_lr
81
+ max_train_steps: 5000
82
+ num_warmup_steps: 100
83
+ optimizer:
84
+ betas:
85
+ - 0.9
86
+ - 0.95
87
+ eps: 1.0e-08
88
+ fused: false
89
+ weight_decay: 1.0e-08
90
+ per_latency_eval_num_batches: null
91
+ pretrained_checkpoint: null
92
+ reload_modules: null
93
+ save_interval: 500
94
+ scheduler_specific_kwargs:
95
+ min_lr: 1.0e-06
96
+ wandb_entity: saberrr-zju
97
+ wandb_project: starVLA_rl_games
flappy_fix_latency_2_200ep_last_8_layers/dataset_statistics.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.6028500199317932,
6
+ 0.3971499800682068,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0
12
+ ],
13
+ "std": [
14
+ 0.4890792667865753,
15
+ 0.4890792667865753,
16
+ 0.0,
17
+ 0.0,
18
+ 0.0,
19
+ 0.0,
20
+ 0.0
21
+ ],
22
+ "max": [
23
+ 1.0,
24
+ 1.0,
25
+ 0.0,
26
+ 0.0,
27
+ 0.0,
28
+ 0.0,
29
+ 0.0
30
+ ],
31
+ "min": [
32
+ 0.0,
33
+ 0.0,
34
+ 0.0,
35
+ 0.0,
36
+ 0.0,
37
+ 0.0,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ 0.0,
42
+ 0.0,
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 1.0,
51
+ 1.0,
52
+ 0.0,
53
+ 0.0,
54
+ 0.0,
55
+ 0.0,
56
+ 0.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ true
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 330734,
125
+ "num_trajectories": 180
126
+ }
127
+ }
flappy_fix_latency_2_200ep_last_8_layers/dataset_statistics_eval.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.7959861159324646,
6
+ 0.2040138840675354,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0
12
+ ],
13
+ "std": [
14
+ 0.4030573070049286,
15
+ 0.4030573070049286,
16
+ 0.0,
17
+ 0.0,
18
+ 0.0,
19
+ 0.0,
20
+ 0.0
21
+ ],
22
+ "max": [
23
+ 1.0,
24
+ 1.0,
25
+ 0.0,
26
+ 0.0,
27
+ 0.0,
28
+ 0.0,
29
+ 0.0
30
+ ],
31
+ "min": [
32
+ 0.0,
33
+ 0.0,
34
+ 0.0,
35
+ 0.0,
36
+ 0.0,
37
+ 0.0,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ 0.0,
42
+ 0.0,
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 1.0,
51
+ 1.0,
52
+ 0.0,
53
+ 0.0,
54
+ 0.0,
55
+ 0.0,
56
+ 0.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ true
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 72000,
125
+ "num_trajectories": 20
126
+ }
127
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 153.99999999999991,
7
+ "mean_length": 1261.2,
8
+ "std_reward": 120.2648743399357,
9
+ "std_length": 970.7072473202206,
10
+ "episode_rewards": [
11
+ 143.89999999999685,
12
+ 213.89999999999372,
13
+ 12.899999999999974,
14
+ 348.9000000000087,
15
+ 50.40000000000037
16
+ ],
17
+ "episode_lengths": [
18
+ 1180,
19
+ 1745,
20
+ 122,
21
+ 2834,
22
+ 425
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 5055,
26
+ "1": 1251
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 153.99999999999991,
55
+ "mean_length": 1261.2,
56
+ "std_reward": 120.2648743399357,
57
+ "std_length": 970.7072473202206,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 153.99999999999991,
60
+ "macro_mean_length": 1261.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 255.94000000000815,
7
+ "mean_length": 2080.0,
8
+ "std_reward": 168.94732433514338,
9
+ "std_length": 1361.7158293858524,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 166.89999999999583,
13
+ 216.79999999999362,
14
+ 444.60000000002566,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 1365,
20
+ 1765,
21
+ 3600,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 8188,
26
+ "1": 2212
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 255.94000000000815,
55
+ "mean_length": 2080.0,
56
+ "std_reward": 168.94732433514338,
57
+ "std_length": 1361.7158293858524,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 255.94000000000815,
60
+ "macro_mean_length": 2080.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 156.02000000000382,
7
+ "mean_length": 1275.6,
8
+ "std_reward": 164.3303063953899,
9
+ "std_length": 1324.9588069068411,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 233.39999999999284,
13
+ 48.50000000000034,
14
+ 444.60000000002566,
15
+ 46.80000000000033
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 1904,
20
+ 406,
21
+ 3600,
22
+ 398
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 5074,
26
+ "1": 1304
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 156.02000000000382,
55
+ "mean_length": 1275.6,
56
+ "std_reward": 164.3303063953899,
57
+ "std_length": 1324.9588069068411,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 156.02000000000382,
60
+ "macro_mean_length": 1275.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_1750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 10.020000000000042,
7
+ "mean_length": 98.6,
8
+ "std_reward": 13.210510966650844,
9
+ "std_length": 106.95344781726301,
10
+ "episode_rewards": [
11
+ 36.40000000000021,
12
+ 3.8999999999999986,
13
+ 3.8999999999999986,
14
+ 2.0000000000000013,
15
+ 3.8999999999999986
16
+ ],
17
+ "episode_lengths": [
18
+ 312,
19
+ 50,
20
+ 50,
21
+ 31,
22
+ 50
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 400,
26
+ "1": 93
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 1750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 10.020000000000042,
55
+ "mean_length": 98.6,
56
+ "std_reward": 13.210510966650844,
57
+ "std_length": 106.95344781726301,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 10.020000000000042,
60
+ "macro_mean_length": 98.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 200.6800000000103,
7
+ "mean_length": 1635.4,
8
+ "std_reward": 200.38831702473323,
9
+ "std_length": 1614.02622035703,
10
+ "episode_rewards": [
11
+ 73.89999999999999,
12
+ 444.60000000002566,
13
+ 36.40000000000021,
14
+ 3.8999999999999986,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 615,
19
+ 3600,
20
+ 312,
21
+ 50,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 6460,
26
+ "1": 1717
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 200.6800000000103,
55
+ "mean_length": 1635.4,
56
+ "std_reward": 200.38831702473323,
57
+ "std_length": 1614.02622035703,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 200.6800000000103,
60
+ "macro_mean_length": 1635.4,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 190.06000000001035,
7
+ "mean_length": 1549.0,
8
+ "std_reward": 208.48305063003372,
9
+ "std_length": 1679.9416656539,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 3.8999999999999986,
15
+ 50.40000000000037
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 3600,
20
+ 3600,
21
+ 50,
22
+ 425
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 6160,
26
+ "1": 1585
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 190.06000000001035,
55
+ "mean_length": 1549.0,
56
+ "std_reward": 208.48305063003372,
57
+ "std_length": 1679.9416656539,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 190.06000000001035,
60
+ "macro_mean_length": 1549.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_250.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 2.0000000000000013,
7
+ "mean_length": 31.0,
8
+ "std_reward": 0.0,
9
+ "std_length": 0.0,
10
+ "episode_rewards": [
11
+ 2.0000000000000013,
12
+ 2.0000000000000013,
13
+ 2.0000000000000013,
14
+ 2.0000000000000013,
15
+ 2.0000000000000013
16
+ ],
17
+ "episode_lengths": [
18
+ 31,
19
+ 31,
20
+ 31,
21
+ 31,
22
+ 31
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 155
26
+ },
27
+ "fixed_episode_seeds": true,
28
+ "eval_seed": 42,
29
+ "episode_seeds": [
30
+ 42,
31
+ 43,
32
+ 44,
33
+ 45,
34
+ 46
35
+ ],
36
+ "episode_indices": [
37
+ 0,
38
+ 1,
39
+ 2,
40
+ 3,
41
+ 4
42
+ ]
43
+ }
44
+ },
45
+ "aggregate": {
46
+ "stage": "mid_train",
47
+ "step": 250,
48
+ "task": "flappy",
49
+ "model_alias": "openvla",
50
+ "fixed_episode_seeds": true,
51
+ "eval_seed": 42,
52
+ "total_episodes": 5,
53
+ "mean_reward": 2.0000000000000013,
54
+ "mean_length": 31.0,
55
+ "std_reward": 0.0,
56
+ "std_length": 0.0,
57
+ "task_count": 1,
58
+ "macro_mean_reward": 2.0000000000000013,
59
+ "macro_mean_length": 31.0,
60
+ "distributed_eval": false
61
+ }
62
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 173.2800000000008,
7
+ "mean_length": 1416.2,
8
+ "std_reward": 151.64563165485944,
9
+ "std_length": 1224.5701939864452,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 204.39999999999415,
13
+ 400.4000000000179,
14
+ 3.8999999999999986,
15
+ 250.89999999999208
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 1668,
20
+ 3250,
21
+ 50,
22
+ 2043
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 5586,
26
+ "1": 1495
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 173.2800000000008,
55
+ "mean_length": 1416.2,
56
+ "std_reward": 151.64563165485944,
57
+ "std_length": 1224.5701939864452,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 173.2800000000008,
60
+ "macro_mean_length": 1416.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_2750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 211.5600000000095,
7
+ "mean_length": 1722.6,
8
+ "std_reward": 198.2644758901727,
9
+ "std_length": 1597.6542304265963,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 444.60000000002566,
13
+ 157.89999999999623,
14
+ 3.8999999999999986,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 3600,
20
+ 1293,
21
+ 50,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 6883,
26
+ "1": 1730
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 2750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 211.5600000000095,
55
+ "mean_length": 1722.6,
56
+ "std_reward": 198.2644758901727,
57
+ "std_length": 1597.6542304265963,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 211.5600000000095,
60
+ "macro_mean_length": 1722.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 314.16000000001293,
7
+ "mean_length": 2550.6,
8
+ "std_reward": 168.18891283316998,
9
+ "std_length": 1356.5594126318242,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 414.4000000000204,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 260.3999999999929
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 3363,
20
+ 3600,
21
+ 3600,
22
+ 2120
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 10174,
26
+ "1": 2579
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 314.16000000001293,
55
+ "mean_length": 2550.6,
56
+ "std_reward": 168.18891283316998,
57
+ "std_length": 1356.5594126318242,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 314.16000000001293,
60
+ "macro_mean_length": 2550.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 268.90000000001544,
7
+ "mean_length": 2184.0,
8
+ "std_reward": 215.18962800285104,
9
+ "std_length": 1734.2502702897295,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 3.8999999999999986,
15
+ 444.60000000002566
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 3600,
20
+ 3600,
21
+ 50,
22
+ 3600
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 8709,
26
+ "1": 2211
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 268.90000000001544,
55
+ "mean_length": 2184.0,
56
+ "std_reward": 215.18962800285104,
57
+ "std_length": 1734.2502702897295,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 268.90000000001544,
60
+ "macro_mean_length": 2184.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 260.5200000000122,
7
+ "mean_length": 2119.0,
8
+ "std_reward": 189.18299500748955,
9
+ "std_length": 1526.6031573398504,
10
+ "episode_rewards": [
11
+ 6.799999999999992,
12
+ 414.4000000000204,
13
+ 444.60000000002566,
14
+ 381.9000000000146,
15
+ 54.90000000000042
16
+ ],
17
+ "episode_lengths": [
18
+ 70,
19
+ 3363,
20
+ 3600,
21
+ 3101,
22
+ 461
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 8418,
26
+ "1": 2177
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 260.5200000000122,
55
+ "mean_length": 2119.0,
56
+ "std_reward": 189.18299500748955,
57
+ "std_length": 1526.6031573398504,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 260.5200000000122,
60
+ "macro_mean_length": 2119.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_3750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 268.90000000001265,
7
+ "mean_length": 2186.2,
8
+ "std_reward": 184.73615780351173,
9
+ "std_length": 1488.7764640804878,
10
+ "episode_rewards": [
11
+ 36.40000000000021,
12
+ 364.0000000000114,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 54.90000000000042
16
+ ],
17
+ "episode_lengths": [
18
+ 312,
19
+ 2958,
20
+ 3600,
21
+ 3600,
22
+ 461
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 8749,
26
+ "1": 2182
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 3750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 268.90000000001265,
55
+ "mean_length": 2186.2,
56
+ "std_reward": 184.73615780351173,
57
+ "std_length": 1488.7764640804878,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 268.90000000001265,
60
+ "macro_mean_length": 2186.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 213.68000000001044,
7
+ "mean_length": 1740.2,
8
+ "std_reward": 188.710353717025,
9
+ "std_length": 1519.861230507575,
10
+ "episode_rewards": [
11
+ 73.89999999999999,
12
+ 444.60000000002566,
13
+ 54.90000000000042,
14
+ 444.60000000002566,
15
+ 50.40000000000037
16
+ ],
17
+ "episode_lengths": [
18
+ 615,
19
+ 3600,
20
+ 461,
21
+ 3600,
22
+ 425
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 6938,
26
+ "1": 1763
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 213.68000000001044,
55
+ "mean_length": 1740.2,
56
+ "std_reward": 188.710353717025,
57
+ "std_length": 1519.861230507575,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 213.68000000001044,
60
+ "macro_mean_length": 1740.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4250.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 268.7800000000133,
7
+ "mean_length": 2185.0,
8
+ "std_reward": 191.31879573111533,
9
+ "std_length": 1541.771837854097,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 22.40000000000006,
13
+ 444.60000000002566,
14
+ 381.9000000000146,
15
+ 50.40000000000037
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 199,
20
+ 3600,
21
+ 3101,
22
+ 425
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 8724,
26
+ "1": 2201
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4250,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 268.7800000000133,
55
+ "mean_length": 2185.0,
56
+ "std_reward": 191.31879573111533,
57
+ "std_length": 1541.771837854097,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 268.7800000000133,
60
+ "macro_mean_length": 2185.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 366.6600000000206,
7
+ "mean_length": 2972.2,
8
+ "std_reward": 155.88000000001009,
9
+ "std_length": 1255.6000000000001,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 54.90000000000042
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 461
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 11863,
26
+ "1": 2998
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 366.6600000000206,
55
+ "mean_length": 2972.2,
56
+ "std_reward": 155.88000000001009,
57
+ "std_length": 1255.6000000000001,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 366.6600000000206,
60
+ "macro_mean_length": 2972.2,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_4750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 278.1800000000084,
7
+ "mean_length": 2261.0,
8
+ "std_reward": 164.18610659859004,
9
+ "std_length": 1322.34791185981,
10
+ "episode_rewards": [
11
+ 444.60000000002566,
12
+ 213.89999999999372,
13
+ 444.60000000002566,
14
+ 3.8999999999999986,
15
+ 283.8999999999971
16
+ ],
17
+ "episode_lengths": [
18
+ 3600,
19
+ 1745,
20
+ 3600,
21
+ 50,
22
+ 2310
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 8954,
26
+ "1": 2351
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 4750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 278.1800000000084,
55
+ "mean_length": 2261.0,
56
+ "std_reward": 164.18610659859004,
57
+ "std_length": 1322.34791185981,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 278.1800000000084,
60
+ "macro_mean_length": 2261.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_500.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 19.920000000000094,
7
+ "mean_length": 179.6,
8
+ "std_reward": 21.023834093713926,
9
+ "std_length": 170.6113712505705,
10
+ "episode_rewards": [
11
+ 19.100000000000026,
12
+ 9.199999999999983,
13
+ 6.699999999999992,
14
+ 3.8999999999999986,
15
+ 60.700000000000486
16
+ ],
17
+ "episode_lengths": [
18
+ 175,
19
+ 94,
20
+ 69,
21
+ 50,
22
+ 510
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 723,
26
+ "1": 175
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 500,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 19.920000000000094,
55
+ "mean_length": 179.6,
56
+ "std_reward": 21.023834093713926,
57
+ "std_length": 170.6113712505705,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 19.920000000000094,
60
+ "macro_mean_length": 179.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_5000.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 285.0200000000155,
7
+ "mean_length": 2314.6,
8
+ "std_reward": 195.5323236705503,
9
+ "std_length": 1574.9920126781594,
10
+ "episode_rewards": [
11
+ 36.40000000000021,
12
+ 444.60000000002566,
13
+ 444.60000000002566,
14
+ 444.60000000002566,
15
+ 54.90000000000042
16
+ ],
17
+ "episode_lengths": [
18
+ 312,
19
+ 3600,
20
+ 3600,
21
+ 3600,
22
+ 461
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 9199,
26
+ "1": 2374
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 5000,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 285.0200000000155,
55
+ "mean_length": 2314.6,
56
+ "std_reward": 195.5323236705503,
57
+ "std_length": 1574.9920126781594,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 285.0200000000155,
60
+ "macro_mean_length": 2314.6,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/eval/mid_train/step_750.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_latency": {
3
+ "flappy/latency_2": {
4
+ "latency": 2,
5
+ "num_episodes": 5,
6
+ "mean_reward": 83.5799999999984,
7
+ "mean_length": 692.0,
8
+ "std_reward": 71.19664037017279,
9
+ "std_length": 575.6933211354809,
10
+ "episode_rewards": [
11
+ 88.59999999999933,
12
+ 200.7999999999943,
13
+ 3.8999999999999986,
14
+ 108.79999999999846,
15
+ 15.79999999999998
16
+ ],
17
+ "episode_lengths": [
18
+ 735,
19
+ 1641,
20
+ 50,
21
+ 892,
22
+ 142
23
+ ],
24
+ "decoded_action_hist": {
25
+ "0": 2698,
26
+ "1": 762
27
+ },
28
+ "fixed_episode_seeds": true,
29
+ "eval_seed": 42,
30
+ "episode_seeds": [
31
+ 42,
32
+ 43,
33
+ 44,
34
+ 45,
35
+ 46
36
+ ],
37
+ "episode_indices": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4
43
+ ]
44
+ }
45
+ },
46
+ "aggregate": {
47
+ "stage": "mid_train",
48
+ "step": 750,
49
+ "task": "flappy",
50
+ "model_alias": "openvla",
51
+ "fixed_episode_seeds": true,
52
+ "eval_seed": 42,
53
+ "total_episodes": 5,
54
+ "mean_reward": 83.5799999999984,
55
+ "mean_length": 692.0,
56
+ "std_reward": 71.19664037017279,
57
+ "std_length": 575.6933211354809,
58
+ "task_count": 1,
59
+ "macro_mean_reward": 83.5799999999984,
60
+ "macro_mean_length": 692.0,
61
+ "distributed_eval": false
62
+ }
63
+ }
flappy_fix_latency_2_200ep_last_8_layers/hydra/.hydra/config.yaml ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ framework:
2
+ qwenvl:
3
+ base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
4
+ attn_implementation: flash_attention_2
5
+ enable_gradient_checkpointing: true
6
+ action_model:
7
+ state_dim: 7
8
+ loss_type: discrete_ce
9
+ action_horizon: 1
10
+ future_action_window_size: 0
11
+ past_action_window_size: 0
12
+ action_dim: 7
13
+ action_env_dim: 2
14
+ name: QwenOFT
15
+ datasets:
16
+ vla_data:
17
+ dataset_py: lerobot_datasets
18
+ include_state: true
19
+ data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
20
+ data_mix: flappy_train__bridge
21
+ eval_data_mix: flappy_train__bridge__val
22
+ custom_mixtures_path: null
23
+ action_type: discrete
24
+ sequential_step_sampling: false
25
+ eval_sequential_step_sampling: null
26
+ num_workers: 8
27
+ eval_num_workers: 8
28
+ prefetch_factor: 4
29
+ shuffle: true
30
+ action_balance:
31
+ enabled: false
32
+ strategy: balanced_epoch
33
+ action_key: action_id
34
+ target_flap_fraction: 0.3
35
+ noop_id: 0
36
+ flap_id: 1
37
+ latency_curriculum:
38
+ enabled: false
39
+ strategy: exclusive
40
+ latencies: null
41
+ phase_steps: null
42
+ per_device_batch_size: 64
43
+ load_all_data_for_training: true
44
+ num_obs_frames: 1
45
+ image_mode: single
46
+ stitch_grid:
47
+ - 2
48
+ - 2
49
+ obs_image_size: null
50
+ video_backend: torchvision_av
51
+ dataset:
52
+ source_hf: ${dataset.single_source_hf}
53
+ config_name: null
54
+ source_subdir: null
55
+ converted_name: ${dataset.single_converted_name}
56
+ single_source_hf: ''
57
+ mixed_source_hf: ''
58
+ single_converted_name: flappy_train
59
+ mixed_converted_name: flappy_mixed_latency_train
60
+ single_latency_filter: null
61
+ mixed_latency_filter: null
62
+ force_download: false
63
+ setup_force: false
64
+ skip_verification: false
65
+ verify_rows: 200
66
+ max_episodes: null
67
+ episodes_per_latency: null
68
+ latency_filter: ${dataset.single_latency_filter}
69
+ debug_subset:
70
+ enabled: false
71
+ max_episodes: 5
72
+ suffix: debug
73
+ base_model:
74
+ repo_id: ${bridge_base_model.repo_id.${model}}
75
+ initialization:
76
+ checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
77
+ checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
78
+ checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
79
+ trainer:
80
+ max_train_steps: 5000
81
+ num_warmup_steps: 100
82
+ save_interval: 500
83
+ eval_interval: 100
84
+ eval_num_batches: 100
85
+ per_latency_eval_num_batches: null
86
+ eval_action_classification: true
87
+ eval_action_classification_interval: null
88
+ cc_f1_tolerance: 1
89
+ learning_rate:
90
+ base: 2.0e-05
91
+ qwen_vl_interface: 1.0e-05
92
+ action_model: 0.0001
93
+ lr_scheduler_type: cosine_with_min_lr
94
+ scheduler_specific_kwargs:
95
+ min_lr: 1.0e-06
96
+ freeze_modules: ''
97
+ freeze_llm_bottom_ratio: 0.7778
98
+ loss_scale:
99
+ vla: 1.0
100
+ vlm: 0.1
101
+ max_grad_norm: 1.0
102
+ weight_decay: 0.0
103
+ logging_frequency: 1
104
+ gradient_clipping: 1.0
105
+ gradient_accumulation_steps: 2
106
+ distributed_backend: none
107
+ is_resume: false
108
+ pretrained_checkpoint: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_last_8_layers/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
109
+ resume_step: 0
110
+ reload_modules: null
111
+ optimizer:
112
+ name: AdamW
113
+ betas:
114
+ - 0.9
115
+ - 0.95
116
+ eps: 1.0e-08
117
+ weight_decay: 1.0e-08
118
+ fused: false
119
+ save_format: pt
120
+ workspace_dir: WORKSPACE_DIR
121
+ run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
122
+ seed: 42
123
+ wandb_entity: saberrr-zju
124
+ wandb_project: starVLA_rl_games
125
+ auth:
126
+ env_file: null
127
+ hf_token_env: HF_TOKEN
128
+ wandb_api_key_env: WANDB_API_KEY
129
+ paths:
130
+ run_root_dir: results/Checkpoints
131
+ dataset_local_dir: playground/Datasets/rl_games
132
+ dataset_cache_dir: null
133
+ base_model_dir: ${bridge_base_model.local_dir.${model}}
134
+ accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
135
+ launch:
136
+ use_accelerate: true
137
+ gpus: null
138
+ num_processes: 1
139
+ dry_run: false
140
+ conda:
141
+ enabled: true
142
+ env_name: null
143
+ rl_games:
144
+ model_alias: openvla
145
+ env_eval:
146
+ image_size: 224
147
+ frameskip: 1
148
+ seed: 42
149
+ fixed_episode_seeds: true
150
+ latency_seed_stride: 0
151
+ task_seed_stride: 0
152
+ task_description: ''
153
+ enabled: true
154
+ distributed_mode: none
155
+ vectorized:
156
+ enabled: false
157
+ batch_size: 1
158
+ latency:
159
+ prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
160
+ mode: single
161
+ values:
162
+ - 0
163
+ mid_train:
164
+ enabled: true
165
+ interval_steps: 250
166
+ latencies:
167
+ - 2
168
+ num_episodes: 5
169
+ max_steps_per_episode: 3600
170
+ post_train:
171
+ enabled: false
172
+ latencies:
173
+ - 0
174
+ - 1
175
+ - 2
176
+ - 3
177
+ - 4
178
+ num_episodes: 5
179
+ max_steps_per_episode: 3600
180
+ task: flappy
181
+ initialization_mode: bridge
182
+ action_carrier: bridge
183
+ model: openvla
184
+ env: flappy
185
+ init: bridge
186
+ bridge_base_model:
187
+ repo_id:
188
+ openvla: Qwen/Qwen3-VL-4B-Instruct
189
+ pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
190
+ pi05: Qwen/Qwen3-VL-4B-Instruct
191
+ gr00t: Qwen/Qwen3-VL-4B-Instruct
192
+ local_dir:
193
+ openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
194
+ pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
195
+ pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
196
+ gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
197
+ mode: single
198
+ checkpoint:
199
+ load: auto
200
+ hf_repo_id: null
201
+ save_best_model: false
202
+ save_pt_file: false
203
+ local:
204
+ keep_last_n: 1
205
+ sync:
206
+ enabled: false
207
+ repo_id: null
208
+ keep_last_n: 0
209
+ sync_every_n_checkpoints: 1
210
+ resume_policy: local_latest
211
+ run_id: flappy_fix_latency_2_200ep_last_8_layers
212
+ output_dir: null
213
+ config_yaml: null
214
+ is_debug: false
215
+ version_id: '0.21'
flappy_fix_latency_2_200ep_last_8_layers/hydra/.hydra/hydra.yaml ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${run_root_dir}/${run_id}/hydra
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - model=openvla
116
+ - env=flappy
117
+ - init=bridge
118
+ - mode=single
119
+ - run_id=flappy_fix_latency_2_200ep_last_8_layers
120
+ - run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
121
+ - seed=42
122
+ - wandb_entity=saberrr-zju
123
+ - wandb_project=starVLA_rl_games
124
+ - rl_games.env_eval.enabled=true
125
+ - checkpoint.sync.enabled=false
126
+ - checkpoint.sync.keep_last_n=0
127
+ - checkpoint.local.keep_last_n=1
128
+ - checkpoint.save_best_model=false
129
+ - checkpoint.save_pt_file=false
130
+ - trainer.is_resume=false
131
+ - trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_last_8_layers/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
132
+ - trainer.resume_step=0
133
+ - trainer.max_train_steps=5000
134
+ - trainer.num_warmup_steps=100
135
+ - trainer.save_interval=500
136
+ - trainer.eval_interval=100
137
+ - trainer.eval_num_batches=100
138
+ - trainer.eval_action_classification=true
139
+ - trainer.logging_frequency=1
140
+ - trainer.gradient_accumulation_steps=2
141
+ - trainer.distributed_backend=none
142
+ - trainer.learning_rate.base=2e-05
143
+ - trainer.learning_rate.qwen_vl_interface=1e-05
144
+ - trainer.learning_rate.action_model=0.0001
145
+ - trainer.lr_scheduler_type=cosine_with_min_lr
146
+ - trainer.scheduler_specific_kwargs.min_lr=1e-06
147
+ - trainer.freeze_llm_bottom_ratio=0.7778
148
+ - trainer.loss_scale.vla=1.0
149
+ - trainer.loss_scale.vlm=0.1
150
+ - trainer.max_grad_norm=1.0
151
+ - trainer.weight_decay=0.0
152
+ - trainer.gradient_clipping=1.0
153
+ - trainer.optimizer.name=AdamW
154
+ - trainer.optimizer.betas=[0.9,0.95]
155
+ - trainer.optimizer.eps=1e-08
156
+ - trainer.optimizer.weight_decay=1e-08
157
+ - trainer.optimizer.fused=false
158
+ - trainer.save_format=pt
159
+ - framework.name=QwenOFT
160
+ - framework.qwenvl.attn_implementation=flash_attention_2
161
+ - framework.qwenvl.enable_gradient_checkpointing=true
162
+ - framework.action_model.action_dim=7
163
+ - framework.action_model.action_env_dim=2
164
+ - framework.action_model.state_dim=7
165
+ - framework.action_model.loss_type=discrete_ce
166
+ - framework.action_model.action_horizon=1
167
+ - framework.action_model.future_action_window_size=0
168
+ - framework.action_model.past_action_window_size=0
169
+ - datasets.vla_data.include_state=true
170
+ - datasets.vla_data.action_type=discrete
171
+ - datasets.vla_data.sequential_step_sampling=false
172
+ - datasets.vla_data.shuffle=true
173
+ - datasets.vla_data.action_balance.enabled=false
174
+ - datasets.vla_data.action_balance.strategy=balanced_epoch
175
+ - datasets.vla_data.action_balance.action_key=action_id
176
+ - datasets.vla_data.action_balance.target_flap_fraction=0.3
177
+ - datasets.vla_data.action_balance.noop_id=0
178
+ - datasets.vla_data.action_balance.flap_id=1
179
+ - datasets.vla_data.latency_curriculum.enabled=false
180
+ - datasets.vla_data.latency_curriculum.strategy=exclusive
181
+ - datasets.vla_data.per_device_batch_size=64
182
+ - datasets.vla_data.num_workers=8
183
+ - datasets.vla_data.eval_num_workers=8
184
+ - datasets.vla_data.prefetch_factor=4
185
+ - datasets.vla_data.load_all_data_for_training=true
186
+ - datasets.vla_data.video_backend=torchvision_av
187
+ - datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
188
+ - datasets.vla_data.data_mix=flappy_train__bridge
189
+ - datasets.vla_data.eval_data_mix=flappy_train__bridge__val
190
+ - framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
191
+ - rl_games.task=flappy
192
+ - rl_games.model_alias=openvla
193
+ - rl_games.initialization_mode=bridge
194
+ - rl_games.action_carrier=bridge
195
+ - rl_games.env_eval.distributed_mode=none
196
+ - rl_games.env_eval.latency.mode=single
197
+ - rl_games.env_eval.frameskip=1
198
+ - rl_games.env_eval.image_size=224
199
+ - rl_games.env_eval.seed=42
200
+ - rl_games.env_eval.fixed_episode_seeds=true
201
+ - rl_games.env_eval.latency_seed_stride=0
202
+ - rl_games.env_eval.task_seed_stride=0
203
+ - rl_games.env_eval.latency.values=[0]
204
+ - rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
205
+ - rl_games.env_eval.mid_train.enabled=true
206
+ - rl_games.env_eval.mid_train.interval_steps=250
207
+ - rl_games.env_eval.mid_train.num_episodes=5
208
+ - rl_games.env_eval.mid_train.max_steps_per_episode=3600
209
+ - rl_games.env_eval.mid_train.latencies=[2]
210
+ - rl_games.env_eval.post_train.enabled=false
211
+ - rl_games.env_eval.post_train.num_episodes=5
212
+ - rl_games.env_eval.post_train.max_steps_per_episode=3600
213
+ - rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
214
+ job:
215
+ name: train_starvla_hydra
216
+ chdir: false
217
+ override_dirname: checkpoint.local.keep_last_n=1,checkpoint.save_best_model=false,checkpoint.save_pt_file=false,checkpoint.sync.enabled=false,checkpoint.sync.keep_last_n=0,datasets.vla_data.action_balance.action_key=action_id,datasets.vla_data.action_balance.enabled=false,datasets.vla_data.action_balance.flap_id=1,datasets.vla_data.action_balance.noop_id=0,datasets.vla_data.action_balance.strategy=balanced_epoch,datasets.vla_data.action_balance.target_flap_fraction=0.3,datasets.vla_data.action_type=discrete,datasets.vla_data.data_mix=flappy_train__bridge,datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep,datasets.vla_data.eval_data_mix=flappy_train__bridge__val,datasets.vla_data.eval_num_workers=8,datasets.vla_data.include_state=true,datasets.vla_data.latency_curriculum.enabled=false,datasets.vla_data.latency_curriculum.strategy=exclusive,datasets.vla_data.load_all_data_for_training=true,datasets.vla_data.num_workers=8,datasets.vla_data.per_device_batch_size=64,datasets.vla_data.prefetch_factor=4,datasets.vla_data.sequential_step_sampling=false,datasets.vla_data.shuffle=true,datasets.vla_data.video_backend=torchvision_av,env=flappy,framework.action_model.action_dim=7,framework.action_model.action_env_dim=2,framework.action_model.action_horizon=1,framework.action_model.future_action_window_size=0,framework.action_model.loss_type=discrete_ce,framework.action_model.past_action_window_size=0,framework.action_model.state_dim=7,framework.name=QwenOFT,framework.qwenvl.attn_implementation=flash_attention_2,framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct,framework.qwenvl.enable_gradient_checkpointing=true,init=bridge,mode=single,model=openvla,rl_games.action_carrier=bridge,rl_games.env_eval.distributed_mode=none,rl_games.env_eval.enabled=true,rl_games.env_eval.fixed_episode_seeds=true,rl_games.env_eval.frameskip=1,rl_games.env_eval.image_size=224,rl_games.env_eval.latency.mode=single,rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json,rl_games.env_eval.latency.values=[0],rl_games.env_eval.latency_seed_stride=0,rl_games.env_eval.mid_train.enabled=true,rl_games.env_eval.mid_train.interval_steps=250,rl_games.env_eval.mid_train.latencies=[2],rl_games.env_eval.mid_train.max_steps_per_episode=3600,rl_games.env_eval.mid_train.num_episodes=5,rl_games.env_eval.post_train.enabled=false,rl_games.env_eval.post_train.latencies=[0,1,2,3,4],rl_games.env_eval.post_train.max_steps_per_episode=3600,rl_games.env_eval.post_train.num_episodes=5,rl_games.env_eval.seed=42,rl_games.env_eval.task_seed_stride=0,rl_games.initialization_mode=bridge,rl_games.model_alias=openvla,rl_games.task=flappy,run_id=flappy_fix_latency_2_200ep_last_8_layers,run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints,seed=42,trainer.distributed_backend=none,trainer.eval_action_classification=true,trainer.eval_interval=100,trainer.eval_num_batches=100,trainer.freeze_llm_bottom_ratio=0.7778,trainer.gradient_accumulation_steps=2,trainer.gradient_clipping=1.0,trainer.is_resume=false,trainer.learning_rate.action_model=0.0001,trainer.learning_rate.base=2e-05,trainer.learning_rate.qwen_vl_interface=1e-05,trainer.logging_frequency=1,trainer.loss_scale.vla=1.0,trainer.loss_scale.vlm=0.1,trainer.lr_scheduler_type=cosine_with_min_lr,trainer.max_grad_norm=1.0,trainer.max_train_steps=5000,trainer.num_warmup_steps=100,trainer.optimizer.betas=[0.9,0.95],trainer.optimizer.eps=1e-08,trainer.optimizer.fused=false,trainer.optimizer.name=AdamW,trainer.optimizer.weight_decay=1e-08,trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_last_8_layers/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt,trainer.resume_step=0,trainer.save_format=pt,trainer.save_interval=500,trainer.scheduler_specific_kwargs.min_lr=1e-06,trainer.weight_decay=0.0,wandb_entity=saberrr-zju,wandb_project=starVLA_rl_games
218
+ id: ???
219
+ num: ???
220
+ config_name: train
221
+ env_set: {}
222
+ env_copy: []
223
+ config:
224
+ override_dirname:
225
+ kv_sep: '='
226
+ item_sep: ','
227
+ exclude_keys: []
228
+ runtime:
229
+ version: 1.3.3
230
+ version_base: '1.1'
231
+ cwd: /workspace/latency-sensitive-bench/starVLA
232
+ config_sources:
233
+ - path: hydra.conf
234
+ schema: pkg
235
+ provider: hydra
236
+ - path: /workspace/latency-sensitive-bench/starVLA/examples/rl_games/config
237
+ schema: file
238
+ provider: main
239
+ - path: ''
240
+ schema: structured
241
+ provider: schema
242
+ output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_last_8_layers/hydra
243
+ choices:
244
+ cross_task_setup: null
245
+ checkpoint: default
246
+ mode: single
247
+ init: bridge
248
+ env: flappy
249
+ model: openvla
250
+ hydra/env: default
251
+ hydra/callbacks: null
252
+ hydra/job_logging: default
253
+ hydra/hydra_logging: default
254
+ hydra/hydra_help: default
255
+ hydra/help: default
256
+ hydra/sweeper: basic
257
+ hydra/launcher: basic
258
+ hydra/output: default
259
+ verbose: false
flappy_fix_latency_2_200ep_last_8_layers/hydra/.hydra/overrides.yaml ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - model=openvla
2
+ - env=flappy
3
+ - init=bridge
4
+ - mode=single
5
+ - run_id=flappy_fix_latency_2_200ep_last_8_layers
6
+ - run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
7
+ - seed=42
8
+ - wandb_entity=saberrr-zju
9
+ - wandb_project=starVLA_rl_games
10
+ - rl_games.env_eval.enabled=true
11
+ - checkpoint.sync.enabled=false
12
+ - checkpoint.sync.keep_last_n=0
13
+ - checkpoint.local.keep_last_n=1
14
+ - checkpoint.save_best_model=false
15
+ - checkpoint.save_pt_file=false
16
+ - trainer.is_resume=false
17
+ - trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_last_8_layers/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
18
+ - trainer.resume_step=0
19
+ - trainer.max_train_steps=5000
20
+ - trainer.num_warmup_steps=100
21
+ - trainer.save_interval=500
22
+ - trainer.eval_interval=100
23
+ - trainer.eval_num_batches=100
24
+ - trainer.eval_action_classification=true
25
+ - trainer.logging_frequency=1
26
+ - trainer.gradient_accumulation_steps=2
27
+ - trainer.distributed_backend=none
28
+ - trainer.learning_rate.base=2e-05
29
+ - trainer.learning_rate.qwen_vl_interface=1e-05
30
+ - trainer.learning_rate.action_model=0.0001
31
+ - trainer.lr_scheduler_type=cosine_with_min_lr
32
+ - trainer.scheduler_specific_kwargs.min_lr=1e-06
33
+ - trainer.freeze_llm_bottom_ratio=0.7778
34
+ - trainer.loss_scale.vla=1.0
35
+ - trainer.loss_scale.vlm=0.1
36
+ - trainer.max_grad_norm=1.0
37
+ - trainer.weight_decay=0.0
38
+ - trainer.gradient_clipping=1.0
39
+ - trainer.optimizer.name=AdamW
40
+ - trainer.optimizer.betas=[0.9,0.95]
41
+ - trainer.optimizer.eps=1e-08
42
+ - trainer.optimizer.weight_decay=1e-08
43
+ - trainer.optimizer.fused=false
44
+ - trainer.save_format=pt
45
+ - framework.name=QwenOFT
46
+ - framework.qwenvl.attn_implementation=flash_attention_2
47
+ - framework.qwenvl.enable_gradient_checkpointing=true
48
+ - framework.action_model.action_dim=7
49
+ - framework.action_model.action_env_dim=2
50
+ - framework.action_model.state_dim=7
51
+ - framework.action_model.loss_type=discrete_ce
52
+ - framework.action_model.action_horizon=1
53
+ - framework.action_model.future_action_window_size=0
54
+ - framework.action_model.past_action_window_size=0
55
+ - datasets.vla_data.include_state=true
56
+ - datasets.vla_data.action_type=discrete
57
+ - datasets.vla_data.sequential_step_sampling=false
58
+ - datasets.vla_data.shuffle=true
59
+ - datasets.vla_data.action_balance.enabled=false
60
+ - datasets.vla_data.action_balance.strategy=balanced_epoch
61
+ - datasets.vla_data.action_balance.action_key=action_id
62
+ - datasets.vla_data.action_balance.target_flap_fraction=0.3
63
+ - datasets.vla_data.action_balance.noop_id=0
64
+ - datasets.vla_data.action_balance.flap_id=1
65
+ - datasets.vla_data.latency_curriculum.enabled=false
66
+ - datasets.vla_data.latency_curriculum.strategy=exclusive
67
+ - datasets.vla_data.per_device_batch_size=64
68
+ - datasets.vla_data.num_workers=8
69
+ - datasets.vla_data.eval_num_workers=8
70
+ - datasets.vla_data.prefetch_factor=4
71
+ - datasets.vla_data.load_all_data_for_training=true
72
+ - datasets.vla_data.video_backend=torchvision_av
73
+ - datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
74
+ - datasets.vla_data.data_mix=flappy_train__bridge
75
+ - datasets.vla_data.eval_data_mix=flappy_train__bridge__val
76
+ - framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
77
+ - rl_games.task=flappy
78
+ - rl_games.model_alias=openvla
79
+ - rl_games.initialization_mode=bridge
80
+ - rl_games.action_carrier=bridge
81
+ - rl_games.env_eval.distributed_mode=none
82
+ - rl_games.env_eval.latency.mode=single
83
+ - rl_games.env_eval.frameskip=1
84
+ - rl_games.env_eval.image_size=224
85
+ - rl_games.env_eval.seed=42
86
+ - rl_games.env_eval.fixed_episode_seeds=true
87
+ - rl_games.env_eval.latency_seed_stride=0
88
+ - rl_games.env_eval.task_seed_stride=0
89
+ - rl_games.env_eval.latency.values=[0]
90
+ - rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
91
+ - rl_games.env_eval.mid_train.enabled=true
92
+ - rl_games.env_eval.mid_train.interval_steps=250
93
+ - rl_games.env_eval.mid_train.num_episodes=5
94
+ - rl_games.env_eval.mid_train.max_steps_per_episode=3600
95
+ - rl_games.env_eval.mid_train.latencies=[2]
96
+ - rl_games.env_eval.post_train.enabled=false
97
+ - rl_games.env_eval.post_train.num_episodes=5
98
+ - rl_games.env_eval.post_train.max_steps_per_episode=3600
99
+ - rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
flappy_fix_latency_2_200ep_last_8_layers/hydra/train_starvla_hydra.log ADDED
The diff for this file is too large to render. See raw diff
 
flappy_fix_latency_2_200ep_last_8_layers/summary.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"steps": 500}
2
+ {"steps": 1000}
3
+ {"steps": 1500}
4
+ {"steps": 2000}
5
+ {"steps": 2500}
6
+ {"steps": 3000}
7
+ {"steps": 3500}
8
+ {"steps": 4000}
9
+ {"steps": 4500}
10
+ {"steps": 5000}