Upload folder using huggingface_hub
Browse files- flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/model.safetensors +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/optimizer.bin +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/random_states_0.pkl +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/config.full.yaml +219 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/config.yaml +99 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics.json +127 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics_eval.json +127 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_5000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/config.yaml +217 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/hydra.yaml +266 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/overrides.yaml +106 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/train_starvla_hydra.log +0 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected/summary.jsonl +10 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:371cb744227687bb99bcad7f9ff2250cf06da75631359ad3eba4c6bc52570607
|
| 3 |
+
size 9785060316
|
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff343b81e73667493a90c8c0696872ab7f3ed1bf55fae59cf8b57f9785b040eb
|
| 3 |
+
size 9138230516
|
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1a7b363b24f7908954dadea5afd7d198c03fc118e223f915b959f6d4d178060
|
| 3 |
+
size 18276885098
|
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e27bbcb30ed5d67c406a486689500f03328292ad932e058af8734a4eacb28fea
|
| 3 |
+
size 14821
|
flappy_fix_latency_2_200ep_full_tuning_corrected/config.full.yaml
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
framework:
|
| 2 |
+
name: QwenOFT
|
| 3 |
+
qwenvl:
|
| 4 |
+
base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 5 |
+
attn_implementation: flash_attention_2
|
| 6 |
+
enable_gradient_checkpointing: true
|
| 7 |
+
action_model:
|
| 8 |
+
action_model_type: MLP
|
| 9 |
+
action_dim: 7
|
| 10 |
+
action_hidden_dim: 2560
|
| 11 |
+
future_action_window_size: 0
|
| 12 |
+
past_action_window_size: 0
|
| 13 |
+
loss_type: discrete_ce
|
| 14 |
+
state_dim: 7
|
| 15 |
+
action_horizon: 1
|
| 16 |
+
action_env_dim: 2
|
| 17 |
+
datasets:
|
| 18 |
+
vla_data:
|
| 19 |
+
dataset_py: lerobot_datasets
|
| 20 |
+
include_state: true
|
| 21 |
+
data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 22 |
+
data_mix: flappy_train__bridge
|
| 23 |
+
eval_data_mix: flappy_train__bridge__val
|
| 24 |
+
custom_mixtures_path: null
|
| 25 |
+
action_type: discrete
|
| 26 |
+
sequential_step_sampling: false
|
| 27 |
+
eval_sequential_step_sampling: null
|
| 28 |
+
num_workers: 8
|
| 29 |
+
eval_num_workers: 8
|
| 30 |
+
prefetch_factor: 4
|
| 31 |
+
persistent_workers: true
|
| 32 |
+
pin_memory: true
|
| 33 |
+
shuffle: true
|
| 34 |
+
action_balance:
|
| 35 |
+
enabled: false
|
| 36 |
+
strategy: balanced_epoch
|
| 37 |
+
action_key: action_id
|
| 38 |
+
target_flap_fraction: 0.3
|
| 39 |
+
noop_id: 0
|
| 40 |
+
flap_id: 1
|
| 41 |
+
latency_curriculum:
|
| 42 |
+
enabled: false
|
| 43 |
+
strategy: exclusive
|
| 44 |
+
latencies: null
|
| 45 |
+
phase_steps: null
|
| 46 |
+
per_device_batch_size: 32
|
| 47 |
+
load_all_data_for_training: true
|
| 48 |
+
num_obs_frames: 1
|
| 49 |
+
image_mode: single
|
| 50 |
+
stitch_grid:
|
| 51 |
+
- 2
|
| 52 |
+
- 2
|
| 53 |
+
obs_image_size: null
|
| 54 |
+
video_backend: torchvision_av
|
| 55 |
+
dataset:
|
| 56 |
+
source_hf: ''
|
| 57 |
+
config_name: null
|
| 58 |
+
source_subdir: null
|
| 59 |
+
converted_name: flappy_train
|
| 60 |
+
single_source_hf: ''
|
| 61 |
+
mixed_source_hf: ''
|
| 62 |
+
single_converted_name: flappy_train
|
| 63 |
+
mixed_converted_name: flappy_mixed_latency_train
|
| 64 |
+
single_latency_filter: null
|
| 65 |
+
mixed_latency_filter: null
|
| 66 |
+
force_download: false
|
| 67 |
+
setup_force: false
|
| 68 |
+
skip_verification: false
|
| 69 |
+
verify_rows: 200
|
| 70 |
+
max_episodes: null
|
| 71 |
+
episodes_per_latency: null
|
| 72 |
+
latency_filter: null
|
| 73 |
+
debug_subset:
|
| 74 |
+
enabled: false
|
| 75 |
+
max_episodes: 5
|
| 76 |
+
suffix: debug
|
| 77 |
+
base_model:
|
| 78 |
+
repo_id: Qwen/Qwen3-VL-4B-Instruct
|
| 79 |
+
initialization:
|
| 80 |
+
checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
|
| 81 |
+
checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
|
| 82 |
+
checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
|
| 83 |
+
trainer:
|
| 84 |
+
max_train_steps: 5000
|
| 85 |
+
num_warmup_steps: 100
|
| 86 |
+
save_interval: 500
|
| 87 |
+
eval_interval: 100
|
| 88 |
+
eval_num_batches: 100
|
| 89 |
+
per_latency_eval_num_batches: null
|
| 90 |
+
eval_action_classification: true
|
| 91 |
+
eval_action_classification_interval: null
|
| 92 |
+
cc_f1_tolerance: 1
|
| 93 |
+
learning_rate:
|
| 94 |
+
base: 2.0e-05
|
| 95 |
+
qwen_vl_interface: 1.0e-05
|
| 96 |
+
action_model: 0.0001
|
| 97 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 98 |
+
scheduler_specific_kwargs:
|
| 99 |
+
min_lr: 1.0e-06
|
| 100 |
+
freeze_modules: ''
|
| 101 |
+
freeze_llm_bottom_ratio: 0.0
|
| 102 |
+
loss_scale:
|
| 103 |
+
vla: 1.0
|
| 104 |
+
vlm: 0.1
|
| 105 |
+
max_grad_norm: 1.0
|
| 106 |
+
weight_decay: 0.0
|
| 107 |
+
logging_frequency: 1
|
| 108 |
+
gradient_clipping: 1.0
|
| 109 |
+
gradient_accumulation_steps: 4
|
| 110 |
+
distributed_backend: none
|
| 111 |
+
is_resume: false
|
| 112 |
+
pretrained_checkpoint: null
|
| 113 |
+
resume_step: 0
|
| 114 |
+
reload_modules: null
|
| 115 |
+
optimizer:
|
| 116 |
+
name: AdamW
|
| 117 |
+
betas:
|
| 118 |
+
- 0.9
|
| 119 |
+
- 0.95
|
| 120 |
+
eps: 1.0e-08
|
| 121 |
+
weight_decay: 1.0e-08
|
| 122 |
+
fused: true
|
| 123 |
+
save_format: pt
|
| 124 |
+
workspace_dir: WORKSPACE_DIR
|
| 125 |
+
run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 126 |
+
seed: 42
|
| 127 |
+
wandb_entity: saberrr-zju
|
| 128 |
+
wandb_project: starVLA_rl_games
|
| 129 |
+
auth:
|
| 130 |
+
env_file: null
|
| 131 |
+
hf_token_env: HF_TOKEN
|
| 132 |
+
wandb_api_key_env: WANDB_API_KEY
|
| 133 |
+
paths:
|
| 134 |
+
run_root_dir: results/Checkpoints
|
| 135 |
+
dataset_local_dir: playground/Datasets/rl_games
|
| 136 |
+
dataset_cache_dir: null
|
| 137 |
+
base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 138 |
+
accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
|
| 139 |
+
launch:
|
| 140 |
+
use_accelerate: true
|
| 141 |
+
gpus: null
|
| 142 |
+
num_processes: 1
|
| 143 |
+
dry_run: false
|
| 144 |
+
conda:
|
| 145 |
+
enabled: true
|
| 146 |
+
env_name: null
|
| 147 |
+
rl_games:
|
| 148 |
+
model_alias: openvla
|
| 149 |
+
env_eval:
|
| 150 |
+
image_size: 224
|
| 151 |
+
frameskip: 1
|
| 152 |
+
seed: 42
|
| 153 |
+
fixed_episode_seeds: true
|
| 154 |
+
latency_seed_stride: 0
|
| 155 |
+
task_seed_stride: 0
|
| 156 |
+
task_description: ''
|
| 157 |
+
enabled: true
|
| 158 |
+
distributed_mode: none
|
| 159 |
+
vectorized:
|
| 160 |
+
enabled: false
|
| 161 |
+
batch_size: 1
|
| 162 |
+
latency:
|
| 163 |
+
prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 164 |
+
mode: single
|
| 165 |
+
values:
|
| 166 |
+
- 0
|
| 167 |
+
mid_train:
|
| 168 |
+
enabled: true
|
| 169 |
+
interval_steps: 250
|
| 170 |
+
latencies:
|
| 171 |
+
- 2
|
| 172 |
+
num_episodes: 5
|
| 173 |
+
max_steps_per_episode: 3600
|
| 174 |
+
post_train:
|
| 175 |
+
enabled: false
|
| 176 |
+
latencies:
|
| 177 |
+
- 0
|
| 178 |
+
- 1
|
| 179 |
+
- 2
|
| 180 |
+
- 3
|
| 181 |
+
- 4
|
| 182 |
+
num_episodes: 5
|
| 183 |
+
max_steps_per_episode: 3600
|
| 184 |
+
task: flappy
|
| 185 |
+
initialization_mode: bridge
|
| 186 |
+
action_carrier: bridge
|
| 187 |
+
model: openvla
|
| 188 |
+
env: flappy
|
| 189 |
+
init: bridge
|
| 190 |
+
bridge_base_model:
|
| 191 |
+
repo_id:
|
| 192 |
+
openvla: Qwen/Qwen3-VL-4B-Instruct
|
| 193 |
+
pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
|
| 194 |
+
pi05: Qwen/Qwen3-VL-4B-Instruct
|
| 195 |
+
gr00t: Qwen/Qwen3-VL-4B-Instruct
|
| 196 |
+
local_dir:
|
| 197 |
+
openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 198 |
+
pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
|
| 199 |
+
pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 200 |
+
gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 201 |
+
mode: single
|
| 202 |
+
checkpoint:
|
| 203 |
+
load: auto
|
| 204 |
+
hf_repo_id: null
|
| 205 |
+
save_best_model: false
|
| 206 |
+
save_pt_file: false
|
| 207 |
+
local:
|
| 208 |
+
keep_last_n: 1
|
| 209 |
+
sync:
|
| 210 |
+
enabled: false
|
| 211 |
+
repo_id: null
|
| 212 |
+
keep_last_n: 0
|
| 213 |
+
sync_every_n_checkpoints: 1
|
| 214 |
+
resume_policy: local_latest
|
| 215 |
+
run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
|
| 216 |
+
output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected
|
| 217 |
+
config_yaml: null
|
| 218 |
+
is_debug: false
|
| 219 |
+
version_id: '0.21'
|
flappy_fix_latency_2_200ep_full_tuning_corrected/config.yaml
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
checkpoint:
|
| 2 |
+
local:
|
| 3 |
+
keep_last_n: 1
|
| 4 |
+
save_best_model: false
|
| 5 |
+
save_pt_file: false
|
| 6 |
+
sync:
|
| 7 |
+
enabled: false
|
| 8 |
+
keep_last_n: 0
|
| 9 |
+
repo_id: null
|
| 10 |
+
datasets:
|
| 11 |
+
vla_data:
|
| 12 |
+
data_mix: flappy_train__bridge
|
| 13 |
+
dataset_py: lerobot_datasets
|
| 14 |
+
eval_data_mix: flappy_train__bridge__val
|
| 15 |
+
eval_num_workers: 8
|
| 16 |
+
include_state: true
|
| 17 |
+
latency_curriculum:
|
| 18 |
+
enabled: false
|
| 19 |
+
obs_image_size: null
|
| 20 |
+
per_device_batch_size: 32
|
| 21 |
+
persistent_workers: true
|
| 22 |
+
pin_memory: true
|
| 23 |
+
prefetch_factor: 4
|
| 24 |
+
framework:
|
| 25 |
+
action_model:
|
| 26 |
+
action_dim: 7
|
| 27 |
+
action_env_dim: 2
|
| 28 |
+
action_hidden_dim: 2560
|
| 29 |
+
action_horizon: 1
|
| 30 |
+
action_model_type: MLP
|
| 31 |
+
loss_type: discrete_ce
|
| 32 |
+
state_dim: 7
|
| 33 |
+
name: QwenOFT
|
| 34 |
+
qwenvl:
|
| 35 |
+
attn_implementation: flash_attention_2
|
| 36 |
+
base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 37 |
+
enable_gradient_checkpointing: true
|
| 38 |
+
output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected
|
| 39 |
+
rl_games:
|
| 40 |
+
env_eval:
|
| 41 |
+
distributed_mode: none
|
| 42 |
+
enabled: true
|
| 43 |
+
fixed_episode_seeds: true
|
| 44 |
+
frameskip: 1
|
| 45 |
+
image_size: 224
|
| 46 |
+
latency:
|
| 47 |
+
prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 48 |
+
latency_seed_stride: 0
|
| 49 |
+
mid_train:
|
| 50 |
+
enabled: true
|
| 51 |
+
interval_steps: 250
|
| 52 |
+
latencies:
|
| 53 |
+
- 2
|
| 54 |
+
max_steps_per_episode: 3600
|
| 55 |
+
num_episodes: 5
|
| 56 |
+
seed: 42
|
| 57 |
+
task_description: ''
|
| 58 |
+
task_seed_stride: 0
|
| 59 |
+
vectorized:
|
| 60 |
+
enabled: false
|
| 61 |
+
model_alias: openvla
|
| 62 |
+
task: flappy
|
| 63 |
+
run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
|
| 64 |
+
run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 65 |
+
seed: 42
|
| 66 |
+
trainer:
|
| 67 |
+
distributed_backend: none
|
| 68 |
+
eval_action_classification: true
|
| 69 |
+
eval_action_classification_interval: null
|
| 70 |
+
eval_interval: 100
|
| 71 |
+
eval_num_batches: 100
|
| 72 |
+
freeze_llm_bottom_ratio: 0.0
|
| 73 |
+
freeze_modules: ''
|
| 74 |
+
gradient_accumulation_steps: 4
|
| 75 |
+
gradient_clipping: 1.0
|
| 76 |
+
is_resume: false
|
| 77 |
+
learning_rate:
|
| 78 |
+
action_model: 0.0001
|
| 79 |
+
base: 2.0e-05
|
| 80 |
+
qwen_vl_interface: 1.0e-05
|
| 81 |
+
logging_frequency: 1
|
| 82 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 83 |
+
max_train_steps: 5000
|
| 84 |
+
num_warmup_steps: 100
|
| 85 |
+
optimizer:
|
| 86 |
+
betas:
|
| 87 |
+
- 0.9
|
| 88 |
+
- 0.95
|
| 89 |
+
eps: 1.0e-08
|
| 90 |
+
fused: true
|
| 91 |
+
weight_decay: 1.0e-08
|
| 92 |
+
per_latency_eval_num_batches: null
|
| 93 |
+
pretrained_checkpoint: null
|
| 94 |
+
reload_modules: null
|
| 95 |
+
save_interval: 500
|
| 96 |
+
scheduler_specific_kwargs:
|
| 97 |
+
min_lr: 1.0e-06
|
| 98 |
+
wandb_entity: saberrr-zju
|
| 99 |
+
wandb_project: starVLA_rl_games
|
flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics.json
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.6028500199317932,
|
| 6 |
+
0.3971499800682068,
|
| 7 |
+
0.0,
|
| 8 |
+
0.0,
|
| 9 |
+
0.0,
|
| 10 |
+
0.0,
|
| 11 |
+
0.0
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.4890792667865753,
|
| 15 |
+
0.4890792667865753,
|
| 16 |
+
0.0,
|
| 17 |
+
0.0,
|
| 18 |
+
0.0,
|
| 19 |
+
0.0,
|
| 20 |
+
0.0
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
1.0,
|
| 24 |
+
1.0,
|
| 25 |
+
0.0,
|
| 26 |
+
0.0,
|
| 27 |
+
0.0,
|
| 28 |
+
0.0,
|
| 29 |
+
0.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
0.0,
|
| 33 |
+
0.0,
|
| 34 |
+
0.0,
|
| 35 |
+
0.0,
|
| 36 |
+
0.0,
|
| 37 |
+
0.0,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
0.0,
|
| 42 |
+
0.0,
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
1.0,
|
| 51 |
+
1.0,
|
| 52 |
+
0.0,
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0,
|
| 56 |
+
0.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
true
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0,
|
| 73 |
+
0.0,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0
|
| 77 |
+
],
|
| 78 |
+
"std": [
|
| 79 |
+
0.0,
|
| 80 |
+
0.0,
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"max": [
|
| 88 |
+
0.0,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0,
|
| 91 |
+
0.0,
|
| 92 |
+
0.0,
|
| 93 |
+
0.0,
|
| 94 |
+
0.0
|
| 95 |
+
],
|
| 96 |
+
"min": [
|
| 97 |
+
0.0,
|
| 98 |
+
0.0,
|
| 99 |
+
0.0,
|
| 100 |
+
0.0,
|
| 101 |
+
0.0,
|
| 102 |
+
0.0,
|
| 103 |
+
0.0
|
| 104 |
+
],
|
| 105 |
+
"q01": [
|
| 106 |
+
0.0,
|
| 107 |
+
0.0,
|
| 108 |
+
0.0,
|
| 109 |
+
0.0,
|
| 110 |
+
0.0,
|
| 111 |
+
0.0,
|
| 112 |
+
0.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0
|
| 122 |
+
]
|
| 123 |
+
},
|
| 124 |
+
"num_transitions": 330734,
|
| 125 |
+
"num_trajectories": 180
|
| 126 |
+
}
|
| 127 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics_eval.json
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.7959861159324646,
|
| 6 |
+
0.2040138840675354,
|
| 7 |
+
0.0,
|
| 8 |
+
0.0,
|
| 9 |
+
0.0,
|
| 10 |
+
0.0,
|
| 11 |
+
0.0
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.4030573070049286,
|
| 15 |
+
0.4030573070049286,
|
| 16 |
+
0.0,
|
| 17 |
+
0.0,
|
| 18 |
+
0.0,
|
| 19 |
+
0.0,
|
| 20 |
+
0.0
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
1.0,
|
| 24 |
+
1.0,
|
| 25 |
+
0.0,
|
| 26 |
+
0.0,
|
| 27 |
+
0.0,
|
| 28 |
+
0.0,
|
| 29 |
+
0.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
0.0,
|
| 33 |
+
0.0,
|
| 34 |
+
0.0,
|
| 35 |
+
0.0,
|
| 36 |
+
0.0,
|
| 37 |
+
0.0,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
0.0,
|
| 42 |
+
0.0,
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
1.0,
|
| 51 |
+
1.0,
|
| 52 |
+
0.0,
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0,
|
| 56 |
+
0.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
true
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0,
|
| 73 |
+
0.0,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0
|
| 77 |
+
],
|
| 78 |
+
"std": [
|
| 79 |
+
0.0,
|
| 80 |
+
0.0,
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"max": [
|
| 88 |
+
0.0,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0,
|
| 91 |
+
0.0,
|
| 92 |
+
0.0,
|
| 93 |
+
0.0,
|
| 94 |
+
0.0
|
| 95 |
+
],
|
| 96 |
+
"min": [
|
| 97 |
+
0.0,
|
| 98 |
+
0.0,
|
| 99 |
+
0.0,
|
| 100 |
+
0.0,
|
| 101 |
+
0.0,
|
| 102 |
+
0.0,
|
| 103 |
+
0.0
|
| 104 |
+
],
|
| 105 |
+
"q01": [
|
| 106 |
+
0.0,
|
| 107 |
+
0.0,
|
| 108 |
+
0.0,
|
| 109 |
+
0.0,
|
| 110 |
+
0.0,
|
| 111 |
+
0.0,
|
| 112 |
+
0.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0
|
| 122 |
+
]
|
| 123 |
+
},
|
| 124 |
+
"num_transitions": 72000,
|
| 125 |
+
"num_trajectories": 20
|
| 126 |
+
}
|
| 127 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 79.39999999999868,
|
| 7 |
+
"mean_length": 659.2,
|
| 8 |
+
"std_reward": 62.80047770518764,
|
| 9 |
+
"std_length": 506.935656666603,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
31.900000000000162,
|
| 13 |
+
171.8999999999956,
|
| 14 |
+
50.40000000000037,
|
| 15 |
+
134.39999999999728
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
276,
|
| 20 |
+
1406,
|
| 21 |
+
425,
|
| 22 |
+
1103
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 2647,
|
| 26 |
+
"1": 649
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 79.39999999999868,
|
| 55 |
+
"mean_length": 659.2,
|
| 56 |
+
"std_reward": 62.80047770518764,
|
| 57 |
+
"std_length": 506.935656666603,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 79.39999999999868,
|
| 60 |
+
"macro_mean_length": 659.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 335.94000000001523,
|
| 7 |
+
"mean_length": 2724.8,
|
| 8 |
+
"std_reward": 154.00615052654007,
|
| 9 |
+
"std_length": 1240.415317544894,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
295.49999999999903,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
50.40000000000037
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
2399,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
425
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 10894,
|
| 26 |
+
"1": 2730
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 335.94000000001523,
|
| 55 |
+
"mean_length": 2724.8,
|
| 56 |
+
"std_reward": 154.00615052654007,
|
| 57 |
+
"std_length": 1240.415317544894,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 335.94000000001523,
|
| 60 |
+
"macro_mean_length": 2724.8,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 292.5400000000077,
|
| 7 |
+
"mean_length": 2375.8,
|
| 8 |
+
"std_reward": 145.61193082987145,
|
| 9 |
+
"std_length": 1173.9889948376858,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
52.70000000000039,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
269.8999999999946,
|
| 15 |
+
250.89999999999208
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
439,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
2197,
|
| 22 |
+
2043
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 9500,
|
| 26 |
+
"1": 2379
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 292.5400000000077,
|
| 55 |
+
"mean_length": 2375.8,
|
| 56 |
+
"std_reward": 145.61193082987145,
|
| 57 |
+
"std_length": 1173.9889948376858,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 292.5400000000077,
|
| 60 |
+
"macro_mean_length": 2375.8,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 390.9600000000196,
|
| 7 |
+
"mean_length": 3168.4,
|
| 8 |
+
"std_reward": 107.2800000000121,
|
| 9 |
+
"std_length": 863.2,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
176.3999999999954,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
1442,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 12633,
|
| 26 |
+
"1": 3209
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 390.9600000000196,
|
| 55 |
+
"mean_length": 3168.4,
|
| 56 |
+
"std_reward": 107.2800000000121,
|
| 57 |
+
"std_length": 863.2,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 390.9600000000196,
|
| 60 |
+
"macro_mean_length": 3168.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 282.3200000000154,
|
| 7 |
+
"mean_length": 2293.0,
|
| 8 |
+
"std_reward": 199.98049304870963,
|
| 9 |
+
"std_length": 1610.6818431956076,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
73.89999999999999,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
615,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
50,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 9138,
|
| 26 |
+
"1": 2327
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 282.3200000000154,
|
| 55 |
+
"mean_length": 2293.0,
|
| 56 |
+
"std_reward": 199.98049304870963,
|
| 57 |
+
"std_length": 1610.6818431956076,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 282.3200000000154,
|
| 60 |
+
"macro_mean_length": 2293.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 444.60000000002566,
|
| 7 |
+
"mean_length": 3600.0,
|
| 8 |
+
"std_reward": 0.0,
|
| 9 |
+
"std_length": 0.0,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 14346,
|
| 26 |
+
"1": 3654
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 444.60000000002566,
|
| 55 |
+
"mean_length": 3600.0,
|
| 56 |
+
"std_reward": 0.0,
|
| 57 |
+
"std_length": 0.0,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 444.60000000002566,
|
| 60 |
+
"macro_mean_length": 3600.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 65.11999999999905,
|
| 7 |
+
"mean_length": 543.4,
|
| 8 |
+
"std_reward": 57.274371231815906,
|
| 9 |
+
"std_length": 461.5935874771225,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
3.8999999999999986,
|
| 12 |
+
132.49999999999739,
|
| 13 |
+
54.90000000000042,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
130.39999999999745
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
50,
|
| 19 |
+
1084,
|
| 20 |
+
461,
|
| 21 |
+
50,
|
| 22 |
+
1072
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 2142,
|
| 26 |
+
"1": 575
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 65.11999999999905,
|
| 55 |
+
"mean_length": 543.4,
|
| 56 |
+
"std_reward": 57.274371231815906,
|
| 57 |
+
"std_length": 461.5935874771225,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 65.11999999999905,
|
| 60 |
+
"macro_mean_length": 543.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 370.4600000000205,
|
| 7 |
+
"mean_length": 3003.0,
|
| 8 |
+
"std_reward": 148.28000000001026,
|
| 9 |
+
"std_length": 1194.0,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
73.89999999999999,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
615,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 11891,
|
| 26 |
+
"1": 3124
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 370.4600000000205,
|
| 55 |
+
"mean_length": 3003.0,
|
| 56 |
+
"std_reward": 148.28000000001026,
|
| 57 |
+
"std_length": 1194.0,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 370.4600000000205,
|
| 60 |
+
"macro_mean_length": 3003.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 218.3800000000104,
|
| 7 |
+
"mean_length": 1778.2,
|
| 8 |
+
"std_reward": 184.83810646077453,
|
| 9 |
+
"std_length": 1488.5559982748382,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
73.89999999999999,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
54.90000000000042,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
73.89999999999999
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
615,
|
| 19 |
+
3600,
|
| 20 |
+
461,
|
| 21 |
+
3600,
|
| 22 |
+
615
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 7081,
|
| 26 |
+
"1": 1810
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 218.3800000000104,
|
| 55 |
+
"mean_length": 1778.2,
|
| 56 |
+
"std_reward": 184.83810646077453,
|
| 57 |
+
"std_length": 1488.5559982748382,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 218.3800000000104,
|
| 60 |
+
"macro_mean_length": 1778.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 317.72000000001435,
|
| 7 |
+
"mean_length": 2578.6,
|
| 8 |
+
"std_reward": 162.01944821534275,
|
| 9 |
+
"std_length": 1304.6114517357264,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
54.90000000000042,
|
| 14 |
+
199.89999999999435,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
461,
|
| 21 |
+
1632,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 10219,
|
| 26 |
+
"1": 2674
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 3000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 317.72000000001435,
|
| 55 |
+
"mean_length": 2578.6,
|
| 56 |
+
"std_reward": 162.01944821534275,
|
| 57 |
+
"std_length": 1304.6114517357264,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 317.72000000001435,
|
| 60 |
+
"macro_mean_length": 2578.6,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 365.76000000002057,
|
| 7 |
+
"mean_length": 2965.0,
|
| 8 |
+
"std_reward": 157.6800000000101,
|
| 9 |
+
"std_length": 1270.0,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
50.40000000000037
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
425
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 11775,
|
| 26 |
+
"1": 3050
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 3250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 365.76000000002057,
|
| 55 |
+
"mean_length": 2965.0,
|
| 56 |
+
"std_reward": 157.6800000000101,
|
| 57 |
+
"std_length": 1270.0,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 365.76000000002057,
|
| 60 |
+
"macro_mean_length": 2965.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 408.66000000001924,
|
| 7 |
+
"mean_length": 3311.2,
|
| 8 |
+
"std_reward": 71.8800000000128,
|
| 9 |
+
"std_length": 577.6,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
264.89999999999367,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
2156,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 13206,
|
| 26 |
+
"1": 3350
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 3500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 408.66000000001924,
|
| 55 |
+
"mean_length": 3311.2,
|
| 56 |
+
"std_reward": 71.8800000000128,
|
| 57 |
+
"std_length": 577.6,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 408.66000000001924,
|
| 60 |
+
"macro_mean_length": 3311.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 442.26000000002523,
|
| 7 |
+
"mean_length": 3582.4,
|
| 8 |
+
"std_reward": 4.680000000000791,
|
| 9 |
+
"std_length": 35.2,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
432.9000000000237
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3512
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 14270,
|
| 26 |
+
"1": 3642
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 3750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 442.26000000002523,
|
| 55 |
+
"mean_length": 3582.4,
|
| 56 |
+
"std_reward": 4.680000000000791,
|
| 57 |
+
"std_length": 35.2,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 442.26000000002523,
|
| 60 |
+
"macro_mean_length": 3582.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 442.26000000002523,
|
| 7 |
+
"mean_length": 3582.4,
|
| 8 |
+
"std_reward": 4.680000000000791,
|
| 9 |
+
"std_length": 35.2,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
432.9000000000237
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3512
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 14256,
|
| 26 |
+
"1": 3656
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 4000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 442.26000000002523,
|
| 55 |
+
"mean_length": 3582.4,
|
| 56 |
+
"std_reward": 4.680000000000791,
|
| 57 |
+
"std_length": 35.2,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 442.26000000002523,
|
| 60 |
+
"macro_mean_length": 3582.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 444.60000000002566,
|
| 7 |
+
"mean_length": 3600.0,
|
| 8 |
+
"std_reward": 0.0,
|
| 9 |
+
"std_length": 0.0,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 14338,
|
| 26 |
+
"1": 3662
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 4250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 444.60000000002566,
|
| 55 |
+
"mean_length": 3600.0,
|
| 56 |
+
"std_reward": 0.0,
|
| 57 |
+
"std_length": 0.0,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 444.60000000002566,
|
| 60 |
+
"macro_mean_length": 3600.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 365.76000000002057,
|
| 7 |
+
"mean_length": 2965.0,
|
| 8 |
+
"std_reward": 157.6800000000101,
|
| 9 |
+
"std_length": 1270.0,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
50.40000000000037
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
425
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 11866,
|
| 26 |
+
"1": 2959
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 4500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 365.76000000002057,
|
| 55 |
+
"mean_length": 2965.0,
|
| 56 |
+
"std_reward": 157.6800000000101,
|
| 57 |
+
"std_length": 1270.0,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 365.76000000002057,
|
| 60 |
+
"macro_mean_length": 2965.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 444.60000000002566,
|
| 7 |
+
"mean_length": 3600.0,
|
| 8 |
+
"std_reward": 0.0,
|
| 9 |
+
"std_length": 0.0,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 14315,
|
| 26 |
+
"1": 3685
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 4750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 444.60000000002566,
|
| 55 |
+
"mean_length": 3600.0,
|
| 56 |
+
"std_reward": 0.0,
|
| 57 |
+
"std_length": 0.0,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 444.60000000002566,
|
| 60 |
+
"macro_mean_length": 3600.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 176.28000000000358,
|
| 7 |
+
"mean_length": 1440.4,
|
| 8 |
+
"std_reward": 163.1896246702057,
|
| 9 |
+
"std_length": 1314.9364395285422,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
270.7999999999948,
|
| 12 |
+
18.200000000000014,
|
| 13 |
+
17.90000000000001,
|
| 14 |
+
129.89999999999748,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
2206,
|
| 19 |
+
166,
|
| 20 |
+
163,
|
| 21 |
+
1067,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 5754,
|
| 26 |
+
"1": 1448
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 176.28000000000358,
|
| 55 |
+
"mean_length": 1440.4,
|
| 56 |
+
"std_reward": 163.1896246702057,
|
| 57 |
+
"std_length": 1314.9364395285422,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 176.28000000000358,
|
| 60 |
+
"macro_mean_length": 1440.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_5000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 442.26000000002523,
|
| 7 |
+
"mean_length": 3582.4,
|
| 8 |
+
"std_reward": 4.680000000000791,
|
| 9 |
+
"std_length": 35.2,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
444.60000000002566,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
432.9000000000237
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
3600,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3512
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 14239,
|
| 26 |
+
"1": 3673
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 5000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 442.26000000002523,
|
| 55 |
+
"mean_length": 3582.4,
|
| 56 |
+
"std_reward": 4.680000000000791,
|
| 57 |
+
"std_length": 35.2,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 442.26000000002523,
|
| 60 |
+
"macro_mean_length": 3582.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 87.69999999999818,
|
| 7 |
+
"mean_length": 726.0,
|
| 8 |
+
"std_reward": 100.46123630535064,
|
| 9 |
+
"std_length": 810.7465695271242,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
264.89999999999367,
|
| 12 |
+
22.40000000000006,
|
| 13 |
+
12.899999999999974,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
134.39999999999728
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
2156,
|
| 19 |
+
199,
|
| 20 |
+
122,
|
| 21 |
+
50,
|
| 22 |
+
1103
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 2897,
|
| 26 |
+
"1": 733
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 87.69999999999818,
|
| 55 |
+
"mean_length": 726.0,
|
| 56 |
+
"std_reward": 100.46123630535064,
|
| 57 |
+
"std_length": 810.7465695271242,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 87.69999999999818,
|
| 60 |
+
"macro_mean_length": 726.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
framework:
|
| 2 |
+
qwenvl:
|
| 3 |
+
base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 4 |
+
attn_implementation: flash_attention_2
|
| 5 |
+
enable_gradient_checkpointing: true
|
| 6 |
+
action_model:
|
| 7 |
+
state_dim: 7
|
| 8 |
+
loss_type: discrete_ce
|
| 9 |
+
action_horizon: 1
|
| 10 |
+
future_action_window_size: 0
|
| 11 |
+
past_action_window_size: 0
|
| 12 |
+
action_dim: 7
|
| 13 |
+
action_env_dim: 2
|
| 14 |
+
name: QwenOFT
|
| 15 |
+
datasets:
|
| 16 |
+
vla_data:
|
| 17 |
+
dataset_py: lerobot_datasets
|
| 18 |
+
include_state: true
|
| 19 |
+
data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 20 |
+
data_mix: flappy_train__bridge
|
| 21 |
+
eval_data_mix: flappy_train__bridge__val
|
| 22 |
+
custom_mixtures_path: null
|
| 23 |
+
action_type: discrete
|
| 24 |
+
sequential_step_sampling: false
|
| 25 |
+
eval_sequential_step_sampling: null
|
| 26 |
+
num_workers: 8
|
| 27 |
+
eval_num_workers: 8
|
| 28 |
+
prefetch_factor: 4
|
| 29 |
+
persistent_workers: true
|
| 30 |
+
pin_memory: true
|
| 31 |
+
shuffle: true
|
| 32 |
+
action_balance:
|
| 33 |
+
enabled: false
|
| 34 |
+
strategy: balanced_epoch
|
| 35 |
+
action_key: action_id
|
| 36 |
+
target_flap_fraction: 0.3
|
| 37 |
+
noop_id: 0
|
| 38 |
+
flap_id: 1
|
| 39 |
+
latency_curriculum:
|
| 40 |
+
enabled: false
|
| 41 |
+
strategy: exclusive
|
| 42 |
+
latencies: null
|
| 43 |
+
phase_steps: null
|
| 44 |
+
per_device_batch_size: 32
|
| 45 |
+
load_all_data_for_training: true
|
| 46 |
+
num_obs_frames: 1
|
| 47 |
+
image_mode: single
|
| 48 |
+
stitch_grid:
|
| 49 |
+
- 2
|
| 50 |
+
- 2
|
| 51 |
+
obs_image_size: null
|
| 52 |
+
video_backend: torchvision_av
|
| 53 |
+
dataset:
|
| 54 |
+
source_hf: ${dataset.single_source_hf}
|
| 55 |
+
config_name: null
|
| 56 |
+
source_subdir: null
|
| 57 |
+
converted_name: ${dataset.single_converted_name}
|
| 58 |
+
single_source_hf: ''
|
| 59 |
+
mixed_source_hf: ''
|
| 60 |
+
single_converted_name: flappy_train
|
| 61 |
+
mixed_converted_name: flappy_mixed_latency_train
|
| 62 |
+
single_latency_filter: null
|
| 63 |
+
mixed_latency_filter: null
|
| 64 |
+
force_download: false
|
| 65 |
+
setup_force: false
|
| 66 |
+
skip_verification: false
|
| 67 |
+
verify_rows: 200
|
| 68 |
+
max_episodes: null
|
| 69 |
+
episodes_per_latency: null
|
| 70 |
+
latency_filter: ${dataset.single_latency_filter}
|
| 71 |
+
debug_subset:
|
| 72 |
+
enabled: false
|
| 73 |
+
max_episodes: 5
|
| 74 |
+
suffix: debug
|
| 75 |
+
base_model:
|
| 76 |
+
repo_id: ${bridge_base_model.repo_id.${model}}
|
| 77 |
+
initialization:
|
| 78 |
+
checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
|
| 79 |
+
checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
|
| 80 |
+
checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
|
| 81 |
+
trainer:
|
| 82 |
+
max_train_steps: 5000
|
| 83 |
+
num_warmup_steps: 100
|
| 84 |
+
save_interval: 500
|
| 85 |
+
eval_interval: 100
|
| 86 |
+
eval_num_batches: 100
|
| 87 |
+
per_latency_eval_num_batches: null
|
| 88 |
+
eval_action_classification: true
|
| 89 |
+
eval_action_classification_interval: null
|
| 90 |
+
cc_f1_tolerance: 1
|
| 91 |
+
learning_rate:
|
| 92 |
+
base: 2.0e-05
|
| 93 |
+
qwen_vl_interface: 1.0e-05
|
| 94 |
+
action_model: 0.0001
|
| 95 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 96 |
+
scheduler_specific_kwargs:
|
| 97 |
+
min_lr: 1.0e-06
|
| 98 |
+
freeze_modules: ''
|
| 99 |
+
freeze_llm_bottom_ratio: 0.0
|
| 100 |
+
loss_scale:
|
| 101 |
+
vla: 1.0
|
| 102 |
+
vlm: 0.1
|
| 103 |
+
max_grad_norm: 1.0
|
| 104 |
+
weight_decay: 0.0
|
| 105 |
+
logging_frequency: 1
|
| 106 |
+
gradient_clipping: 1.0
|
| 107 |
+
gradient_accumulation_steps: 4
|
| 108 |
+
distributed_backend: none
|
| 109 |
+
is_resume: false
|
| 110 |
+
pretrained_checkpoint: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
|
| 111 |
+
resume_step: 0
|
| 112 |
+
reload_modules: null
|
| 113 |
+
optimizer:
|
| 114 |
+
name: AdamW
|
| 115 |
+
betas:
|
| 116 |
+
- 0.9
|
| 117 |
+
- 0.95
|
| 118 |
+
eps: 1.0e-08
|
| 119 |
+
weight_decay: 1.0e-08
|
| 120 |
+
fused: true
|
| 121 |
+
save_format: pt
|
| 122 |
+
workspace_dir: WORKSPACE_DIR
|
| 123 |
+
run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 124 |
+
seed: 42
|
| 125 |
+
wandb_entity: saberrr-zju
|
| 126 |
+
wandb_project: starVLA_rl_games
|
| 127 |
+
auth:
|
| 128 |
+
env_file: null
|
| 129 |
+
hf_token_env: HF_TOKEN
|
| 130 |
+
wandb_api_key_env: WANDB_API_KEY
|
| 131 |
+
paths:
|
| 132 |
+
run_root_dir: results/Checkpoints
|
| 133 |
+
dataset_local_dir: playground/Datasets/rl_games
|
| 134 |
+
dataset_cache_dir: null
|
| 135 |
+
base_model_dir: ${bridge_base_model.local_dir.${model}}
|
| 136 |
+
accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
|
| 137 |
+
launch:
|
| 138 |
+
use_accelerate: true
|
| 139 |
+
gpus: null
|
| 140 |
+
num_processes: 1
|
| 141 |
+
dry_run: false
|
| 142 |
+
conda:
|
| 143 |
+
enabled: true
|
| 144 |
+
env_name: null
|
| 145 |
+
rl_games:
|
| 146 |
+
model_alias: openvla
|
| 147 |
+
env_eval:
|
| 148 |
+
image_size: 224
|
| 149 |
+
frameskip: 1
|
| 150 |
+
seed: 42
|
| 151 |
+
fixed_episode_seeds: true
|
| 152 |
+
latency_seed_stride: 0
|
| 153 |
+
task_seed_stride: 0
|
| 154 |
+
task_description: ''
|
| 155 |
+
enabled: true
|
| 156 |
+
distributed_mode: none
|
| 157 |
+
vectorized:
|
| 158 |
+
enabled: false
|
| 159 |
+
batch_size: 1
|
| 160 |
+
latency:
|
| 161 |
+
prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 162 |
+
mode: single
|
| 163 |
+
values:
|
| 164 |
+
- 0
|
| 165 |
+
mid_train:
|
| 166 |
+
enabled: true
|
| 167 |
+
interval_steps: 250
|
| 168 |
+
latencies:
|
| 169 |
+
- 2
|
| 170 |
+
num_episodes: 5
|
| 171 |
+
max_steps_per_episode: 3600
|
| 172 |
+
post_train:
|
| 173 |
+
enabled: false
|
| 174 |
+
latencies:
|
| 175 |
+
- 0
|
| 176 |
+
- 1
|
| 177 |
+
- 2
|
| 178 |
+
- 3
|
| 179 |
+
- 4
|
| 180 |
+
num_episodes: 5
|
| 181 |
+
max_steps_per_episode: 3600
|
| 182 |
+
task: flappy
|
| 183 |
+
initialization_mode: bridge
|
| 184 |
+
action_carrier: bridge
|
| 185 |
+
model: openvla
|
| 186 |
+
env: flappy
|
| 187 |
+
init: bridge
|
| 188 |
+
bridge_base_model:
|
| 189 |
+
repo_id:
|
| 190 |
+
openvla: Qwen/Qwen3-VL-4B-Instruct
|
| 191 |
+
pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
|
| 192 |
+
pi05: Qwen/Qwen3-VL-4B-Instruct
|
| 193 |
+
gr00t: Qwen/Qwen3-VL-4B-Instruct
|
| 194 |
+
local_dir:
|
| 195 |
+
openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 196 |
+
pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
|
| 197 |
+
pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 198 |
+
gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 199 |
+
mode: single
|
| 200 |
+
checkpoint:
|
| 201 |
+
load: auto
|
| 202 |
+
hf_repo_id: null
|
| 203 |
+
save_best_model: false
|
| 204 |
+
save_pt_file: false
|
| 205 |
+
local:
|
| 206 |
+
keep_last_n: 1
|
| 207 |
+
sync:
|
| 208 |
+
enabled: false
|
| 209 |
+
repo_id: null
|
| 210 |
+
keep_last_n: 0
|
| 211 |
+
sync_every_n_checkpoints: 1
|
| 212 |
+
resume_policy: local_latest
|
| 213 |
+
run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
|
| 214 |
+
output_dir: null
|
| 215 |
+
config_yaml: null
|
| 216 |
+
is_debug: false
|
| 217 |
+
version_id: '0.21'
|
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ${run_root_dir}/${run_id}/hydra
|
| 4 |
+
sweep:
|
| 5 |
+
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task:
|
| 115 |
+
- model=openvla
|
| 116 |
+
- env=flappy
|
| 117 |
+
- init=bridge
|
| 118 |
+
- mode=single
|
| 119 |
+
- run_id=flappy_fix_latency_2_200ep_full_tuning_corrected
|
| 120 |
+
- run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 121 |
+
- seed=42
|
| 122 |
+
- wandb_entity=saberrr-zju
|
| 123 |
+
- wandb_project=starVLA_rl_games
|
| 124 |
+
- rl_games.env_eval.enabled=true
|
| 125 |
+
- checkpoint.sync.enabled=false
|
| 126 |
+
- checkpoint.sync.keep_last_n=0
|
| 127 |
+
- checkpoint.local.keep_last_n=1
|
| 128 |
+
- checkpoint.save_best_model=false
|
| 129 |
+
- checkpoint.save_pt_file=false
|
| 130 |
+
- trainer.is_resume=false
|
| 131 |
+
- trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
|
| 132 |
+
- trainer.resume_step=0
|
| 133 |
+
- trainer.max_train_steps=5000
|
| 134 |
+
- trainer.num_warmup_steps=100
|
| 135 |
+
- trainer.save_interval=500
|
| 136 |
+
- trainer.eval_interval=100
|
| 137 |
+
- trainer.eval_num_batches=100
|
| 138 |
+
- trainer.eval_action_classification=true
|
| 139 |
+
- trainer.logging_frequency=1
|
| 140 |
+
- trainer.gradient_accumulation_steps=4
|
| 141 |
+
- trainer.distributed_backend=none
|
| 142 |
+
- trainer.learning_rate.base=2e-05
|
| 143 |
+
- trainer.learning_rate.qwen_vl_interface=1e-05
|
| 144 |
+
- trainer.learning_rate.action_model=0.0001
|
| 145 |
+
- trainer.lr_scheduler_type=cosine_with_min_lr
|
| 146 |
+
- trainer.scheduler_specific_kwargs.min_lr=1e-06
|
| 147 |
+
- trainer.freeze_llm_bottom_ratio=0.0
|
| 148 |
+
- trainer.loss_scale.vla=1.0
|
| 149 |
+
- trainer.loss_scale.vlm=0.1
|
| 150 |
+
- trainer.max_grad_norm=1.0
|
| 151 |
+
- trainer.weight_decay=0.0
|
| 152 |
+
- trainer.gradient_clipping=1.0
|
| 153 |
+
- trainer.optimizer.name=AdamW
|
| 154 |
+
- trainer.optimizer.betas=[0.9,0.95]
|
| 155 |
+
- trainer.optimizer.eps=1e-08
|
| 156 |
+
- trainer.optimizer.weight_decay=1e-08
|
| 157 |
+
- trainer.optimizer.fused=true
|
| 158 |
+
- trainer.save_format=pt
|
| 159 |
+
- framework.name=QwenOFT
|
| 160 |
+
- framework.qwenvl.attn_implementation=flash_attention_2
|
| 161 |
+
- framework.qwenvl.enable_gradient_checkpointing=true
|
| 162 |
+
- framework.action_model.action_dim=7
|
| 163 |
+
- framework.action_model.action_env_dim=2
|
| 164 |
+
- framework.action_model.state_dim=7
|
| 165 |
+
- framework.action_model.loss_type=discrete_ce
|
| 166 |
+
- framework.action_model.action_horizon=1
|
| 167 |
+
- framework.action_model.future_action_window_size=0
|
| 168 |
+
- framework.action_model.past_action_window_size=0
|
| 169 |
+
- datasets.vla_data.include_state=true
|
| 170 |
+
- datasets.vla_data.action_type=discrete
|
| 171 |
+
- datasets.vla_data.sequential_step_sampling=false
|
| 172 |
+
- datasets.vla_data.shuffle=true
|
| 173 |
+
- datasets.vla_data.num_workers=8
|
| 174 |
+
- datasets.vla_data.eval_num_workers=8
|
| 175 |
+
- datasets.vla_data.prefetch_factor=4
|
| 176 |
+
- datasets.vla_data.persistent_workers=true
|
| 177 |
+
- datasets.vla_data.pin_memory=true
|
| 178 |
+
- datasets.vla_data.action_balance.enabled=false
|
| 179 |
+
- datasets.vla_data.action_balance.strategy=balanced_epoch
|
| 180 |
+
- datasets.vla_data.action_balance.action_key=action_id
|
| 181 |
+
- datasets.vla_data.action_balance.target_flap_fraction=0.3
|
| 182 |
+
- datasets.vla_data.action_balance.noop_id=0
|
| 183 |
+
- datasets.vla_data.action_balance.flap_id=1
|
| 184 |
+
- datasets.vla_data.latency_curriculum.enabled=false
|
| 185 |
+
- datasets.vla_data.latency_curriculum.strategy=exclusive
|
| 186 |
+
- datasets.vla_data.per_device_batch_size=32
|
| 187 |
+
- datasets.vla_data.num_workers=8
|
| 188 |
+
- datasets.vla_data.eval_num_workers=8
|
| 189 |
+
- datasets.vla_data.prefetch_factor=4
|
| 190 |
+
- datasets.vla_data.persistent_workers=true
|
| 191 |
+
- datasets.vla_data.pin_memory=true
|
| 192 |
+
- datasets.vla_data.load_all_data_for_training=true
|
| 193 |
+
- datasets.vla_data.video_backend=torchvision_av
|
| 194 |
+
- datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 195 |
+
- datasets.vla_data.data_mix=flappy_train__bridge
|
| 196 |
+
- datasets.vla_data.eval_data_mix=flappy_train__bridge__val
|
| 197 |
+
- framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 198 |
+
- rl_games.task=flappy
|
| 199 |
+
- rl_games.model_alias=openvla
|
| 200 |
+
- rl_games.initialization_mode=bridge
|
| 201 |
+
- rl_games.action_carrier=bridge
|
| 202 |
+
- rl_games.env_eval.distributed_mode=none
|
| 203 |
+
- rl_games.env_eval.latency.mode=single
|
| 204 |
+
- rl_games.env_eval.frameskip=1
|
| 205 |
+
- rl_games.env_eval.image_size=224
|
| 206 |
+
- rl_games.env_eval.seed=42
|
| 207 |
+
- rl_games.env_eval.fixed_episode_seeds=true
|
| 208 |
+
- rl_games.env_eval.latency_seed_stride=0
|
| 209 |
+
- rl_games.env_eval.task_seed_stride=0
|
| 210 |
+
- rl_games.env_eval.latency.values=[0]
|
| 211 |
+
- rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 212 |
+
- rl_games.env_eval.mid_train.enabled=true
|
| 213 |
+
- rl_games.env_eval.mid_train.interval_steps=250
|
| 214 |
+
- rl_games.env_eval.mid_train.num_episodes=5
|
| 215 |
+
- rl_games.env_eval.mid_train.max_steps_per_episode=3600
|
| 216 |
+
- rl_games.env_eval.mid_train.latencies=[2]
|
| 217 |
+
- rl_games.env_eval.post_train.enabled=false
|
| 218 |
+
- rl_games.env_eval.post_train.num_episodes=5
|
| 219 |
+
- rl_games.env_eval.post_train.max_steps_per_episode=3600
|
| 220 |
+
- rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
|
| 221 |
+
job:
|
| 222 |
+
name: train_starvla_hydra
|
| 223 |
+
chdir: false
|
| 224 |
+
override_dirname: checkpoint.local.keep_last_n=1,checkpoint.save_best_model=false,checkpoint.save_pt_file=false,checkpoint.sync.enabled=false,checkpoint.sync.keep_last_n=0,datasets.vla_data.action_balance.action_key=action_id,datasets.vla_data.action_balance.enabled=false,datasets.vla_data.action_balance.flap_id=1,datasets.vla_data.action_balance.noop_id=0,datasets.vla_data.action_balance.strategy=balanced_epoch,datasets.vla_data.action_balance.target_flap_fraction=0.3,datasets.vla_data.action_type=discrete,datasets.vla_data.data_mix=flappy_train__bridge,datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep,datasets.vla_data.eval_data_mix=flappy_train__bridge__val,datasets.vla_data.eval_num_workers=8,datasets.vla_data.eval_num_workers=8,datasets.vla_data.include_state=true,datasets.vla_data.latency_curriculum.enabled=false,datasets.vla_data.latency_curriculum.strategy=exclusive,datasets.vla_data.load_all_data_for_training=true,datasets.vla_data.num_workers=8,datasets.vla_data.num_workers=8,datasets.vla_data.per_device_batch_size=32,datasets.vla_data.persistent_workers=true,datasets.vla_data.persistent_workers=true,datasets.vla_data.pin_memory=true,datasets.vla_data.pin_memory=true,datasets.vla_data.prefetch_factor=4,datasets.vla_data.prefetch_factor=4,datasets.vla_data.sequential_step_sampling=false,datasets.vla_data.shuffle=true,datasets.vla_data.video_backend=torchvision_av,env=flappy,framework.action_model.action_dim=7,framework.action_model.action_env_dim=2,framework.action_model.action_horizon=1,framework.action_model.future_action_window_size=0,framework.action_model.loss_type=discrete_ce,framework.action_model.past_action_window_size=0,framework.action_model.state_dim=7,framework.name=QwenOFT,framework.qwenvl.attn_implementation=flash_attention_2,framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct,framework.qwenvl.enable_gradient_checkpointing=true,init=bridge,mode=single,model=openvla,rl_games.action_carrier=bridge,rl_games.env_eval.distributed_mode=none,rl_games.env_eval.enabled=true,rl_games.env_eval.fixed_episode_seeds=true,rl_games.env_eval.frameskip=1,rl_games.env_eval.image_size=224,rl_games.env_eval.latency.mode=single,rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json,rl_games.env_eval.latency.values=[0],rl_games.env_eval.latency_seed_stride=0,rl_games.env_eval.mid_train.enabled=true,rl_games.env_eval.mid_train.interval_steps=250,rl_games.env_eval.mid_train.latencies=[2],rl_games.env_eval.mid_train.max_steps_per_episode=3600,rl_games.env_eval.mid_train.num_episodes=5,rl_games.env_eval.post_train.enabled=false,rl_games.env_eval.post_train.latencies=[0,1,2,3,4],rl_games.env_eval.post_train.max_steps_per_episode=3600,rl_games.env_eval.post_train.num_episodes=5,rl_games.env_eval.seed=42,rl_games.env_eval.task_seed_stride=0,rl_games.initialization_mode=bridge,rl_games.model_alias=openvla,rl_games.task=flappy,run_id=flappy_fix_latency_2_200ep_full_tuning_corrected,run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints,seed=42,trainer.distributed_backend=none,trainer.eval_action_classification=true,trainer.eval_interval=100,trainer.eval_num_batches=100,trainer.freeze_llm_bottom_ratio=0.0,trainer.gradient_accumulation_steps=4,trainer.gradient_clipping=1.0,trainer.is_resume=false,trainer.learning_rate.action_model=0.0001,trainer.learning_rate.base=2e-05,trainer.learning_rate.qwen_vl_interface=1e-05,trainer.logging_frequency=1,trainer.loss_scale.vla=1.0,trainer.loss_scale.vlm=0.1,trainer.lr_scheduler_type=cosine_with_min_lr,trainer.max_grad_norm=1.0,trainer.max_train_steps=5000,trainer.num_warmup_steps=100,trainer.optimizer.betas=[0.9,0.95],trainer.optimizer.eps=1e-08,trainer.optimizer.fused=true,trainer.optimizer.name=AdamW,trainer.optimizer.weight_decay=1e-08,trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt,trainer.resume_step=0,trainer.save_format=pt,trainer.save_interval=500,trainer.scheduler_specific_kwargs.min_lr=1e-06,trainer.weight_decay=0.0,wandb_entity=saberrr-zju,wandb_project=starVLA_rl_games
|
| 225 |
+
id: ???
|
| 226 |
+
num: ???
|
| 227 |
+
config_name: train
|
| 228 |
+
env_set: {}
|
| 229 |
+
env_copy: []
|
| 230 |
+
config:
|
| 231 |
+
override_dirname:
|
| 232 |
+
kv_sep: '='
|
| 233 |
+
item_sep: ','
|
| 234 |
+
exclude_keys: []
|
| 235 |
+
runtime:
|
| 236 |
+
version: 1.3.3
|
| 237 |
+
version_base: '1.1'
|
| 238 |
+
cwd: /workspace/latency-sensitive-bench/starVLA
|
| 239 |
+
config_sources:
|
| 240 |
+
- path: hydra.conf
|
| 241 |
+
schema: pkg
|
| 242 |
+
provider: hydra
|
| 243 |
+
- path: /workspace/latency-sensitive-bench/starVLA/examples/rl_games/config
|
| 244 |
+
schema: file
|
| 245 |
+
provider: main
|
| 246 |
+
- path: ''
|
| 247 |
+
schema: structured
|
| 248 |
+
provider: schema
|
| 249 |
+
output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/hydra
|
| 250 |
+
choices:
|
| 251 |
+
cross_task_setup: null
|
| 252 |
+
checkpoint: default
|
| 253 |
+
mode: single
|
| 254 |
+
init: bridge
|
| 255 |
+
env: flappy
|
| 256 |
+
model: openvla
|
| 257 |
+
hydra/env: default
|
| 258 |
+
hydra/callbacks: null
|
| 259 |
+
hydra/job_logging: default
|
| 260 |
+
hydra/hydra_logging: default
|
| 261 |
+
hydra/hydra_help: default
|
| 262 |
+
hydra/help: default
|
| 263 |
+
hydra/sweeper: basic
|
| 264 |
+
hydra/launcher: basic
|
| 265 |
+
hydra/output: default
|
| 266 |
+
verbose: false
|
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- model=openvla
|
| 2 |
+
- env=flappy
|
| 3 |
+
- init=bridge
|
| 4 |
+
- mode=single
|
| 5 |
+
- run_id=flappy_fix_latency_2_200ep_full_tuning_corrected
|
| 6 |
+
- run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 7 |
+
- seed=42
|
| 8 |
+
- wandb_entity=saberrr-zju
|
| 9 |
+
- wandb_project=starVLA_rl_games
|
| 10 |
+
- rl_games.env_eval.enabled=true
|
| 11 |
+
- checkpoint.sync.enabled=false
|
| 12 |
+
- checkpoint.sync.keep_last_n=0
|
| 13 |
+
- checkpoint.local.keep_last_n=1
|
| 14 |
+
- checkpoint.save_best_model=false
|
| 15 |
+
- checkpoint.save_pt_file=false
|
| 16 |
+
- trainer.is_resume=false
|
| 17 |
+
- trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
|
| 18 |
+
- trainer.resume_step=0
|
| 19 |
+
- trainer.max_train_steps=5000
|
| 20 |
+
- trainer.num_warmup_steps=100
|
| 21 |
+
- trainer.save_interval=500
|
| 22 |
+
- trainer.eval_interval=100
|
| 23 |
+
- trainer.eval_num_batches=100
|
| 24 |
+
- trainer.eval_action_classification=true
|
| 25 |
+
- trainer.logging_frequency=1
|
| 26 |
+
- trainer.gradient_accumulation_steps=4
|
| 27 |
+
- trainer.distributed_backend=none
|
| 28 |
+
- trainer.learning_rate.base=2e-05
|
| 29 |
+
- trainer.learning_rate.qwen_vl_interface=1e-05
|
| 30 |
+
- trainer.learning_rate.action_model=0.0001
|
| 31 |
+
- trainer.lr_scheduler_type=cosine_with_min_lr
|
| 32 |
+
- trainer.scheduler_specific_kwargs.min_lr=1e-06
|
| 33 |
+
- trainer.freeze_llm_bottom_ratio=0.0
|
| 34 |
+
- trainer.loss_scale.vla=1.0
|
| 35 |
+
- trainer.loss_scale.vlm=0.1
|
| 36 |
+
- trainer.max_grad_norm=1.0
|
| 37 |
+
- trainer.weight_decay=0.0
|
| 38 |
+
- trainer.gradient_clipping=1.0
|
| 39 |
+
- trainer.optimizer.name=AdamW
|
| 40 |
+
- trainer.optimizer.betas=[0.9,0.95]
|
| 41 |
+
- trainer.optimizer.eps=1e-08
|
| 42 |
+
- trainer.optimizer.weight_decay=1e-08
|
| 43 |
+
- trainer.optimizer.fused=true
|
| 44 |
+
- trainer.save_format=pt
|
| 45 |
+
- framework.name=QwenOFT
|
| 46 |
+
- framework.qwenvl.attn_implementation=flash_attention_2
|
| 47 |
+
- framework.qwenvl.enable_gradient_checkpointing=true
|
| 48 |
+
- framework.action_model.action_dim=7
|
| 49 |
+
- framework.action_model.action_env_dim=2
|
| 50 |
+
- framework.action_model.state_dim=7
|
| 51 |
+
- framework.action_model.loss_type=discrete_ce
|
| 52 |
+
- framework.action_model.action_horizon=1
|
| 53 |
+
- framework.action_model.future_action_window_size=0
|
| 54 |
+
- framework.action_model.past_action_window_size=0
|
| 55 |
+
- datasets.vla_data.include_state=true
|
| 56 |
+
- datasets.vla_data.action_type=discrete
|
| 57 |
+
- datasets.vla_data.sequential_step_sampling=false
|
| 58 |
+
- datasets.vla_data.shuffle=true
|
| 59 |
+
- datasets.vla_data.num_workers=8
|
| 60 |
+
- datasets.vla_data.eval_num_workers=8
|
| 61 |
+
- datasets.vla_data.prefetch_factor=4
|
| 62 |
+
- datasets.vla_data.persistent_workers=true
|
| 63 |
+
- datasets.vla_data.pin_memory=true
|
| 64 |
+
- datasets.vla_data.action_balance.enabled=false
|
| 65 |
+
- datasets.vla_data.action_balance.strategy=balanced_epoch
|
| 66 |
+
- datasets.vla_data.action_balance.action_key=action_id
|
| 67 |
+
- datasets.vla_data.action_balance.target_flap_fraction=0.3
|
| 68 |
+
- datasets.vla_data.action_balance.noop_id=0
|
| 69 |
+
- datasets.vla_data.action_balance.flap_id=1
|
| 70 |
+
- datasets.vla_data.latency_curriculum.enabled=false
|
| 71 |
+
- datasets.vla_data.latency_curriculum.strategy=exclusive
|
| 72 |
+
- datasets.vla_data.per_device_batch_size=32
|
| 73 |
+
- datasets.vla_data.num_workers=8
|
| 74 |
+
- datasets.vla_data.eval_num_workers=8
|
| 75 |
+
- datasets.vla_data.prefetch_factor=4
|
| 76 |
+
- datasets.vla_data.persistent_workers=true
|
| 77 |
+
- datasets.vla_data.pin_memory=true
|
| 78 |
+
- datasets.vla_data.load_all_data_for_training=true
|
| 79 |
+
- datasets.vla_data.video_backend=torchvision_av
|
| 80 |
+
- datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 81 |
+
- datasets.vla_data.data_mix=flappy_train__bridge
|
| 82 |
+
- datasets.vla_data.eval_data_mix=flappy_train__bridge__val
|
| 83 |
+
- framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 84 |
+
- rl_games.task=flappy
|
| 85 |
+
- rl_games.model_alias=openvla
|
| 86 |
+
- rl_games.initialization_mode=bridge
|
| 87 |
+
- rl_games.action_carrier=bridge
|
| 88 |
+
- rl_games.env_eval.distributed_mode=none
|
| 89 |
+
- rl_games.env_eval.latency.mode=single
|
| 90 |
+
- rl_games.env_eval.frameskip=1
|
| 91 |
+
- rl_games.env_eval.image_size=224
|
| 92 |
+
- rl_games.env_eval.seed=42
|
| 93 |
+
- rl_games.env_eval.fixed_episode_seeds=true
|
| 94 |
+
- rl_games.env_eval.latency_seed_stride=0
|
| 95 |
+
- rl_games.env_eval.task_seed_stride=0
|
| 96 |
+
- rl_games.env_eval.latency.values=[0]
|
| 97 |
+
- rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 98 |
+
- rl_games.env_eval.mid_train.enabled=true
|
| 99 |
+
- rl_games.env_eval.mid_train.interval_steps=250
|
| 100 |
+
- rl_games.env_eval.mid_train.num_episodes=5
|
| 101 |
+
- rl_games.env_eval.mid_train.max_steps_per_episode=3600
|
| 102 |
+
- rl_games.env_eval.mid_train.latencies=[2]
|
| 103 |
+
- rl_games.env_eval.post_train.enabled=false
|
| 104 |
+
- rl_games.env_eval.post_train.num_episodes=5
|
| 105 |
+
- rl_games.env_eval.post_train.max_steps_per_episode=3600
|
| 106 |
+
- rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
|
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/train_starvla_hydra.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
flappy_fix_latency_2_200ep_full_tuning_corrected/summary.jsonl
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 500}
|
| 2 |
+
{"steps": 1000}
|
| 3 |
+
{"steps": 1500}
|
| 4 |
+
{"steps": 2000}
|
| 5 |
+
{"steps": 2500}
|
| 6 |
+
{"steps": 3000}
|
| 7 |
+
{"steps": 3500}
|
| 8 |
+
{"steps": 4000}
|
| 9 |
+
{"steps": 4500}
|
| 10 |
+
{"steps": 5000}
|