Upload folder using huggingface_hub
Browse files- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/model.safetensors +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/optimizer.bin +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/random_states_0.pkl +3 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.full.yaml +224 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.yaml +103 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics.json +127 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics_eval.json +127 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_250.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_3000.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_500.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_750.json +63 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/config.yaml +222 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/hydra.yaml +341 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/overrides.yaml +181 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/train_starvla_hydra.log +0 -0
- flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/summary.jsonl +6 -0
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:371cb744227687bb99bcad7f9ff2250cf06da75631359ad3eba4c6bc52570607
|
| 3 |
+
size 9785060316
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dee60fc99cf35e2e6d5908129093c8c66c2cafb62d3b87a236d45acd86522d59
|
| 3 |
+
size 9138230516
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0bf50374f6a08b664673827a21dec3f972a8770dc56af310e8a6c225af8b023
|
| 3 |
+
size 18276885098
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/steps_3000_state/random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb1d26c3be5ce9ab794023e41262ab96e409e1e140d3759482adef7778d779a8
|
| 3 |
+
size 14821
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.full.yaml
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
framework:
|
| 2 |
+
name: QwenOFT
|
| 3 |
+
qwenvl:
|
| 4 |
+
base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 5 |
+
attn_implementation: flash_attention_2
|
| 6 |
+
enable_gradient_checkpointing: true
|
| 7 |
+
action_model:
|
| 8 |
+
action_model_type: MLP
|
| 9 |
+
action_dim: 7
|
| 10 |
+
action_hidden_dim: 2560
|
| 11 |
+
future_action_window_size: 0
|
| 12 |
+
past_action_window_size: 0
|
| 13 |
+
loss_type: discrete_ce
|
| 14 |
+
state_dim: 7
|
| 15 |
+
action_horizon: 1
|
| 16 |
+
action_env_dim: 2
|
| 17 |
+
datasets:
|
| 18 |
+
vla_data:
|
| 19 |
+
dataset_py: lerobot_datasets
|
| 20 |
+
include_state: true
|
| 21 |
+
data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 22 |
+
data_mix: flappy_train__bridge
|
| 23 |
+
eval_data_mix: flappy_train__bridge__val
|
| 24 |
+
custom_mixtures_path: null
|
| 25 |
+
action_type: discrete
|
| 26 |
+
sequential_step_sampling: false
|
| 27 |
+
eval_sequential_step_sampling: null
|
| 28 |
+
num_workers: 8
|
| 29 |
+
eval_num_workers: 8
|
| 30 |
+
prefetch_factor: 4
|
| 31 |
+
persistent_workers: true
|
| 32 |
+
pin_memory: true
|
| 33 |
+
shuffle: true
|
| 34 |
+
action_balance:
|
| 35 |
+
enabled: false
|
| 36 |
+
strategy: balanced_epoch
|
| 37 |
+
action_key: action_id
|
| 38 |
+
target_flap_fraction: 0.3
|
| 39 |
+
noop_id: 0
|
| 40 |
+
flap_id: 1
|
| 41 |
+
latency_curriculum:
|
| 42 |
+
enabled: false
|
| 43 |
+
strategy: exclusive
|
| 44 |
+
latencies: null
|
| 45 |
+
phase_steps: null
|
| 46 |
+
per_device_batch_size: 32
|
| 47 |
+
load_all_data_for_training: true
|
| 48 |
+
num_obs_frames: 1
|
| 49 |
+
image_mode: single
|
| 50 |
+
stitch_grid:
|
| 51 |
+
- 2
|
| 52 |
+
- 2
|
| 53 |
+
obs_image_size: null
|
| 54 |
+
video_backend: torchvision_av
|
| 55 |
+
dataset:
|
| 56 |
+
source_hf: ''
|
| 57 |
+
config_name: null
|
| 58 |
+
source_subdir: null
|
| 59 |
+
converted_name: flappy_train
|
| 60 |
+
single_source_hf: ''
|
| 61 |
+
mixed_source_hf: ''
|
| 62 |
+
single_converted_name: flappy_train
|
| 63 |
+
mixed_converted_name: flappy_mixed_latency_train
|
| 64 |
+
single_latency_filter: null
|
| 65 |
+
mixed_latency_filter: null
|
| 66 |
+
force_download: false
|
| 67 |
+
setup_force: false
|
| 68 |
+
skip_verification: false
|
| 69 |
+
verify_rows: 200
|
| 70 |
+
max_episodes: null
|
| 71 |
+
episodes_per_latency: null
|
| 72 |
+
latency_filter: null
|
| 73 |
+
debug_subset:
|
| 74 |
+
enabled: false
|
| 75 |
+
max_episodes: 5
|
| 76 |
+
suffix: debug
|
| 77 |
+
base_model:
|
| 78 |
+
repo_id: Qwen/Qwen3-VL-4B-Instruct
|
| 79 |
+
initialization:
|
| 80 |
+
checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
|
| 81 |
+
checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
|
| 82 |
+
checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
|
| 83 |
+
trainer:
|
| 84 |
+
max_train_steps: 3000
|
| 85 |
+
num_warmup_steps: 100
|
| 86 |
+
save_interval: 500
|
| 87 |
+
eval_interval: 250
|
| 88 |
+
eval_num_batches: 50
|
| 89 |
+
per_latency_eval_num_batches: null
|
| 90 |
+
eval_action_classification: true
|
| 91 |
+
eval_action_classification_interval: null
|
| 92 |
+
cc_f1_tolerance: 1
|
| 93 |
+
learning_rate:
|
| 94 |
+
base: 2.0e-05
|
| 95 |
+
qwen_vl_interface: 1.0e-05
|
| 96 |
+
action_model: 0.0001
|
| 97 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 98 |
+
scheduler_specific_kwargs:
|
| 99 |
+
min_lr: 1.0e-06
|
| 100 |
+
freeze_modules: ''
|
| 101 |
+
freeze_vit: false
|
| 102 |
+
freeze_tied_embedding: false
|
| 103 |
+
freeze_llm_layers: []
|
| 104 |
+
loss_scale:
|
| 105 |
+
vla: 1.0
|
| 106 |
+
vlm: 0.1
|
| 107 |
+
max_grad_norm: 1.0
|
| 108 |
+
weight_decay: 0.0
|
| 109 |
+
logging_frequency: 1
|
| 110 |
+
profile_timing:
|
| 111 |
+
enabled: false
|
| 112 |
+
log_interval: 10
|
| 113 |
+
gradient_clipping: 1.0
|
| 114 |
+
gradient_accumulation_steps: 1
|
| 115 |
+
distributed_backend: none
|
| 116 |
+
is_resume: false
|
| 117 |
+
pretrained_checkpoint: null
|
| 118 |
+
resume_step: 0
|
| 119 |
+
reload_modules: null
|
| 120 |
+
optimizer:
|
| 121 |
+
name: AdamW
|
| 122 |
+
betas:
|
| 123 |
+
- 0.9
|
| 124 |
+
- 0.95
|
| 125 |
+
eps: 1.0e-08
|
| 126 |
+
weight_decay: 1.0e-08
|
| 127 |
+
fused: true
|
| 128 |
+
save_format: pt
|
| 129 |
+
workspace_dir: WORKSPACE_DIR
|
| 130 |
+
run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 131 |
+
seed: 42
|
| 132 |
+
wandb_entity: saberrr-zju
|
| 133 |
+
wandb_project: starVLA_rl_games
|
| 134 |
+
auth:
|
| 135 |
+
env_file: null
|
| 136 |
+
hf_token_env: HF_TOKEN
|
| 137 |
+
wandb_api_key_env: WANDB_API_KEY
|
| 138 |
+
paths:
|
| 139 |
+
run_root_dir: results/Checkpoints
|
| 140 |
+
dataset_local_dir: data/flappy_fix_latency_2_200ep
|
| 141 |
+
dataset_cache_dir: null
|
| 142 |
+
base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 143 |
+
accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
|
| 144 |
+
launch:
|
| 145 |
+
use_accelerate: true
|
| 146 |
+
gpus: null
|
| 147 |
+
num_processes: 1
|
| 148 |
+
dry_run: false
|
| 149 |
+
conda:
|
| 150 |
+
enabled: true
|
| 151 |
+
env_name: null
|
| 152 |
+
rl_games:
|
| 153 |
+
model_alias: openvla
|
| 154 |
+
env_eval:
|
| 155 |
+
image_size: 224
|
| 156 |
+
frameskip: 1
|
| 157 |
+
seed: 42
|
| 158 |
+
fixed_episode_seeds: true
|
| 159 |
+
latency_seed_stride: 0
|
| 160 |
+
task_seed_stride: 0
|
| 161 |
+
task_description: ''
|
| 162 |
+
enabled: true
|
| 163 |
+
distributed_mode: none
|
| 164 |
+
vectorized:
|
| 165 |
+
enabled: false
|
| 166 |
+
batch_size: 1
|
| 167 |
+
latency:
|
| 168 |
+
prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 169 |
+
mode: single
|
| 170 |
+
values:
|
| 171 |
+
- 0
|
| 172 |
+
mid_train:
|
| 173 |
+
enabled: true
|
| 174 |
+
interval_steps: 250
|
| 175 |
+
latencies:
|
| 176 |
+
- 2
|
| 177 |
+
num_episodes: 5
|
| 178 |
+
max_steps_per_episode: 3600
|
| 179 |
+
post_train:
|
| 180 |
+
enabled: false
|
| 181 |
+
latencies:
|
| 182 |
+
- 0
|
| 183 |
+
- 1
|
| 184 |
+
- 2
|
| 185 |
+
- 3
|
| 186 |
+
- 4
|
| 187 |
+
num_episodes: 5
|
| 188 |
+
max_steps_per_episode: 3600
|
| 189 |
+
task: flappy
|
| 190 |
+
initialization_mode: bridge
|
| 191 |
+
action_carrier: bridge
|
| 192 |
+
model: openvla
|
| 193 |
+
env: flappy
|
| 194 |
+
init: bridge
|
| 195 |
+
bridge_base_model:
|
| 196 |
+
repo_id:
|
| 197 |
+
openvla: Qwen/Qwen3-VL-4B-Instruct
|
| 198 |
+
pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
|
| 199 |
+
pi05: Qwen/Qwen3-VL-4B-Instruct
|
| 200 |
+
gr00t: Qwen/Qwen3-VL-4B-Instruct
|
| 201 |
+
local_dir:
|
| 202 |
+
openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 203 |
+
pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
|
| 204 |
+
pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 205 |
+
gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 206 |
+
mode: single
|
| 207 |
+
checkpoint:
|
| 208 |
+
load: none
|
| 209 |
+
hf_repo_id: null
|
| 210 |
+
save_best_model: false
|
| 211 |
+
save_pt_file: false
|
| 212 |
+
local:
|
| 213 |
+
keep_last_n: 1
|
| 214 |
+
sync:
|
| 215 |
+
enabled: false
|
| 216 |
+
repo_id: null
|
| 217 |
+
keep_last_n: 0
|
| 218 |
+
sync_every_n_checkpoints: 1
|
| 219 |
+
resume_policy: local_latest
|
| 220 |
+
run_id: flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
|
| 221 |
+
output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
|
| 222 |
+
config_yaml: null
|
| 223 |
+
is_debug: false
|
| 224 |
+
version_id: '0.21'
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/config.yaml
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
checkpoint:
|
| 2 |
+
local:
|
| 3 |
+
keep_last_n: 1
|
| 4 |
+
save_best_model: false
|
| 5 |
+
save_pt_file: false
|
| 6 |
+
sync:
|
| 7 |
+
enabled: false
|
| 8 |
+
keep_last_n: 0
|
| 9 |
+
repo_id: null
|
| 10 |
+
datasets:
|
| 11 |
+
vla_data:
|
| 12 |
+
data_mix: flappy_train__bridge
|
| 13 |
+
dataset_py: lerobot_datasets
|
| 14 |
+
eval_data_mix: flappy_train__bridge__val
|
| 15 |
+
eval_num_workers: 8
|
| 16 |
+
include_state: true
|
| 17 |
+
latency_curriculum:
|
| 18 |
+
enabled: false
|
| 19 |
+
obs_image_size: null
|
| 20 |
+
per_device_batch_size: 32
|
| 21 |
+
persistent_workers: true
|
| 22 |
+
pin_memory: true
|
| 23 |
+
prefetch_factor: 4
|
| 24 |
+
framework:
|
| 25 |
+
action_model:
|
| 26 |
+
action_dim: 7
|
| 27 |
+
action_env_dim: 2
|
| 28 |
+
action_hidden_dim: 2560
|
| 29 |
+
action_horizon: 1
|
| 30 |
+
action_model_type: MLP
|
| 31 |
+
loss_type: discrete_ce
|
| 32 |
+
state_dim: 7
|
| 33 |
+
name: QwenOFT
|
| 34 |
+
qwenvl:
|
| 35 |
+
attn_implementation: flash_attention_2
|
| 36 |
+
base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 37 |
+
enable_gradient_checkpointing: true
|
| 38 |
+
output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
|
| 39 |
+
rl_games:
|
| 40 |
+
env_eval:
|
| 41 |
+
distributed_mode: none
|
| 42 |
+
enabled: true
|
| 43 |
+
fixed_episode_seeds: true
|
| 44 |
+
frameskip: 1
|
| 45 |
+
image_size: 224
|
| 46 |
+
latency:
|
| 47 |
+
prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 48 |
+
latency_seed_stride: 0
|
| 49 |
+
mid_train:
|
| 50 |
+
enabled: true
|
| 51 |
+
interval_steps: 250
|
| 52 |
+
latencies:
|
| 53 |
+
- 2
|
| 54 |
+
max_steps_per_episode: 3600
|
| 55 |
+
num_episodes: 5
|
| 56 |
+
seed: 42
|
| 57 |
+
task_description: ''
|
| 58 |
+
task_seed_stride: 0
|
| 59 |
+
vectorized:
|
| 60 |
+
enabled: false
|
| 61 |
+
model_alias: openvla
|
| 62 |
+
task: flappy
|
| 63 |
+
run_id: flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
|
| 64 |
+
run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 65 |
+
seed: 42
|
| 66 |
+
trainer:
|
| 67 |
+
distributed_backend: none
|
| 68 |
+
eval_action_classification: true
|
| 69 |
+
eval_action_classification_interval: null
|
| 70 |
+
eval_interval: 250
|
| 71 |
+
eval_num_batches: 50
|
| 72 |
+
freeze_llm_layers: []
|
| 73 |
+
freeze_modules: ''
|
| 74 |
+
freeze_tied_embedding: false
|
| 75 |
+
freeze_vit: false
|
| 76 |
+
gradient_accumulation_steps: 1
|
| 77 |
+
gradient_clipping: 1.0
|
| 78 |
+
is_resume: false
|
| 79 |
+
learning_rate:
|
| 80 |
+
action_model: 0.0001
|
| 81 |
+
base: 2.0e-05
|
| 82 |
+
qwen_vl_interface: 1.0e-05
|
| 83 |
+
logging_frequency: 1
|
| 84 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 85 |
+
max_train_steps: 3000
|
| 86 |
+
num_warmup_steps: 100
|
| 87 |
+
optimizer:
|
| 88 |
+
betas:
|
| 89 |
+
- 0.9
|
| 90 |
+
- 0.95
|
| 91 |
+
eps: 1.0e-08
|
| 92 |
+
fused: true
|
| 93 |
+
weight_decay: 1.0e-08
|
| 94 |
+
per_latency_eval_num_batches: null
|
| 95 |
+
pretrained_checkpoint: null
|
| 96 |
+
profile_timing:
|
| 97 |
+
enabled: false
|
| 98 |
+
reload_modules: null
|
| 99 |
+
save_interval: 500
|
| 100 |
+
scheduler_specific_kwargs:
|
| 101 |
+
min_lr: 1.0e-06
|
| 102 |
+
wandb_entity: saberrr-zju
|
| 103 |
+
wandb_project: starVLA_rl_games
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics.json
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.6028500199317932,
|
| 6 |
+
0.3971499800682068,
|
| 7 |
+
0.0,
|
| 8 |
+
0.0,
|
| 9 |
+
0.0,
|
| 10 |
+
0.0,
|
| 11 |
+
0.0
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.4890792667865753,
|
| 15 |
+
0.4890792667865753,
|
| 16 |
+
0.0,
|
| 17 |
+
0.0,
|
| 18 |
+
0.0,
|
| 19 |
+
0.0,
|
| 20 |
+
0.0
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
1.0,
|
| 24 |
+
1.0,
|
| 25 |
+
0.0,
|
| 26 |
+
0.0,
|
| 27 |
+
0.0,
|
| 28 |
+
0.0,
|
| 29 |
+
0.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
0.0,
|
| 33 |
+
0.0,
|
| 34 |
+
0.0,
|
| 35 |
+
0.0,
|
| 36 |
+
0.0,
|
| 37 |
+
0.0,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
0.0,
|
| 42 |
+
0.0,
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
1.0,
|
| 51 |
+
1.0,
|
| 52 |
+
0.0,
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0,
|
| 56 |
+
0.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
true
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0,
|
| 73 |
+
0.0,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0
|
| 77 |
+
],
|
| 78 |
+
"std": [
|
| 79 |
+
0.0,
|
| 80 |
+
0.0,
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"max": [
|
| 88 |
+
0.0,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0,
|
| 91 |
+
0.0,
|
| 92 |
+
0.0,
|
| 93 |
+
0.0,
|
| 94 |
+
0.0
|
| 95 |
+
],
|
| 96 |
+
"min": [
|
| 97 |
+
0.0,
|
| 98 |
+
0.0,
|
| 99 |
+
0.0,
|
| 100 |
+
0.0,
|
| 101 |
+
0.0,
|
| 102 |
+
0.0,
|
| 103 |
+
0.0
|
| 104 |
+
],
|
| 105 |
+
"q01": [
|
| 106 |
+
0.0,
|
| 107 |
+
0.0,
|
| 108 |
+
0.0,
|
| 109 |
+
0.0,
|
| 110 |
+
0.0,
|
| 111 |
+
0.0,
|
| 112 |
+
0.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0
|
| 122 |
+
]
|
| 123 |
+
},
|
| 124 |
+
"num_transitions": 330734,
|
| 125 |
+
"num_trajectories": 180
|
| 126 |
+
}
|
| 127 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/dataset_statistics_eval.json
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.7959861159324646,
|
| 6 |
+
0.2040138840675354,
|
| 7 |
+
0.0,
|
| 8 |
+
0.0,
|
| 9 |
+
0.0,
|
| 10 |
+
0.0,
|
| 11 |
+
0.0
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.4030573070049286,
|
| 15 |
+
0.4030573070049286,
|
| 16 |
+
0.0,
|
| 17 |
+
0.0,
|
| 18 |
+
0.0,
|
| 19 |
+
0.0,
|
| 20 |
+
0.0
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
1.0,
|
| 24 |
+
1.0,
|
| 25 |
+
0.0,
|
| 26 |
+
0.0,
|
| 27 |
+
0.0,
|
| 28 |
+
0.0,
|
| 29 |
+
0.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
0.0,
|
| 33 |
+
0.0,
|
| 34 |
+
0.0,
|
| 35 |
+
0.0,
|
| 36 |
+
0.0,
|
| 37 |
+
0.0,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
0.0,
|
| 42 |
+
0.0,
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
1.0,
|
| 51 |
+
1.0,
|
| 52 |
+
0.0,
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0,
|
| 56 |
+
0.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
true
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0,
|
| 73 |
+
0.0,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0
|
| 77 |
+
],
|
| 78 |
+
"std": [
|
| 79 |
+
0.0,
|
| 80 |
+
0.0,
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"max": [
|
| 88 |
+
0.0,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0,
|
| 91 |
+
0.0,
|
| 92 |
+
0.0,
|
| 93 |
+
0.0,
|
| 94 |
+
0.0
|
| 95 |
+
],
|
| 96 |
+
"min": [
|
| 97 |
+
0.0,
|
| 98 |
+
0.0,
|
| 99 |
+
0.0,
|
| 100 |
+
0.0,
|
| 101 |
+
0.0,
|
| 102 |
+
0.0,
|
| 103 |
+
0.0
|
| 104 |
+
],
|
| 105 |
+
"q01": [
|
| 106 |
+
0.0,
|
| 107 |
+
0.0,
|
| 108 |
+
0.0,
|
| 109 |
+
0.0,
|
| 110 |
+
0.0,
|
| 111 |
+
0.0,
|
| 112 |
+
0.0
|
| 113 |
+
],
|
| 114 |
+
"q99": [
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0
|
| 122 |
+
]
|
| 123 |
+
},
|
| 124 |
+
"num_transitions": 72000,
|
| 125 |
+
"num_trajectories": 20
|
| 126 |
+
}
|
| 127 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 4.799999999999995,
|
| 7 |
+
"mean_length": 57.2,
|
| 8 |
+
"std_reward": 1.7999999999999952,
|
| 9 |
+
"std_length": 14.4,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
3.8999999999999986,
|
| 13 |
+
3.8999999999999986,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
3.8999999999999986
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
50,
|
| 20 |
+
50,
|
| 21 |
+
50,
|
| 22 |
+
50
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 237,
|
| 26 |
+
"1": 49
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 4.799999999999995,
|
| 55 |
+
"mean_length": 57.2,
|
| 56 |
+
"std_reward": 1.7999999999999952,
|
| 57 |
+
"std_length": 14.4,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 4.799999999999995,
|
| 60 |
+
"macro_mean_length": 57.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 4.799999999999995,
|
| 7 |
+
"mean_length": 57.2,
|
| 8 |
+
"std_reward": 1.7999999999999952,
|
| 9 |
+
"std_length": 14.4,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
3.8999999999999986,
|
| 13 |
+
3.8999999999999986,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
3.8999999999999986
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
50,
|
| 20 |
+
50,
|
| 21 |
+
50,
|
| 22 |
+
50
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 235,
|
| 26 |
+
"1": 51
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 4.799999999999995,
|
| 55 |
+
"mean_length": 57.2,
|
| 56 |
+
"std_reward": 1.7999999999999952,
|
| 57 |
+
"std_length": 14.4,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 4.799999999999995,
|
| 60 |
+
"macro_mean_length": 57.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 153.74000000000365,
|
| 7 |
+
"mean_length": 1258.2,
|
| 8 |
+
"std_reward": 160.43479173796118,
|
| 9 |
+
"std_length": 1292.4083565189449,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
171.8999999999956,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
139.89999999999702
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
3600,
|
| 20 |
+
1406,
|
| 21 |
+
50,
|
| 22 |
+
1149
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 4978,
|
| 26 |
+
"1": 1313
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 153.74000000000365,
|
| 55 |
+
"mean_length": 1258.2,
|
| 56 |
+
"std_reward": 160.43479173796118,
|
| 57 |
+
"std_length": 1292.4083565189449,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 153.74000000000365,
|
| 60 |
+
"macro_mean_length": 1258.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_1750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 217.38000000000935,
|
| 7 |
+
"mean_length": 1770.0,
|
| 8 |
+
"std_reward": 195.1522113633477,
|
| 9 |
+
"std_length": 1572.0778606672125,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
176.3999999999954,
|
| 13 |
+
12.899999999999974,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
1442,
|
| 20 |
+
122,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 7008,
|
| 26 |
+
"1": 1842
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 1750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 217.38000000000935,
|
| 55 |
+
"mean_length": 1770.0,
|
| 56 |
+
"std_reward": 195.1522113633477,
|
| 57 |
+
"std_length": 1572.0778606672125,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 217.38000000000935,
|
| 60 |
+
"macro_mean_length": 1770.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 116.27999999999801,
|
| 7 |
+
"mean_length": 956.0,
|
| 8 |
+
"std_reward": 102.48608490912143,
|
| 9 |
+
"std_length": 827.2419235024298,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
176.3999999999954,
|
| 13 |
+
12.899999999999974,
|
| 14 |
+
278.89999999999617,
|
| 15 |
+
104.79999999999863
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
1442,
|
| 20 |
+
122,
|
| 21 |
+
2269,
|
| 22 |
+
861
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 3842,
|
| 26 |
+
"1": 938
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 116.27999999999801,
|
| 55 |
+
"mean_length": 956.0,
|
| 56 |
+
"std_reward": 102.48608490912143,
|
| 57 |
+
"std_length": 827.2419235024298,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 116.27999999999801,
|
| 60 |
+
"macro_mean_length": 956.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 243.26000000001076,
|
| 7 |
+
"mean_length": 1978.4,
|
| 8 |
+
"std_reward": 199.3218462688027,
|
| 9 |
+
"std_length": 1605.7832481378052,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
314.8000000000025
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
50,
|
| 22 |
+
2556
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 7898,
|
| 26 |
+
"1": 1994
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 243.26000000001076,
|
| 55 |
+
"mean_length": 1978.4,
|
| 56 |
+
"std_reward": 199.3218462688027,
|
| 57 |
+
"std_length": 1605.7832481378052,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 243.26000000001076,
|
| 60 |
+
"macro_mean_length": 1978.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_250.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 37.10000000000025,
|
| 7 |
+
"mean_length": 319.0,
|
| 8 |
+
"std_reward": 23.528960878033065,
|
| 9 |
+
"std_length": 189.75668631170814,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
60.600000000000485,
|
| 12 |
+
13.69999999999997,
|
| 13 |
+
4.599999999999996,
|
| 14 |
+
59.900000000000475,
|
| 15 |
+
46.70000000000033
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
509,
|
| 19 |
+
130,
|
| 20 |
+
57,
|
| 21 |
+
502,
|
| 22 |
+
397
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 1243,
|
| 26 |
+
"1": 352
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 250,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 37.10000000000025,
|
| 55 |
+
"mean_length": 319.0,
|
| 56 |
+
"std_reward": 23.528960878033065,
|
| 57 |
+
"std_length": 189.75668631170814,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 37.10000000000025,
|
| 60 |
+
"macro_mean_length": 319.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 283.2200000000154,
|
| 7 |
+
"mean_length": 2300.2,
|
| 8 |
+
"std_reward": 198.73168242634333,
|
| 9 |
+
"std_length": 1600.6886517995933,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
444.60000000002566,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
73.89999999999999
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
3600,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
615
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 9213,
|
| 26 |
+
"1": 2288
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 283.2200000000154,
|
| 55 |
+
"mean_length": 2300.2,
|
| 56 |
+
"std_reward": 198.73168242634333,
|
| 57 |
+
"std_length": 1600.6886517995933,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 283.2200000000154,
|
| 60 |
+
"macro_mean_length": 2300.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_2750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 112.99999999999913,
|
| 7 |
+
"mean_length": 930.4,
|
| 8 |
+
"std_reward": 113.37080752998062,
|
| 9 |
+
"std_length": 914.9803495157696,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
176.3999999999954,
|
| 13 |
+
73.89999999999999,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
302.4000000000004
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
1442,
|
| 20 |
+
615,
|
| 21 |
+
50,
|
| 22 |
+
2459
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 3696,
|
| 26 |
+
"1": 956
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 2750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 112.99999999999913,
|
| 55 |
+
"mean_length": 930.4,
|
| 56 |
+
"std_reward": 113.37080752998062,
|
| 57 |
+
"std_length": 914.9803495157696,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 112.99999999999913,
|
| 60 |
+
"macro_mean_length": 930.4,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_3000.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 303.72000000001447,
|
| 7 |
+
"mean_length": 2465.6,
|
| 8 |
+
"std_reward": 180.53576266215117,
|
| 9 |
+
"std_length": 1454.0181016754914,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
8.399999999999986,
|
| 12 |
+
176.3999999999954,
|
| 13 |
+
444.60000000002566,
|
| 14 |
+
444.60000000002566,
|
| 15 |
+
444.60000000002566
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
86,
|
| 19 |
+
1442,
|
| 20 |
+
3600,
|
| 21 |
+
3600,
|
| 22 |
+
3600
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 9797,
|
| 26 |
+
"1": 2531
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 3000,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 303.72000000001447,
|
| 55 |
+
"mean_length": 2465.6,
|
| 56 |
+
"std_reward": 180.53576266215117,
|
| 57 |
+
"std_length": 1454.0181016754914,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 303.72000000001447,
|
| 60 |
+
"macro_mean_length": 2465.6,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_500.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 73.41999999999845,
|
| 7 |
+
"mean_length": 612.0,
|
| 8 |
+
"std_reward": 90.13413116017405,
|
| 9 |
+
"std_length": 727.8139872247579,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
247.19999999999223,
|
| 12 |
+
22.40000000000006,
|
| 13 |
+
18.800000000000022,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
74.79999999999994
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
2015,
|
| 19 |
+
199,
|
| 20 |
+
172,
|
| 21 |
+
50,
|
| 22 |
+
624
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 2359,
|
| 26 |
+
"1": 701
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 500,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 73.41999999999845,
|
| 55 |
+
"mean_length": 612.0,
|
| 56 |
+
"std_reward": 90.13413116017405,
|
| 57 |
+
"std_length": 727.8139872247579,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 73.41999999999845,
|
| 60 |
+
"macro_mean_length": 612.0,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/eval/mid_train/step_750.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"per_latency": {
|
| 3 |
+
"flappy/latency_2": {
|
| 4 |
+
"latency": 2,
|
| 5 |
+
"num_episodes": 5,
|
| 6 |
+
"mean_reward": 72.7599999999988,
|
| 7 |
+
"mean_length": 607.2,
|
| 8 |
+
"std_reward": 66.34210126307238,
|
| 9 |
+
"std_length": 535.8131763964003,
|
| 10 |
+
"episode_rewards": [
|
| 11 |
+
186.79999999999492,
|
| 12 |
+
102.79999999999869,
|
| 13 |
+
18.900000000000023,
|
| 14 |
+
3.8999999999999986,
|
| 15 |
+
51.40000000000038
|
| 16 |
+
],
|
| 17 |
+
"episode_lengths": [
|
| 18 |
+
1528,
|
| 19 |
+
850,
|
| 20 |
+
173,
|
| 21 |
+
50,
|
| 22 |
+
435
|
| 23 |
+
],
|
| 24 |
+
"decoded_action_hist": {
|
| 25 |
+
"0": 2406,
|
| 26 |
+
"1": 630
|
| 27 |
+
},
|
| 28 |
+
"fixed_episode_seeds": true,
|
| 29 |
+
"eval_seed": 42,
|
| 30 |
+
"episode_seeds": [
|
| 31 |
+
42,
|
| 32 |
+
43,
|
| 33 |
+
44,
|
| 34 |
+
45,
|
| 35 |
+
46
|
| 36 |
+
],
|
| 37 |
+
"episode_indices": [
|
| 38 |
+
0,
|
| 39 |
+
1,
|
| 40 |
+
2,
|
| 41 |
+
3,
|
| 42 |
+
4
|
| 43 |
+
]
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"aggregate": {
|
| 47 |
+
"stage": "mid_train",
|
| 48 |
+
"step": 750,
|
| 49 |
+
"task": "flappy",
|
| 50 |
+
"model_alias": "openvla",
|
| 51 |
+
"fixed_episode_seeds": true,
|
| 52 |
+
"eval_seed": 42,
|
| 53 |
+
"total_episodes": 5,
|
| 54 |
+
"mean_reward": 72.7599999999988,
|
| 55 |
+
"mean_length": 607.2,
|
| 56 |
+
"std_reward": 66.34210126307238,
|
| 57 |
+
"std_length": 535.8131763964003,
|
| 58 |
+
"task_count": 1,
|
| 59 |
+
"macro_mean_reward": 72.7599999999988,
|
| 60 |
+
"macro_mean_length": 607.2,
|
| 61 |
+
"distributed_eval": false
|
| 62 |
+
}
|
| 63 |
+
}
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
framework:
|
| 2 |
+
qwenvl:
|
| 3 |
+
base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 4 |
+
attn_implementation: flash_attention_2
|
| 5 |
+
enable_gradient_checkpointing: true
|
| 6 |
+
action_model:
|
| 7 |
+
state_dim: 7
|
| 8 |
+
loss_type: discrete_ce
|
| 9 |
+
action_horizon: 1
|
| 10 |
+
future_action_window_size: 0
|
| 11 |
+
past_action_window_size: 0
|
| 12 |
+
action_dim: 7
|
| 13 |
+
action_env_dim: 2
|
| 14 |
+
name: QwenOFT
|
| 15 |
+
datasets:
|
| 16 |
+
vla_data:
|
| 17 |
+
dataset_py: lerobot_datasets
|
| 18 |
+
include_state: true
|
| 19 |
+
data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 20 |
+
data_mix: flappy_train__bridge
|
| 21 |
+
eval_data_mix: flappy_train__bridge__val
|
| 22 |
+
custom_mixtures_path: null
|
| 23 |
+
action_type: discrete
|
| 24 |
+
sequential_step_sampling: false
|
| 25 |
+
eval_sequential_step_sampling: null
|
| 26 |
+
num_workers: 8
|
| 27 |
+
eval_num_workers: 8
|
| 28 |
+
prefetch_factor: 4
|
| 29 |
+
persistent_workers: true
|
| 30 |
+
pin_memory: true
|
| 31 |
+
shuffle: true
|
| 32 |
+
action_balance:
|
| 33 |
+
enabled: false
|
| 34 |
+
strategy: balanced_epoch
|
| 35 |
+
action_key: action_id
|
| 36 |
+
target_flap_fraction: 0.3
|
| 37 |
+
noop_id: 0
|
| 38 |
+
flap_id: 1
|
| 39 |
+
latency_curriculum:
|
| 40 |
+
enabled: false
|
| 41 |
+
strategy: exclusive
|
| 42 |
+
latencies: null
|
| 43 |
+
phase_steps: null
|
| 44 |
+
per_device_batch_size: 32
|
| 45 |
+
load_all_data_for_training: true
|
| 46 |
+
num_obs_frames: 1
|
| 47 |
+
image_mode: single
|
| 48 |
+
stitch_grid:
|
| 49 |
+
- 2
|
| 50 |
+
- 2
|
| 51 |
+
obs_image_size: null
|
| 52 |
+
video_backend: torchvision_av
|
| 53 |
+
dataset:
|
| 54 |
+
source_hf: ''
|
| 55 |
+
config_name: null
|
| 56 |
+
source_subdir: null
|
| 57 |
+
converted_name: flappy_train
|
| 58 |
+
single_source_hf: ''
|
| 59 |
+
mixed_source_hf: ''
|
| 60 |
+
single_converted_name: flappy_train
|
| 61 |
+
mixed_converted_name: flappy_mixed_latency_train
|
| 62 |
+
single_latency_filter: null
|
| 63 |
+
mixed_latency_filter: null
|
| 64 |
+
force_download: false
|
| 65 |
+
setup_force: false
|
| 66 |
+
skip_verification: false
|
| 67 |
+
verify_rows: 200
|
| 68 |
+
max_episodes: null
|
| 69 |
+
episodes_per_latency: null
|
| 70 |
+
latency_filter: null
|
| 71 |
+
debug_subset:
|
| 72 |
+
enabled: false
|
| 73 |
+
max_episodes: 5
|
| 74 |
+
suffix: debug
|
| 75 |
+
base_model:
|
| 76 |
+
repo_id: Qwen/Qwen3-VL-4B-Instruct
|
| 77 |
+
initialization:
|
| 78 |
+
checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
|
| 79 |
+
checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
|
| 80 |
+
checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
|
| 81 |
+
trainer:
|
| 82 |
+
max_train_steps: 3000
|
| 83 |
+
num_warmup_steps: 100
|
| 84 |
+
save_interval: 500
|
| 85 |
+
eval_interval: 250
|
| 86 |
+
eval_num_batches: 50
|
| 87 |
+
per_latency_eval_num_batches: null
|
| 88 |
+
eval_action_classification: true
|
| 89 |
+
eval_action_classification_interval: null
|
| 90 |
+
cc_f1_tolerance: 1
|
| 91 |
+
learning_rate:
|
| 92 |
+
base: 2.0e-05
|
| 93 |
+
qwen_vl_interface: 1.0e-05
|
| 94 |
+
action_model: 0.0001
|
| 95 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 96 |
+
scheduler_specific_kwargs:
|
| 97 |
+
min_lr: 1.0e-06
|
| 98 |
+
freeze_modules: ''
|
| 99 |
+
freeze_vit: false
|
| 100 |
+
freeze_tied_embedding: false
|
| 101 |
+
freeze_llm_layers: []
|
| 102 |
+
loss_scale:
|
| 103 |
+
vla: 1.0
|
| 104 |
+
vlm: 0.1
|
| 105 |
+
max_grad_norm: 1.0
|
| 106 |
+
weight_decay: 0.0
|
| 107 |
+
logging_frequency: 1
|
| 108 |
+
profile_timing:
|
| 109 |
+
enabled: false
|
| 110 |
+
log_interval: 10
|
| 111 |
+
gradient_clipping: 1.0
|
| 112 |
+
gradient_accumulation_steps: 1
|
| 113 |
+
distributed_backend: none
|
| 114 |
+
is_resume: false
|
| 115 |
+
pretrained_checkpoint: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
|
| 116 |
+
resume_step: 0
|
| 117 |
+
reload_modules: null
|
| 118 |
+
optimizer:
|
| 119 |
+
name: AdamW
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.95
|
| 123 |
+
eps: 1.0e-08
|
| 124 |
+
weight_decay: 1.0e-08
|
| 125 |
+
fused: true
|
| 126 |
+
save_format: pt
|
| 127 |
+
workspace_dir: WORKSPACE_DIR
|
| 128 |
+
run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 129 |
+
seed: 42
|
| 130 |
+
wandb_entity: saberrr-zju
|
| 131 |
+
wandb_project: starVLA_rl_games
|
| 132 |
+
auth:
|
| 133 |
+
env_file: null
|
| 134 |
+
hf_token_env: HF_TOKEN
|
| 135 |
+
wandb_api_key_env: WANDB_API_KEY
|
| 136 |
+
paths:
|
| 137 |
+
run_root_dir: results/Checkpoints
|
| 138 |
+
dataset_local_dir: data/flappy_fix_latency_2_200ep
|
| 139 |
+
dataset_cache_dir: null
|
| 140 |
+
base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 141 |
+
accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
|
| 142 |
+
launch:
|
| 143 |
+
use_accelerate: true
|
| 144 |
+
gpus: null
|
| 145 |
+
num_processes: 1
|
| 146 |
+
dry_run: false
|
| 147 |
+
conda:
|
| 148 |
+
enabled: true
|
| 149 |
+
env_name: null
|
| 150 |
+
rl_games:
|
| 151 |
+
model_alias: openvla
|
| 152 |
+
env_eval:
|
| 153 |
+
image_size: 224
|
| 154 |
+
frameskip: 1
|
| 155 |
+
seed: 42
|
| 156 |
+
fixed_episode_seeds: true
|
| 157 |
+
latency_seed_stride: 0
|
| 158 |
+
task_seed_stride: 0
|
| 159 |
+
task_description: ''
|
| 160 |
+
enabled: true
|
| 161 |
+
distributed_mode: none
|
| 162 |
+
vectorized:
|
| 163 |
+
enabled: false
|
| 164 |
+
batch_size: 1
|
| 165 |
+
latency:
|
| 166 |
+
prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 167 |
+
mode: single
|
| 168 |
+
values:
|
| 169 |
+
- 0
|
| 170 |
+
mid_train:
|
| 171 |
+
enabled: true
|
| 172 |
+
interval_steps: 250
|
| 173 |
+
latencies:
|
| 174 |
+
- 2
|
| 175 |
+
num_episodes: 5
|
| 176 |
+
max_steps_per_episode: 3600
|
| 177 |
+
post_train:
|
| 178 |
+
enabled: false
|
| 179 |
+
latencies:
|
| 180 |
+
- 0
|
| 181 |
+
- 1
|
| 182 |
+
- 2
|
| 183 |
+
- 3
|
| 184 |
+
- 4
|
| 185 |
+
num_episodes: 5
|
| 186 |
+
max_steps_per_episode: 3600
|
| 187 |
+
task: flappy
|
| 188 |
+
initialization_mode: bridge
|
| 189 |
+
action_carrier: bridge
|
| 190 |
+
model: openvla
|
| 191 |
+
env: flappy
|
| 192 |
+
init: bridge
|
| 193 |
+
bridge_base_model:
|
| 194 |
+
repo_id:
|
| 195 |
+
openvla: Qwen/Qwen3-VL-4B-Instruct
|
| 196 |
+
pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
|
| 197 |
+
pi05: Qwen/Qwen3-VL-4B-Instruct
|
| 198 |
+
gr00t: Qwen/Qwen3-VL-4B-Instruct
|
| 199 |
+
local_dir:
|
| 200 |
+
openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 201 |
+
pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
|
| 202 |
+
pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 203 |
+
gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 204 |
+
mode: single
|
| 205 |
+
checkpoint:
|
| 206 |
+
load: none
|
| 207 |
+
hf_repo_id: null
|
| 208 |
+
save_best_model: false
|
| 209 |
+
save_pt_file: false
|
| 210 |
+
local:
|
| 211 |
+
keep_last_n: 1
|
| 212 |
+
sync:
|
| 213 |
+
enabled: false
|
| 214 |
+
repo_id: null
|
| 215 |
+
keep_last_n: 0
|
| 216 |
+
sync_every_n_checkpoints: 1
|
| 217 |
+
resume_policy: local_latest
|
| 218 |
+
run_id: flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
|
| 219 |
+
output_dir: null
|
| 220 |
+
config_yaml: null
|
| 221 |
+
is_debug: false
|
| 222 |
+
version_id: 0.21
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ${run_root_dir}/${run_id}/hydra
|
| 4 |
+
sweep:
|
| 5 |
+
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task:
|
| 115 |
+
- model=openvla
|
| 116 |
+
- env=flappy
|
| 117 |
+
- init=bridge
|
| 118 |
+
- mode=single
|
| 119 |
+
- ++framework.qwenvl.base_vlm=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 120 |
+
- ++framework.qwenvl.attn_implementation=flash_attention_2
|
| 121 |
+
- ++framework.qwenvl.enable_gradient_checkpointing=true
|
| 122 |
+
- ++framework.action_model.state_dim=7
|
| 123 |
+
- ++framework.action_model.loss_type=discrete_ce
|
| 124 |
+
- ++framework.action_model.action_horizon=1
|
| 125 |
+
- ++framework.action_model.future_action_window_size=0
|
| 126 |
+
- ++framework.action_model.past_action_window_size=0
|
| 127 |
+
- ++framework.action_model.action_dim=7
|
| 128 |
+
- ++framework.action_model.action_env_dim=2
|
| 129 |
+
- ++framework.name=QwenOFT
|
| 130 |
+
- ++datasets.vla_data.dataset_py=lerobot_datasets
|
| 131 |
+
- ++datasets.vla_data.include_state=true
|
| 132 |
+
- ++datasets.vla_data.data_root_dir=playground/Datasets/rl_games
|
| 133 |
+
- ++datasets.vla_data.data_mix=flappy_train
|
| 134 |
+
- ++datasets.vla_data.eval_data_mix=null
|
| 135 |
+
- ++datasets.vla_data.custom_mixtures_path=null
|
| 136 |
+
- ++datasets.vla_data.action_type=discrete
|
| 137 |
+
- ++datasets.vla_data.sequential_step_sampling=false
|
| 138 |
+
- ++datasets.vla_data.eval_sequential_step_sampling=null
|
| 139 |
+
- ++datasets.vla_data.num_workers=8
|
| 140 |
+
- ++datasets.vla_data.eval_num_workers=8
|
| 141 |
+
- ++datasets.vla_data.prefetch_factor=4
|
| 142 |
+
- ++datasets.vla_data.persistent_workers=true
|
| 143 |
+
- ++datasets.vla_data.pin_memory=true
|
| 144 |
+
- ++datasets.vla_data.shuffle=true
|
| 145 |
+
- ++datasets.vla_data.action_balance.enabled=false
|
| 146 |
+
- ++datasets.vla_data.action_balance.strategy=balanced_epoch
|
| 147 |
+
- ++datasets.vla_data.action_balance.action_key=action_id
|
| 148 |
+
- ++datasets.vla_data.action_balance.target_flap_fraction=0.3
|
| 149 |
+
- ++datasets.vla_data.action_balance.noop_id=0
|
| 150 |
+
- ++datasets.vla_data.action_balance.flap_id=1
|
| 151 |
+
- ++datasets.vla_data.latency_curriculum.enabled=false
|
| 152 |
+
- ++datasets.vla_data.latency_curriculum.strategy=exclusive
|
| 153 |
+
- ++datasets.vla_data.latency_curriculum.latencies=null
|
| 154 |
+
- ++datasets.vla_data.latency_curriculum.phase_steps=null
|
| 155 |
+
- ++datasets.vla_data.per_device_batch_size=32
|
| 156 |
+
- ++datasets.vla_data.load_all_data_for_training=true
|
| 157 |
+
- ++datasets.vla_data.num_obs_frames=1
|
| 158 |
+
- ++datasets.vla_data.image_mode=single
|
| 159 |
+
- ++datasets.vla_data.stitch_grid=[2,2]
|
| 160 |
+
- ++datasets.vla_data.obs_image_size=null
|
| 161 |
+
- ++datasets.vla_data.video_backend=torchvision_av
|
| 162 |
+
- ++dataset.source_hf=
|
| 163 |
+
- ++dataset.config_name=null
|
| 164 |
+
- ++dataset.source_subdir=null
|
| 165 |
+
- ++dataset.converted_name=flappy_train
|
| 166 |
+
- ++dataset.single_source_hf=
|
| 167 |
+
- ++dataset.mixed_source_hf=
|
| 168 |
+
- ++dataset.single_converted_name=flappy_train
|
| 169 |
+
- ++dataset.mixed_converted_name=flappy_mixed_latency_train
|
| 170 |
+
- ++dataset.single_latency_filter=null
|
| 171 |
+
- ++dataset.mixed_latency_filter=null
|
| 172 |
+
- ++dataset.force_download=false
|
| 173 |
+
- ++dataset.setup_force=false
|
| 174 |
+
- ++dataset.skip_verification=false
|
| 175 |
+
- ++dataset.verify_rows=200
|
| 176 |
+
- ++dataset.max_episodes=null
|
| 177 |
+
- ++dataset.episodes_per_latency=null
|
| 178 |
+
- ++dataset.latency_filter=null
|
| 179 |
+
- ++dataset.debug_subset.enabled=false
|
| 180 |
+
- ++dataset.debug_subset.max_episodes=5
|
| 181 |
+
- ++dataset.debug_subset.suffix=debug
|
| 182 |
+
- ++base_model.repo_id=Qwen/Qwen3-VL-4B-Instruct
|
| 183 |
+
- ++initialization.checkpoint_local_dir=playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
|
| 184 |
+
- ++initialization.checkpoint_hf_repo_id=StarVLA/Qwen3VL-OFT-Bridge-RT-1
|
| 185 |
+
- ++initialization.checkpoint_filename=checkpoints/steps_5000_pytorch_model.pt
|
| 186 |
+
- ++trainer.max_train_steps=3000
|
| 187 |
+
- ++trainer.num_warmup_steps=100
|
| 188 |
+
- ++trainer.save_interval=500
|
| 189 |
+
- ++trainer.eval_interval=250
|
| 190 |
+
- ++trainer.eval_num_batches=50
|
| 191 |
+
- ++trainer.per_latency_eval_num_batches=null
|
| 192 |
+
- ++trainer.eval_action_classification=true
|
| 193 |
+
- ++trainer.eval_action_classification_interval=null
|
| 194 |
+
- ++trainer.cc_f1_tolerance=1
|
| 195 |
+
- ++trainer.learning_rate.base=2e-05
|
| 196 |
+
- ++trainer.learning_rate.qwen_vl_interface=1e-05
|
| 197 |
+
- ++trainer.learning_rate.action_model=0.0001
|
| 198 |
+
- ++trainer.lr_scheduler_type=cosine_with_min_lr
|
| 199 |
+
- ++trainer.scheduler_specific_kwargs.min_lr=1e-06
|
| 200 |
+
- ++trainer.freeze_modules=
|
| 201 |
+
- ++trainer.freeze_vit=false
|
| 202 |
+
- ++trainer.freeze_tied_embedding=false
|
| 203 |
+
- ++trainer.freeze_llm_layers=[]
|
| 204 |
+
- ++trainer.loss_scale.vla=1.0
|
| 205 |
+
- ++trainer.loss_scale.vlm=0.1
|
| 206 |
+
- ++trainer.max_grad_norm=1.0
|
| 207 |
+
- ++trainer.weight_decay=0.0
|
| 208 |
+
- ++trainer.logging_frequency=1
|
| 209 |
+
- ++trainer.profile_timing.enabled=false
|
| 210 |
+
- ++trainer.profile_timing.log_interval=10
|
| 211 |
+
- ++trainer.gradient_clipping=1.0
|
| 212 |
+
- ++trainer.gradient_accumulation_steps=1
|
| 213 |
+
- ++trainer.distributed_backend=none
|
| 214 |
+
- ++trainer.is_resume=false
|
| 215 |
+
- ++trainer.pretrained_checkpoint=null
|
| 216 |
+
- ++trainer.resume_step=0
|
| 217 |
+
- ++trainer.reload_modules=null
|
| 218 |
+
- ++trainer.optimizer.name=AdamW
|
| 219 |
+
- ++trainer.optimizer.betas=[0.9,0.95]
|
| 220 |
+
- ++trainer.optimizer.eps=1e-08
|
| 221 |
+
- ++trainer.optimizer.weight_decay=1e-08
|
| 222 |
+
- ++trainer.optimizer.fused=true
|
| 223 |
+
- ++trainer.save_format=pt
|
| 224 |
+
- ++workspace_dir=WORKSPACE_DIR
|
| 225 |
+
- ++run_root_dir=results/Checkpoints
|
| 226 |
+
- ++seed=42
|
| 227 |
+
- ++wandb_entity=saberrr-zju
|
| 228 |
+
- ++wandb_project=starVLA_rl_games
|
| 229 |
+
- ++auth.env_file=null
|
| 230 |
+
- ++auth.hf_token_env=HF_TOKEN
|
| 231 |
+
- ++auth.wandb_api_key_env=WANDB_API_KEY
|
| 232 |
+
- ++paths.run_root_dir=results/Checkpoints
|
| 233 |
+
- ++paths.dataset_local_dir=data/flappy_fix_latency_2_200ep
|
| 234 |
+
- ++paths.dataset_cache_dir=null
|
| 235 |
+
- ++paths.base_model_dir=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 236 |
+
- ++paths.accelerate_config=starVLA/config/deepseeds/deepspeed_zero2.yaml
|
| 237 |
+
- ++rl_games.model_alias=openvla
|
| 238 |
+
- ++rl_games.env_eval.image_size=224
|
| 239 |
+
- ++rl_games.env_eval.frameskip=1
|
| 240 |
+
- ++rl_games.env_eval.seed=42
|
| 241 |
+
- ++rl_games.env_eval.fixed_episode_seeds=true
|
| 242 |
+
- ++rl_games.env_eval.latency_seed_stride=0
|
| 243 |
+
- ++rl_games.env_eval.task_seed_stride=0
|
| 244 |
+
- ++rl_games.env_eval.task_description=
|
| 245 |
+
- ++rl_games.env_eval.enabled=true
|
| 246 |
+
- ++rl_games.env_eval.distributed_mode=none
|
| 247 |
+
- ++rl_games.env_eval.vectorized.enabled=false
|
| 248 |
+
- ++rl_games.env_eval.vectorized.batch_size=1
|
| 249 |
+
- ++rl_games.env_eval.latency.prompt_map_path=null
|
| 250 |
+
- ++rl_games.env_eval.latency.mode=single
|
| 251 |
+
- ++rl_games.env_eval.latency.values=[0]
|
| 252 |
+
- ++rl_games.env_eval.mid_train.enabled=true
|
| 253 |
+
- ++rl_games.env_eval.mid_train.interval_steps=250
|
| 254 |
+
- ++rl_games.env_eval.mid_train.latencies=[2]
|
| 255 |
+
- ++rl_games.env_eval.mid_train.num_episodes=5
|
| 256 |
+
- ++rl_games.env_eval.mid_train.max_steps_per_episode=3600
|
| 257 |
+
- ++rl_games.env_eval.post_train.enabled=false
|
| 258 |
+
- ++rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
|
| 259 |
+
- ++rl_games.env_eval.post_train.num_episodes=5
|
| 260 |
+
- ++rl_games.env_eval.post_train.max_steps_per_episode=3600
|
| 261 |
+
- ++rl_games.task=flappy
|
| 262 |
+
- ++rl_games.initialization_mode=bridge
|
| 263 |
+
- ++rl_games.action_carrier=bridge
|
| 264 |
+
- ++bridge_base_model.repo_id.openvla=Qwen/Qwen3-VL-4B-Instruct
|
| 265 |
+
- ++bridge_base_model.repo_id.pi0=StarVLA/Qwen2.5-VL-3B-Instruct-Action
|
| 266 |
+
- ++bridge_base_model.repo_id.pi05=Qwen/Qwen3-VL-4B-Instruct
|
| 267 |
+
- ++bridge_base_model.repo_id.gr00t=Qwen/Qwen3-VL-4B-Instruct
|
| 268 |
+
- ++bridge_base_model.local_dir.openvla=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 269 |
+
- ++bridge_base_model.local_dir.pi0=playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
|
| 270 |
+
- ++bridge_base_model.local_dir.pi05=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 271 |
+
- ++bridge_base_model.local_dir.gr00t=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 272 |
+
- ++checkpoint.load=none
|
| 273 |
+
- ++checkpoint.hf_repo_id=null
|
| 274 |
+
- ++checkpoint.save_best_model=false
|
| 275 |
+
- ++checkpoint.save_pt_file=false
|
| 276 |
+
- ++checkpoint.local.keep_last_n=1
|
| 277 |
+
- ++checkpoint.sync.enabled=false
|
| 278 |
+
- ++checkpoint.sync.repo_id=null
|
| 279 |
+
- ++checkpoint.sync.keep_last_n=0
|
| 280 |
+
- ++checkpoint.sync.sync_every_n_checkpoints=1
|
| 281 |
+
- ++checkpoint.sync.resume_policy=local_latest
|
| 282 |
+
- ++run_id=flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
|
| 283 |
+
- ++output_dir=null
|
| 284 |
+
- ++config_yaml=null
|
| 285 |
+
- ++is_debug=false
|
| 286 |
+
- ++version_id=0.21
|
| 287 |
+
- ++run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 288 |
+
- ++trainer.is_resume=false
|
| 289 |
+
- ++trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
|
| 290 |
+
- ++trainer.resume_step=0
|
| 291 |
+
- ++datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 292 |
+
- ++datasets.vla_data.data_mix=flappy_train__bridge
|
| 293 |
+
- ++datasets.vla_data.eval_data_mix=flappy_train__bridge__val
|
| 294 |
+
- ++framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 295 |
+
- ++rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
| 296 |
+
job:
|
| 297 |
+
name: train_starvla_hydra
|
| 298 |
+
chdir: false
|
| 299 |
+
override_dirname: ++auth.env_file=null,++auth.hf_token_env=HF_TOKEN,++auth.wandb_api_key_env=WANDB_API_KEY,++base_model.repo_id=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.gr00t=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.openvla=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.pi05=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++bridge_base_model.local_dir.pi0=playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action,++bridge_base_model.repo_id.gr00t=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.repo_id.openvla=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.repo_id.pi05=Qwen/Qwen3-VL-4B-Instruct,++bridge_base_model.repo_id.pi0=StarVLA/Qwen2.5-VL-3B-Instruct-Action,++checkpoint.hf_repo_id=null,++checkpoint.load=none,++checkpoint.local.keep_last_n=1,++checkpoint.save_best_model=false,++checkpoint.save_pt_file=false,++checkpoint.sync.enabled=false,++checkpoint.sync.keep_last_n=0,++checkpoint.sync.repo_id=null,++checkpoint.sync.resume_policy=local_latest,++checkpoint.sync.sync_every_n_checkpoints=1,++config_yaml=null,++dataset.config_name=null,++dataset.converted_name=flappy_train,++dataset.debug_subset.enabled=false,++dataset.debug_subset.max_episodes=5,++dataset.debug_subset.suffix=debug,++dataset.episodes_per_latency=null,++dataset.force_download=false,++dataset.latency_filter=null,++dataset.max_episodes=null,++dataset.mixed_converted_name=flappy_mixed_latency_train,++dataset.mixed_latency_filter=null,++dataset.mixed_source_hf=,++dataset.setup_force=false,++dataset.single_converted_name=flappy_train,++dataset.single_latency_filter=null,++dataset.single_source_hf=,++dataset.skip_verification=false,++dataset.source_hf=,++dataset.source_subdir=null,++dataset.verify_rows=200,++datasets.vla_data.action_balance.action_key=action_id,++datasets.vla_data.action_balance.enabled=false,++datasets.vla_data.action_balance.flap_id=1,++datasets.vla_data.action_balance.noop_id=0,++datasets.vla_data.action_balance.strategy=balanced_epoch,++datasets.vla_data.action_balance.target_flap_fraction=0.3,++datasets.vla_data.action_type=discrete,++datasets.vla_data.custom_mixtures_path=null,++datasets.vla_data.data_mix=flappy_train,++datasets.vla_data.data_mix=flappy_train__bridge,++datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep,++datasets.vla_data.data_root_dir=playground/Datasets/rl_games,++datasets.vla_data.dataset_py=lerobot_datasets,++datasets.vla_data.eval_data_mix=flappy_train__bridge__val,++datasets.vla_data.eval_data_mix=null,++datasets.vla_data.eval_num_workers=8,++datasets.vla_data.eval_sequential_step_sampling=null,++datasets.vla_data.image_mode=single,++datasets.vla_data.include_state=true,++datasets.vla_data.latency_curriculum.enabled=false,++datasets.vla_data.latency_curriculum.latencies=null,++datasets.vla_data.latency_curriculum.phase_steps=null,++datasets.vla_data.latency_curriculum.strategy=exclusive,++datasets.vla_data.load_all_data_for_training=true,++datasets.vla_data.num_obs_frames=1,++datasets.vla_data.num_workers=8,++datasets.vla_data.obs_image_size=null,++datasets.vla_data.per_device_batch_size=32,++datasets.vla_data.persistent_workers=true,++datasets.vla_data.pin_memory=true,++datasets.vla_data.prefetch_factor=4,++datasets.vla_data.sequential_step_sampling=false,++datasets.vla_data.shuffle=true,++datasets.vla_data.stitch_grid=[2,2],++datasets.vla_data.video_backend=torchvision_av,++framework.action_model.action_dim=7,++framework.action_model.action_env_dim=2,++framework.action_model.action_horizon=1,++framework.action_model.future_action_window_size=0,++framework.action_model.loss_type=discrete_ce,++framework.action_model.past_action_window_size=0,++framework.action_model.state_dim=7,++framework.name=QwenOFT,++framework.qwenvl.attn_implementation=flash_attention_2,++framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct,++framework.qwenvl.base_vlm=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++framework.qwenvl.enable_gradient_checkpointing=true,++initialization.checkpoint_filename=checkpoints/steps_5000_pytorch_model.pt,++initialization.checkpoint_hf_repo_id=StarVLA/Qwen3VL-OFT-Bridge-RT-1,++initialization.checkpoint_local_dir=playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1,++is_debug=false,++output_dir=null,++paths.accelerate_config=starVLA/config/deepseeds/deepspeed_zero2.yaml,++paths.base_model_dir=playground/Pretrained_models/Qwen3-VL-4B-Instruct,++paths.dataset_cache_dir=null,++paths.dataset_local_dir=data/flappy_fix_latency_2_200ep,++paths.run_root_dir=results/Checkpoints,++rl_games.action_carrier=bridge,++rl_games.env_eval.distributed_mode=none,++rl_games.env_eval.enabled=true,++rl_games.env_eval.fixed_episode_seeds=true,++rl_games.env_eval.frameskip=1,++rl_games.env_eval.image_size=224,++rl_games.env_eval.latency.mode=single,++rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json,++rl_games.env_eval.latency.prompt_map_path=null,++rl_games.env_eval.latency.values=[0],++rl_games.env_eval.latency_seed_stride=0,++rl_games.env_eval.mid_train.enabled=true,++rl_games.env_eval.mid_train.interval_steps=250,++rl_games.env_eval.mid_train.latencies=[2],++rl_games.env_eval.mid_train.max_steps_per_episode=3600,++rl_games.env_eval.mid_train.num_episodes=5,++rl_games.env_eval.post_train.enabled=false,++rl_games.env_eval.post_train.latencies=[0,1,2,3,4],++rl_games.env_eval.post_train.max_steps_per_episode=3600,++rl_games.env_eval.post_train.num_episodes=5,++rl_games.env_eval.seed=42,++rl_games.env_eval.task_description=,++rl_games.env_eval.task_seed_stride=0,++rl_games.env_eval.vectorized.batch_size=1,++rl_games.env_eval.vectorized.enabled=false,++rl_games.initialization_mode=bridge,++rl_games.model_alias=openvla,++rl_games.task=flappy,++run_id=flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps,++run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints,++run_root_dir=results/Checkpoints,++seed=42,++trainer.cc_f1_tolerance=1,++trainer.distributed_backend=none,++trainer.eval_action_classification=true,++trainer.eval_action_classification_interval=null,++trainer.eval_interval=250,++trainer.eval_num_batches=50,++trainer.freeze_llm_layers=[],++trainer.freeze_modules=,++trainer.freeze_tied_embedding=false,++trainer.freeze_vit=false,++trainer.gradient_accumulation_steps=1,++trainer.gradient_clipping=1.0,++trainer.is_resume=false,++trainer.is_resume=false,++trainer.learning_rate.action_model=0.0001,++trainer.learning_rate.base=2e-05,++trainer.learning_rate.qwen_vl_interface=1e-05,++trainer.logging_frequency=1,++trainer.loss_scale.vla=1.0,++trainer.loss_scale.vlm=0.1,++trainer.lr_scheduler_type=cosine_with_min_lr,++trainer.max_grad_norm=1.0,++trainer.max_train_steps=3000,++trainer.num_warmup_steps=100,++trainer.optimizer.betas=[0.9,0.95],++trainer.optimizer.eps=1e-08,++trainer.optimizer.fused=true,++trainer.optimizer.name=AdamW,++trainer.optimizer.weight_decay=1e-08,++trainer.per_latency_eval_num_batches=null,++trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt,++trainer.pretrained_checkpoint=null,++trainer.profile_timing.enabled=false,++trainer.profile_timing.log_interval=10,++trainer.reload_modules=null,++trainer.resume_step=0,++trainer.resume_step=0,++trainer.save_format=pt,++trainer.save_interval=500,++trainer.scheduler_specific_kwargs.min_lr=1e-06,++trainer.weight_decay=0.0,++version_id=0.21,++wandb_entity=saberrr-zju,++wandb_project=starVLA_rl_games,++workspace_dir=WORKSPACE_DIR,env=flappy,init=bridge,mode=single,model=openvla
|
| 300 |
+
id: ???
|
| 301 |
+
num: ???
|
| 302 |
+
config_name: train
|
| 303 |
+
env_set: {}
|
| 304 |
+
env_copy: []
|
| 305 |
+
config:
|
| 306 |
+
override_dirname:
|
| 307 |
+
kv_sep: '='
|
| 308 |
+
item_sep: ','
|
| 309 |
+
exclude_keys: []
|
| 310 |
+
runtime:
|
| 311 |
+
version: 1.3.3
|
| 312 |
+
version_base: '1.1'
|
| 313 |
+
cwd: /workspace/latency-sensitive-bench/starVLA
|
| 314 |
+
config_sources:
|
| 315 |
+
- path: hydra.conf
|
| 316 |
+
schema: pkg
|
| 317 |
+
provider: hydra
|
| 318 |
+
- path: /workspace/latency-sensitive-bench/starVLA/examples/rl_games/config
|
| 319 |
+
schema: file
|
| 320 |
+
provider: main
|
| 321 |
+
- path: ''
|
| 322 |
+
schema: structured
|
| 323 |
+
provider: schema
|
| 324 |
+
output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra
|
| 325 |
+
choices:
|
| 326 |
+
cross_task_setup: null
|
| 327 |
+
checkpoint: default
|
| 328 |
+
mode: single
|
| 329 |
+
init: bridge
|
| 330 |
+
env: flappy
|
| 331 |
+
model: openvla
|
| 332 |
+
hydra/env: default
|
| 333 |
+
hydra/callbacks: null
|
| 334 |
+
hydra/job_logging: default
|
| 335 |
+
hydra/hydra_logging: default
|
| 336 |
+
hydra/hydra_help: default
|
| 337 |
+
hydra/help: default
|
| 338 |
+
hydra/sweeper: basic
|
| 339 |
+
hydra/launcher: basic
|
| 340 |
+
hydra/output: default
|
| 341 |
+
verbose: false
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- model=openvla
|
| 2 |
+
- env=flappy
|
| 3 |
+
- init=bridge
|
| 4 |
+
- mode=single
|
| 5 |
+
- ++framework.qwenvl.base_vlm=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 6 |
+
- ++framework.qwenvl.attn_implementation=flash_attention_2
|
| 7 |
+
- ++framework.qwenvl.enable_gradient_checkpointing=true
|
| 8 |
+
- ++framework.action_model.state_dim=7
|
| 9 |
+
- ++framework.action_model.loss_type=discrete_ce
|
| 10 |
+
- ++framework.action_model.action_horizon=1
|
| 11 |
+
- ++framework.action_model.future_action_window_size=0
|
| 12 |
+
- ++framework.action_model.past_action_window_size=0
|
| 13 |
+
- ++framework.action_model.action_dim=7
|
| 14 |
+
- ++framework.action_model.action_env_dim=2
|
| 15 |
+
- ++framework.name=QwenOFT
|
| 16 |
+
- ++datasets.vla_data.dataset_py=lerobot_datasets
|
| 17 |
+
- ++datasets.vla_data.include_state=true
|
| 18 |
+
- ++datasets.vla_data.data_root_dir=playground/Datasets/rl_games
|
| 19 |
+
- ++datasets.vla_data.data_mix=flappy_train
|
| 20 |
+
- ++datasets.vla_data.eval_data_mix=null
|
| 21 |
+
- ++datasets.vla_data.custom_mixtures_path=null
|
| 22 |
+
- ++datasets.vla_data.action_type=discrete
|
| 23 |
+
- ++datasets.vla_data.sequential_step_sampling=false
|
| 24 |
+
- ++datasets.vla_data.eval_sequential_step_sampling=null
|
| 25 |
+
- ++datasets.vla_data.num_workers=8
|
| 26 |
+
- ++datasets.vla_data.eval_num_workers=8
|
| 27 |
+
- ++datasets.vla_data.prefetch_factor=4
|
| 28 |
+
- ++datasets.vla_data.persistent_workers=true
|
| 29 |
+
- ++datasets.vla_data.pin_memory=true
|
| 30 |
+
- ++datasets.vla_data.shuffle=true
|
| 31 |
+
- ++datasets.vla_data.action_balance.enabled=false
|
| 32 |
+
- ++datasets.vla_data.action_balance.strategy=balanced_epoch
|
| 33 |
+
- ++datasets.vla_data.action_balance.action_key=action_id
|
| 34 |
+
- ++datasets.vla_data.action_balance.target_flap_fraction=0.3
|
| 35 |
+
- ++datasets.vla_data.action_balance.noop_id=0
|
| 36 |
+
- ++datasets.vla_data.action_balance.flap_id=1
|
| 37 |
+
- ++datasets.vla_data.latency_curriculum.enabled=false
|
| 38 |
+
- ++datasets.vla_data.latency_curriculum.strategy=exclusive
|
| 39 |
+
- ++datasets.vla_data.latency_curriculum.latencies=null
|
| 40 |
+
- ++datasets.vla_data.latency_curriculum.phase_steps=null
|
| 41 |
+
- ++datasets.vla_data.per_device_batch_size=32
|
| 42 |
+
- ++datasets.vla_data.load_all_data_for_training=true
|
| 43 |
+
- ++datasets.vla_data.num_obs_frames=1
|
| 44 |
+
- ++datasets.vla_data.image_mode=single
|
| 45 |
+
- ++datasets.vla_data.stitch_grid=[2,2]
|
| 46 |
+
- ++datasets.vla_data.obs_image_size=null
|
| 47 |
+
- ++datasets.vla_data.video_backend=torchvision_av
|
| 48 |
+
- ++dataset.source_hf=
|
| 49 |
+
- ++dataset.config_name=null
|
| 50 |
+
- ++dataset.source_subdir=null
|
| 51 |
+
- ++dataset.converted_name=flappy_train
|
| 52 |
+
- ++dataset.single_source_hf=
|
| 53 |
+
- ++dataset.mixed_source_hf=
|
| 54 |
+
- ++dataset.single_converted_name=flappy_train
|
| 55 |
+
- ++dataset.mixed_converted_name=flappy_mixed_latency_train
|
| 56 |
+
- ++dataset.single_latency_filter=null
|
| 57 |
+
- ++dataset.mixed_latency_filter=null
|
| 58 |
+
- ++dataset.force_download=false
|
| 59 |
+
- ++dataset.setup_force=false
|
| 60 |
+
- ++dataset.skip_verification=false
|
| 61 |
+
- ++dataset.verify_rows=200
|
| 62 |
+
- ++dataset.max_episodes=null
|
| 63 |
+
- ++dataset.episodes_per_latency=null
|
| 64 |
+
- ++dataset.latency_filter=null
|
| 65 |
+
- ++dataset.debug_subset.enabled=false
|
| 66 |
+
- ++dataset.debug_subset.max_episodes=5
|
| 67 |
+
- ++dataset.debug_subset.suffix=debug
|
| 68 |
+
- ++base_model.repo_id=Qwen/Qwen3-VL-4B-Instruct
|
| 69 |
+
- ++initialization.checkpoint_local_dir=playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
|
| 70 |
+
- ++initialization.checkpoint_hf_repo_id=StarVLA/Qwen3VL-OFT-Bridge-RT-1
|
| 71 |
+
- ++initialization.checkpoint_filename=checkpoints/steps_5000_pytorch_model.pt
|
| 72 |
+
- ++trainer.max_train_steps=3000
|
| 73 |
+
- ++trainer.num_warmup_steps=100
|
| 74 |
+
- ++trainer.save_interval=500
|
| 75 |
+
- ++trainer.eval_interval=250
|
| 76 |
+
- ++trainer.eval_num_batches=50
|
| 77 |
+
- ++trainer.per_latency_eval_num_batches=null
|
| 78 |
+
- ++trainer.eval_action_classification=true
|
| 79 |
+
- ++trainer.eval_action_classification_interval=null
|
| 80 |
+
- ++trainer.cc_f1_tolerance=1
|
| 81 |
+
- ++trainer.learning_rate.base=2e-05
|
| 82 |
+
- ++trainer.learning_rate.qwen_vl_interface=1e-05
|
| 83 |
+
- ++trainer.learning_rate.action_model=0.0001
|
| 84 |
+
- ++trainer.lr_scheduler_type=cosine_with_min_lr
|
| 85 |
+
- ++trainer.scheduler_specific_kwargs.min_lr=1e-06
|
| 86 |
+
- ++trainer.freeze_modules=
|
| 87 |
+
- ++trainer.freeze_vit=false
|
| 88 |
+
- ++trainer.freeze_tied_embedding=false
|
| 89 |
+
- ++trainer.freeze_llm_layers=[]
|
| 90 |
+
- ++trainer.loss_scale.vla=1.0
|
| 91 |
+
- ++trainer.loss_scale.vlm=0.1
|
| 92 |
+
- ++trainer.max_grad_norm=1.0
|
| 93 |
+
- ++trainer.weight_decay=0.0
|
| 94 |
+
- ++trainer.logging_frequency=1
|
| 95 |
+
- ++trainer.profile_timing.enabled=false
|
| 96 |
+
- ++trainer.profile_timing.log_interval=10
|
| 97 |
+
- ++trainer.gradient_clipping=1.0
|
| 98 |
+
- ++trainer.gradient_accumulation_steps=1
|
| 99 |
+
- ++trainer.distributed_backend=none
|
| 100 |
+
- ++trainer.is_resume=false
|
| 101 |
+
- ++trainer.pretrained_checkpoint=null
|
| 102 |
+
- ++trainer.resume_step=0
|
| 103 |
+
- ++trainer.reload_modules=null
|
| 104 |
+
- ++trainer.optimizer.name=AdamW
|
| 105 |
+
- ++trainer.optimizer.betas=[0.9,0.95]
|
| 106 |
+
- ++trainer.optimizer.eps=1e-08
|
| 107 |
+
- ++trainer.optimizer.weight_decay=1e-08
|
| 108 |
+
- ++trainer.optimizer.fused=true
|
| 109 |
+
- ++trainer.save_format=pt
|
| 110 |
+
- ++workspace_dir=WORKSPACE_DIR
|
| 111 |
+
- ++run_root_dir=results/Checkpoints
|
| 112 |
+
- ++seed=42
|
| 113 |
+
- ++wandb_entity=saberrr-zju
|
| 114 |
+
- ++wandb_project=starVLA_rl_games
|
| 115 |
+
- ++auth.env_file=null
|
| 116 |
+
- ++auth.hf_token_env=HF_TOKEN
|
| 117 |
+
- ++auth.wandb_api_key_env=WANDB_API_KEY
|
| 118 |
+
- ++paths.run_root_dir=results/Checkpoints
|
| 119 |
+
- ++paths.dataset_local_dir=data/flappy_fix_latency_2_200ep
|
| 120 |
+
- ++paths.dataset_cache_dir=null
|
| 121 |
+
- ++paths.base_model_dir=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 122 |
+
- ++paths.accelerate_config=starVLA/config/deepseeds/deepspeed_zero2.yaml
|
| 123 |
+
- ++rl_games.model_alias=openvla
|
| 124 |
+
- ++rl_games.env_eval.image_size=224
|
| 125 |
+
- ++rl_games.env_eval.frameskip=1
|
| 126 |
+
- ++rl_games.env_eval.seed=42
|
| 127 |
+
- ++rl_games.env_eval.fixed_episode_seeds=true
|
| 128 |
+
- ++rl_games.env_eval.latency_seed_stride=0
|
| 129 |
+
- ++rl_games.env_eval.task_seed_stride=0
|
| 130 |
+
- ++rl_games.env_eval.task_description=
|
| 131 |
+
- ++rl_games.env_eval.enabled=true
|
| 132 |
+
- ++rl_games.env_eval.distributed_mode=none
|
| 133 |
+
- ++rl_games.env_eval.vectorized.enabled=false
|
| 134 |
+
- ++rl_games.env_eval.vectorized.batch_size=1
|
| 135 |
+
- ++rl_games.env_eval.latency.prompt_map_path=null
|
| 136 |
+
- ++rl_games.env_eval.latency.mode=single
|
| 137 |
+
- ++rl_games.env_eval.latency.values=[0]
|
| 138 |
+
- ++rl_games.env_eval.mid_train.enabled=true
|
| 139 |
+
- ++rl_games.env_eval.mid_train.interval_steps=250
|
| 140 |
+
- ++rl_games.env_eval.mid_train.latencies=[2]
|
| 141 |
+
- ++rl_games.env_eval.mid_train.num_episodes=5
|
| 142 |
+
- ++rl_games.env_eval.mid_train.max_steps_per_episode=3600
|
| 143 |
+
- ++rl_games.env_eval.post_train.enabled=false
|
| 144 |
+
- ++rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
|
| 145 |
+
- ++rl_games.env_eval.post_train.num_episodes=5
|
| 146 |
+
- ++rl_games.env_eval.post_train.max_steps_per_episode=3600
|
| 147 |
+
- ++rl_games.task=flappy
|
| 148 |
+
- ++rl_games.initialization_mode=bridge
|
| 149 |
+
- ++rl_games.action_carrier=bridge
|
| 150 |
+
- ++bridge_base_model.repo_id.openvla=Qwen/Qwen3-VL-4B-Instruct
|
| 151 |
+
- ++bridge_base_model.repo_id.pi0=StarVLA/Qwen2.5-VL-3B-Instruct-Action
|
| 152 |
+
- ++bridge_base_model.repo_id.pi05=Qwen/Qwen3-VL-4B-Instruct
|
| 153 |
+
- ++bridge_base_model.repo_id.gr00t=Qwen/Qwen3-VL-4B-Instruct
|
| 154 |
+
- ++bridge_base_model.local_dir.openvla=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 155 |
+
- ++bridge_base_model.local_dir.pi0=playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
|
| 156 |
+
- ++bridge_base_model.local_dir.pi05=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 157 |
+
- ++bridge_base_model.local_dir.gr00t=playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 158 |
+
- ++checkpoint.load=none
|
| 159 |
+
- ++checkpoint.hf_repo_id=null
|
| 160 |
+
- ++checkpoint.save_best_model=false
|
| 161 |
+
- ++checkpoint.save_pt_file=false
|
| 162 |
+
- ++checkpoint.local.keep_last_n=1
|
| 163 |
+
- ++checkpoint.sync.enabled=false
|
| 164 |
+
- ++checkpoint.sync.repo_id=null
|
| 165 |
+
- ++checkpoint.sync.keep_last_n=0
|
| 166 |
+
- ++checkpoint.sync.sync_every_n_checkpoints=1
|
| 167 |
+
- ++checkpoint.sync.resume_policy=local_latest
|
| 168 |
+
- ++run_id=flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps
|
| 169 |
+
- ++output_dir=null
|
| 170 |
+
- ++config_yaml=null
|
| 171 |
+
- ++is_debug=false
|
| 172 |
+
- ++version_id=0.21
|
| 173 |
+
- ++run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
|
| 174 |
+
- ++trainer.is_resume=false
|
| 175 |
+
- ++trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
|
| 176 |
+
- ++trainer.resume_step=0
|
| 177 |
+
- ++datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
|
| 178 |
+
- ++datasets.vla_data.data_mix=flappy_train__bridge
|
| 179 |
+
- ++datasets.vla_data.eval_data_mix=flappy_train__bridge__val
|
| 180 |
+
- ++framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
|
| 181 |
+
- ++rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/hydra/train_starvla_hydra.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
flappy_fix_latency_2_200ep_full_tuning_corrected_bs32_3k_steps/summary.jsonl
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 500}
|
| 2 |
+
{"steps": 1000}
|
| 3 |
+
{"steps": 1500}
|
| 4 |
+
{"steps": 2000}
|
| 5 |
+
{"steps": 2500}
|
| 6 |
+
{"steps": 3000}
|