Caesarrr commited on 7 days ago

Commit

d8e0eac

verified ·

1 Parent(s): 52480ff

Upload folder using huggingface_hub

Browse files

Files changed (33) hide show

flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt +3 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/model.safetensors +3 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/optimizer.bin +3 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/random_states_0.pkl +3 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/config.full.yaml +219 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/config.yaml +99 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics.json +127 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics_eval.json +127 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1000.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1250.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1500.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1750.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2000.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2250.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_250.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2500.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2750.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3000.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3250.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3500.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3750.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4000.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4250.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4500.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4750.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_500.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_5000.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_750.json +63 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/config.yaml +217 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/hydra.yaml +266 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/overrides.yaml +106 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/train_starvla_hydra.log +0 -0
flappy_fix_latency_2_200ep_full_tuning_corrected/summary.jsonl +10 -0

flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:371cb744227687bb99bcad7f9ff2250cf06da75631359ad3eba4c6bc52570607
+size 9785060316

flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff343b81e73667493a90c8c0696872ab7f3ed1bf55fae59cf8b57f9785b040eb
+size 9138230516

flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/optimizer.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1a7b363b24f7908954dadea5afd7d198c03fc118e223f915b959f6d4d178060
+size 18276885098

flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/steps_5000_state/random_states_0.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e27bbcb30ed5d67c406a486689500f03328292ad932e058af8734a4eacb28fea
+size 14821

flappy_fix_latency_2_200ep_full_tuning_corrected/config.full.yaml ADDED Viewed

	@@ -0,0 +1,219 @@

+framework:
+  name: QwenOFT
+  qwenvl:
+    base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    attn_implementation: flash_attention_2
+    enable_gradient_checkpointing: true
+  action_model:
+    action_model_type: MLP
+    action_dim: 7
+    action_hidden_dim: 2560
+    future_action_window_size: 0
+    past_action_window_size: 0
+    loss_type: discrete_ce
+    state_dim: 7
+    action_horizon: 1
+    action_env_dim: 2
+datasets:
+  vla_data:
+    dataset_py: lerobot_datasets
+    include_state: true
+    data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
+    data_mix: flappy_train__bridge
+    eval_data_mix: flappy_train__bridge__val
+    custom_mixtures_path: null
+    action_type: discrete
+    sequential_step_sampling: false
+    eval_sequential_step_sampling: null
+    num_workers: 8
+    eval_num_workers: 8
+    prefetch_factor: 4
+    persistent_workers: true
+    pin_memory: true
+    shuffle: true
+    action_balance:
+      enabled: false
+      strategy: balanced_epoch
+      action_key: action_id
+      target_flap_fraction: 0.3
+      noop_id: 0
+      flap_id: 1
+    latency_curriculum:
+      enabled: false
+      strategy: exclusive
+      latencies: null
+      phase_steps: null
+    per_device_batch_size: 32
+    load_all_data_for_training: true
+    num_obs_frames: 1
+    image_mode: single
+    stitch_grid:
+    - 2
+    - 2
+    obs_image_size: null
+    video_backend: torchvision_av
+dataset:
+  source_hf: ''
+  config_name: null
+  source_subdir: null
+  converted_name: flappy_train
+  single_source_hf: ''
+  mixed_source_hf: ''
+  single_converted_name: flappy_train
+  mixed_converted_name: flappy_mixed_latency_train
+  single_latency_filter: null
+  mixed_latency_filter: null
+  force_download: false
+  setup_force: false
+  skip_verification: false
+  verify_rows: 200
+  max_episodes: null
+  episodes_per_latency: null
+  latency_filter: null
+  debug_subset:
+    enabled: false
+    max_episodes: 5
+    suffix: debug
+base_model:
+  repo_id: Qwen/Qwen3-VL-4B-Instruct
+initialization:
+  checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
+  checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
+  checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
+trainer:
+  max_train_steps: 5000
+  num_warmup_steps: 100
+  save_interval: 500
+  eval_interval: 100
+  eval_num_batches: 100
+  per_latency_eval_num_batches: null
+  eval_action_classification: true
+  eval_action_classification_interval: null
+  cc_f1_tolerance: 1
+  learning_rate:
+    base: 2.0e-05
+    qwen_vl_interface: 1.0e-05
+    action_model: 0.0001
+  lr_scheduler_type: cosine_with_min_lr
+  scheduler_specific_kwargs:
+    min_lr: 1.0e-06
+  freeze_modules: ''
+  freeze_llm_bottom_ratio: 0.0
+  loss_scale:
+    vla: 1.0
+    vlm: 0.1
+  max_grad_norm: 1.0
+  weight_decay: 0.0
+  logging_frequency: 1
+  gradient_clipping: 1.0
+  gradient_accumulation_steps: 4
+  distributed_backend: none
+  is_resume: false
+  pretrained_checkpoint: null
+  resume_step: 0
+  reload_modules: null
+  optimizer:
+    name: AdamW
+    betas:
+    - 0.9
+    - 0.95
+    eps: 1.0e-08
+    weight_decay: 1.0e-08
+    fused: true
+  save_format: pt
+workspace_dir: WORKSPACE_DIR
+run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
+seed: 42
+wandb_entity: saberrr-zju
+wandb_project: starVLA_rl_games
+auth:
+  env_file: null
+  hf_token_env: HF_TOKEN
+  wandb_api_key_env: WANDB_API_KEY
+paths:
+  run_root_dir: results/Checkpoints
+  dataset_local_dir: playground/Datasets/rl_games
+  dataset_cache_dir: null
+  base_model_dir: playground/Pretrained_models/Qwen3-VL-4B-Instruct
+  accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
+launch:
+  use_accelerate: true
+  gpus: null
+  num_processes: 1
+  dry_run: false
+conda:
+  enabled: true
+  env_name: null
+rl_games:
+  model_alias: openvla
+  env_eval:
+    image_size: 224
+    frameskip: 1
+    seed: 42
+    fixed_episode_seeds: true
+    latency_seed_stride: 0
+    task_seed_stride: 0
+    task_description: ''
+    enabled: true
+    distributed_mode: none
+    vectorized:
+      enabled: false
+      batch_size: 1
+    latency:
+      prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
+      mode: single
+      values:
+      - 0
+    mid_train:
+      enabled: true
+      interval_steps: 250
+      latencies:
+      - 2
+      num_episodes: 5
+      max_steps_per_episode: 3600
+    post_train:
+      enabled: false
+      latencies:
+      - 0
+      - 1
+      - 2
+      - 3
+      - 4
+      num_episodes: 5
+      max_steps_per_episode: 3600
+  task: flappy
+  initialization_mode: bridge
+  action_carrier: bridge
+model: openvla
+env: flappy
+init: bridge
+bridge_base_model:
+  repo_id:
+    openvla: Qwen/Qwen3-VL-4B-Instruct
+    pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
+    pi05: Qwen/Qwen3-VL-4B-Instruct
+    gr00t: Qwen/Qwen3-VL-4B-Instruct
+  local_dir:
+    openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
+    pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
+mode: single
+checkpoint:
+  load: auto
+  hf_repo_id: null
+  save_best_model: false
+  save_pt_file: false
+  local:
+    keep_last_n: 1
+  sync:
+    enabled: false
+    repo_id: null
+    keep_last_n: 0
+    sync_every_n_checkpoints: 1
+    resume_policy: local_latest
+run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
+output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected
+config_yaml: null
+is_debug: false
+version_id: '0.21'

flappy_fix_latency_2_200ep_full_tuning_corrected/config.yaml ADDED Viewed

	@@ -0,0 +1,99 @@

+checkpoint:
+  local:
+    keep_last_n: 1
+  save_best_model: false
+  save_pt_file: false
+  sync:
+    enabled: false
+    keep_last_n: 0
+    repo_id: null
+datasets:
+  vla_data:
+    data_mix: flappy_train__bridge
+    dataset_py: lerobot_datasets
+    eval_data_mix: flappy_train__bridge__val
+    eval_num_workers: 8
+    include_state: true
+    latency_curriculum:
+      enabled: false
+    obs_image_size: null
+    per_device_batch_size: 32
+    persistent_workers: true
+    pin_memory: true
+    prefetch_factor: 4
+framework:
+  action_model:
+    action_dim: 7
+    action_env_dim: 2
+    action_hidden_dim: 2560
+    action_horizon: 1
+    action_model_type: MLP
+    loss_type: discrete_ce
+    state_dim: 7
+  name: QwenOFT
+  qwenvl:
+    attn_implementation: flash_attention_2
+    base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    enable_gradient_checkpointing: true
+output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected
+rl_games:
+  env_eval:
+    distributed_mode: none
+    enabled: true
+    fixed_episode_seeds: true
+    frameskip: 1
+    image_size: 224
+    latency:
+      prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
+    latency_seed_stride: 0
+    mid_train:
+      enabled: true
+      interval_steps: 250
+      latencies:
+      - 2
+      max_steps_per_episode: 3600
+      num_episodes: 5
+    seed: 42
+    task_description: ''
+    task_seed_stride: 0
+    vectorized:
+      enabled: false
+  model_alias: openvla
+  task: flappy
+run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
+run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
+seed: 42
+trainer:
+  distributed_backend: none
+  eval_action_classification: true
+  eval_action_classification_interval: null
+  eval_interval: 100
+  eval_num_batches: 100
+  freeze_llm_bottom_ratio: 0.0
+  freeze_modules: ''
+  gradient_accumulation_steps: 4
+  gradient_clipping: 1.0
+  is_resume: false
+  learning_rate:
+    action_model: 0.0001
+    base: 2.0e-05
+    qwen_vl_interface: 1.0e-05
+  logging_frequency: 1
+  lr_scheduler_type: cosine_with_min_lr
+  max_train_steps: 5000
+  num_warmup_steps: 100
+  optimizer:
+    betas:
+    - 0.9
+    - 0.95
+    eps: 1.0e-08
+    fused: true
+    weight_decay: 1.0e-08
+  per_latency_eval_num_batches: null
+  pretrained_checkpoint: null
+  reload_modules: null
+  save_interval: 500
+  scheduler_specific_kwargs:
+    min_lr: 1.0e-06
+wandb_entity: saberrr-zju
+wandb_project: starVLA_rl_games

flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,127 @@

+{
+  "new_embodiment": {
+    "action": {
+      "mean": [
+        0.6028500199317932,
+        0.3971499800682068,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "std": [
+        0.4890792667865753,
+        0.4890792667865753,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "max": [
+        1.0,
+        1.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "min": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q01": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q99": [
+        1.0,
+        1.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "mask": [
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true
+      ]
+    },
+    "state": {
+      "mean": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "std": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "max": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "min": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q01": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q99": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ]
+    },
+    "num_transitions": 330734,
+    "num_trajectories": 180
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/dataset_statistics_eval.json ADDED Viewed

	@@ -0,0 +1,127 @@

+{
+  "new_embodiment": {
+    "action": {
+      "mean": [
+        0.7959861159324646,
+        0.2040138840675354,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "std": [
+        0.4030573070049286,
+        0.4030573070049286,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "max": [
+        1.0,
+        1.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "min": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q01": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q99": [
+        1.0,
+        1.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "mask": [
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true
+      ]
+    },
+    "state": {
+      "mean": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "std": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "max": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "min": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q01": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "q99": [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ]
+    },
+    "num_transitions": 72000,
+    "num_trajectories": 20
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1000.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 79.39999999999868,
+      "mean_length": 659.2,
+      "std_reward": 62.80047770518764,
+      "std_length": 506.935656666603,
+      "episode_rewards": [
+        8.399999999999986,
+        31.900000000000162,
+        171.8999999999956,
+        50.40000000000037,
+        134.39999999999728
+      ],
+      "episode_lengths": [
+        86,
+        276,
+        1406,
+        425,
+        1103
+      ],
+      "decoded_action_hist": {
+        "0": 2647,
+        "1": 649
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 1000,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 79.39999999999868,
+    "mean_length": 659.2,
+    "std_reward": 62.80047770518764,
+    "std_length": 506.935656666603,
+    "task_count": 1,
+    "macro_mean_reward": 79.39999999999868,
+    "macro_mean_length": 659.2,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1250.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 335.94000000001523,
+      "mean_length": 2724.8,
+      "std_reward": 154.00615052654007,
+      "std_length": 1240.415317544894,
+      "episode_rewards": [
+        444.60000000002566,
+        295.49999999999903,
+        444.60000000002566,
+        444.60000000002566,
+        50.40000000000037
+      ],
+      "episode_lengths": [
+        3600,
+        2399,
+        3600,
+        3600,
+        425
+      ],
+      "decoded_action_hist": {
+        "0": 10894,
+        "1": 2730
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 1250,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 335.94000000001523,
+    "mean_length": 2724.8,
+    "std_reward": 154.00615052654007,
+    "std_length": 1240.415317544894,
+    "task_count": 1,
+    "macro_mean_reward": 335.94000000001523,
+    "macro_mean_length": 2724.8,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1500.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 292.5400000000077,
+      "mean_length": 2375.8,
+      "std_reward": 145.61193082987145,
+      "std_length": 1173.9889948376858,
+      "episode_rewards": [
+        52.70000000000039,
+        444.60000000002566,
+        444.60000000002566,
+        269.8999999999946,
+        250.89999999999208
+      ],
+      "episode_lengths": [
+        439,
+        3600,
+        3600,
+        2197,
+        2043
+      ],
+      "decoded_action_hist": {
+        "0": 9500,
+        "1": 2379
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 1500,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 292.5400000000077,
+    "mean_length": 2375.8,
+    "std_reward": 145.61193082987145,
+    "std_length": 1173.9889948376858,
+    "task_count": 1,
+    "macro_mean_reward": 292.5400000000077,
+    "macro_mean_length": 2375.8,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_1750.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 390.9600000000196,
+      "mean_length": 3168.4,
+      "std_reward": 107.2800000000121,
+      "std_length": 863.2,
+      "episode_rewards": [
+        444.60000000002566,
+        176.3999999999954,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        3600,
+        1442,
+        3600,
+        3600,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 12633,
+        "1": 3209
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 1750,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 390.9600000000196,
+    "mean_length": 3168.4,
+    "std_reward": 107.2800000000121,
+    "std_length": 863.2,
+    "task_count": 1,
+    "macro_mean_reward": 390.9600000000196,
+    "macro_mean_length": 3168.4,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2000.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 282.3200000000154,
+      "mean_length": 2293.0,
+      "std_reward": 199.98049304870963,
+      "std_length": 1610.6818431956076,
+      "episode_rewards": [
+        73.89999999999999,
+        444.60000000002566,
+        444.60000000002566,
+        3.8999999999999986,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        615,
+        3600,
+        3600,
+        50,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 9138,
+        "1": 2327
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 2000,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 282.3200000000154,
+    "mean_length": 2293.0,
+    "std_reward": 199.98049304870963,
+    "std_length": 1610.6818431956076,
+    "task_count": 1,
+    "macro_mean_reward": 282.3200000000154,
+    "macro_mean_length": 2293.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2250.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 444.60000000002566,
+      "mean_length": 3600.0,
+      "std_reward": 0.0,
+      "std_length": 0.0,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 14346,
+        "1": 3654
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 2250,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 444.60000000002566,
+    "mean_length": 3600.0,
+    "std_reward": 0.0,
+    "std_length": 0.0,
+    "task_count": 1,
+    "macro_mean_reward": 444.60000000002566,
+    "macro_mean_length": 3600.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_250.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 65.11999999999905,
+      "mean_length": 543.4,
+      "std_reward": 57.274371231815906,
+      "std_length": 461.5935874771225,
+      "episode_rewards": [
+        3.8999999999999986,
+        132.49999999999739,
+        54.90000000000042,
+        3.8999999999999986,
+        130.39999999999745
+      ],
+      "episode_lengths": [
+        50,
+        1084,
+        461,
+        50,
+        1072
+      ],
+      "decoded_action_hist": {
+        "0": 2142,
+        "1": 575
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 250,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 65.11999999999905,
+    "mean_length": 543.4,
+    "std_reward": 57.274371231815906,
+    "std_length": 461.5935874771225,
+    "task_count": 1,
+    "macro_mean_reward": 65.11999999999905,
+    "macro_mean_length": 543.4,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2500.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 370.4600000000205,
+      "mean_length": 3003.0,
+      "std_reward": 148.28000000001026,
+      "std_length": 1194.0,
+      "episode_rewards": [
+        73.89999999999999,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        615,
+        3600,
+        3600,
+        3600,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 11891,
+        "1": 3124
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 2500,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 370.4600000000205,
+    "mean_length": 3003.0,
+    "std_reward": 148.28000000001026,
+    "std_length": 1194.0,
+    "task_count": 1,
+    "macro_mean_reward": 370.4600000000205,
+    "macro_mean_length": 3003.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_2750.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 218.3800000000104,
+      "mean_length": 1778.2,
+      "std_reward": 184.83810646077453,
+      "std_length": 1488.5559982748382,
+      "episode_rewards": [
+        73.89999999999999,
+        444.60000000002566,
+        54.90000000000042,
+        444.60000000002566,
+        73.89999999999999
+      ],
+      "episode_lengths": [
+        615,
+        3600,
+        461,
+        3600,
+        615
+      ],
+      "decoded_action_hist": {
+        "0": 7081,
+        "1": 1810
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 2750,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 218.3800000000104,
+    "mean_length": 1778.2,
+    "std_reward": 184.83810646077453,
+    "std_length": 1488.5559982748382,
+    "task_count": 1,
+    "macro_mean_reward": 218.3800000000104,
+    "macro_mean_length": 1778.2,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3000.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 317.72000000001435,
+      "mean_length": 2578.6,
+      "std_reward": 162.01944821534275,
+      "std_length": 1304.6114517357264,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        54.90000000000042,
+        199.89999999999435,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        461,
+        1632,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 10219,
+        "1": 2674
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 3000,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 317.72000000001435,
+    "mean_length": 2578.6,
+    "std_reward": 162.01944821534275,
+    "std_length": 1304.6114517357264,
+    "task_count": 1,
+    "macro_mean_reward": 317.72000000001435,
+    "macro_mean_length": 2578.6,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3250.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 365.76000000002057,
+      "mean_length": 2965.0,
+      "std_reward": 157.6800000000101,
+      "std_length": 1270.0,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        50.40000000000037
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        425
+      ],
+      "decoded_action_hist": {
+        "0": 11775,
+        "1": 3050
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 3250,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 365.76000000002057,
+    "mean_length": 2965.0,
+    "std_reward": 157.6800000000101,
+    "std_length": 1270.0,
+    "task_count": 1,
+    "macro_mean_reward": 365.76000000002057,
+    "macro_mean_length": 2965.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3500.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 408.66000000001924,
+      "mean_length": 3311.2,
+      "std_reward": 71.8800000000128,
+      "std_length": 577.6,
+      "episode_rewards": [
+        264.89999999999367,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        2156,
+        3600,
+        3600,
+        3600,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 13206,
+        "1": 3350
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 3500,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 408.66000000001924,
+    "mean_length": 3311.2,
+    "std_reward": 71.8800000000128,
+    "std_length": 577.6,
+    "task_count": 1,
+    "macro_mean_reward": 408.66000000001924,
+    "macro_mean_length": 3311.2,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_3750.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 442.26000000002523,
+      "mean_length": 3582.4,
+      "std_reward": 4.680000000000791,
+      "std_length": 35.2,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        432.9000000000237
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        3512
+      ],
+      "decoded_action_hist": {
+        "0": 14270,
+        "1": 3642
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 3750,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 442.26000000002523,
+    "mean_length": 3582.4,
+    "std_reward": 4.680000000000791,
+    "std_length": 35.2,
+    "task_count": 1,
+    "macro_mean_reward": 442.26000000002523,
+    "macro_mean_length": 3582.4,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4000.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 442.26000000002523,
+      "mean_length": 3582.4,
+      "std_reward": 4.680000000000791,
+      "std_length": 35.2,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        432.9000000000237
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        3512
+      ],
+      "decoded_action_hist": {
+        "0": 14256,
+        "1": 3656
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 4000,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 442.26000000002523,
+    "mean_length": 3582.4,
+    "std_reward": 4.680000000000791,
+    "std_length": 35.2,
+    "task_count": 1,
+    "macro_mean_reward": 442.26000000002523,
+    "macro_mean_length": 3582.4,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4250.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 444.60000000002566,
+      "mean_length": 3600.0,
+      "std_reward": 0.0,
+      "std_length": 0.0,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 14338,
+        "1": 3662
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 4250,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 444.60000000002566,
+    "mean_length": 3600.0,
+    "std_reward": 0.0,
+    "std_length": 0.0,
+    "task_count": 1,
+    "macro_mean_reward": 444.60000000002566,
+    "macro_mean_length": 3600.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4500.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 365.76000000002057,
+      "mean_length": 2965.0,
+      "std_reward": 157.6800000000101,
+      "std_length": 1270.0,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        50.40000000000037
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        425
+      ],
+      "decoded_action_hist": {
+        "0": 11866,
+        "1": 2959
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 4500,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 365.76000000002057,
+    "mean_length": 2965.0,
+    "std_reward": 157.6800000000101,
+    "std_length": 1270.0,
+    "task_count": 1,
+    "macro_mean_reward": 365.76000000002057,
+    "macro_mean_length": 2965.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_4750.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 444.60000000002566,
+      "mean_length": 3600.0,
+      "std_reward": 0.0,
+      "std_length": 0.0,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 14315,
+        "1": 3685
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 4750,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 444.60000000002566,
+    "mean_length": 3600.0,
+    "std_reward": 0.0,
+    "std_length": 0.0,
+    "task_count": 1,
+    "macro_mean_reward": 444.60000000002566,
+    "macro_mean_length": 3600.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_500.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 176.28000000000358,
+      "mean_length": 1440.4,
+      "std_reward": 163.1896246702057,
+      "std_length": 1314.9364395285422,
+      "episode_rewards": [
+        270.7999999999948,
+        18.200000000000014,
+        17.90000000000001,
+        129.89999999999748,
+        444.60000000002566
+      ],
+      "episode_lengths": [
+        2206,
+        166,
+        163,
+        1067,
+        3600
+      ],
+      "decoded_action_hist": {
+        "0": 5754,
+        "1": 1448
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 500,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 176.28000000000358,
+    "mean_length": 1440.4,
+    "std_reward": 163.1896246702057,
+    "std_length": 1314.9364395285422,
+    "task_count": 1,
+    "macro_mean_reward": 176.28000000000358,
+    "macro_mean_length": 1440.4,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_5000.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 442.26000000002523,
+      "mean_length": 3582.4,
+      "std_reward": 4.680000000000791,
+      "std_length": 35.2,
+      "episode_rewards": [
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        444.60000000002566,
+        432.9000000000237
+      ],
+      "episode_lengths": [
+        3600,
+        3600,
+        3600,
+        3600,
+        3512
+      ],
+      "decoded_action_hist": {
+        "0": 14239,
+        "1": 3673
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 5000,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 442.26000000002523,
+    "mean_length": 3582.4,
+    "std_reward": 4.680000000000791,
+    "std_length": 35.2,
+    "task_count": 1,
+    "macro_mean_reward": 442.26000000002523,
+    "macro_mean_length": 3582.4,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/eval/mid_train/step_750.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "per_latency": {
+    "flappy/latency_2": {
+      "latency": 2,
+      "num_episodes": 5,
+      "mean_reward": 87.69999999999818,
+      "mean_length": 726.0,
+      "std_reward": 100.46123630535064,
+      "std_length": 810.7465695271242,
+      "episode_rewards": [
+        264.89999999999367,
+        22.40000000000006,
+        12.899999999999974,
+        3.8999999999999986,
+        134.39999999999728
+      ],
+      "episode_lengths": [
+        2156,
+        199,
+        122,
+        50,
+        1103
+      ],
+      "decoded_action_hist": {
+        "0": 2897,
+        "1": 733
+      },
+      "fixed_episode_seeds": true,
+      "eval_seed": 42,
+      "episode_seeds": [
+        42,
+        43,
+        44,
+        45,
+        46
+      ],
+      "episode_indices": [
+        0,
+        1,
+        2,
+        3,
+        4
+      ]
+    }
+  },
+  "aggregate": {
+    "stage": "mid_train",
+    "step": 750,
+    "task": "flappy",
+    "model_alias": "openvla",
+    "fixed_episode_seeds": true,
+    "eval_seed": 42,
+    "total_episodes": 5,
+    "mean_reward": 87.69999999999818,
+    "mean_length": 726.0,
+    "std_reward": 100.46123630535064,
+    "std_length": 810.7465695271242,
+    "task_count": 1,
+    "macro_mean_reward": 87.69999999999818,
+    "macro_mean_length": 726.0,
+    "distributed_eval": false
+  }
+}

flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,217 @@

+framework:
+  qwenvl:
+    base_vlm: /workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    attn_implementation: flash_attention_2
+    enable_gradient_checkpointing: true
+  action_model:
+    state_dim: 7
+    loss_type: discrete_ce
+    action_horizon: 1
+    future_action_window_size: 0
+    past_action_window_size: 0
+    action_dim: 7
+    action_env_dim: 2
+  name: QwenOFT
+datasets:
+  vla_data:
+    dataset_py: lerobot_datasets
+    include_state: true
+    data_root_dir: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
+    data_mix: flappy_train__bridge
+    eval_data_mix: flappy_train__bridge__val
+    custom_mixtures_path: null
+    action_type: discrete
+    sequential_step_sampling: false
+    eval_sequential_step_sampling: null
+    num_workers: 8
+    eval_num_workers: 8
+    prefetch_factor: 4
+    persistent_workers: true
+    pin_memory: true
+    shuffle: true
+    action_balance:
+      enabled: false
+      strategy: balanced_epoch
+      action_key: action_id
+      target_flap_fraction: 0.3
+      noop_id: 0
+      flap_id: 1
+    latency_curriculum:
+      enabled: false
+      strategy: exclusive
+      latencies: null
+      phase_steps: null
+    per_device_batch_size: 32
+    load_all_data_for_training: true
+    num_obs_frames: 1
+    image_mode: single
+    stitch_grid:
+    - 2
+    - 2
+    obs_image_size: null
+    video_backend: torchvision_av
+dataset:
+  source_hf: ${dataset.single_source_hf}
+  config_name: null
+  source_subdir: null
+  converted_name: ${dataset.single_converted_name}
+  single_source_hf: ''
+  mixed_source_hf: ''
+  single_converted_name: flappy_train
+  mixed_converted_name: flappy_mixed_latency_train
+  single_latency_filter: null
+  mixed_latency_filter: null
+  force_download: false
+  setup_force: false
+  skip_verification: false
+  verify_rows: 200
+  max_episodes: null
+  episodes_per_latency: null
+  latency_filter: ${dataset.single_latency_filter}
+  debug_subset:
+    enabled: false
+    max_episodes: 5
+    suffix: debug
+base_model:
+  repo_id: ${bridge_base_model.repo_id.${model}}
+initialization:
+  checkpoint_local_dir: playground/Pretrained_models/Qwen3VL-OFT-Bridge-RT-1
+  checkpoint_hf_repo_id: StarVLA/Qwen3VL-OFT-Bridge-RT-1
+  checkpoint_filename: checkpoints/steps_5000_pytorch_model.pt
+trainer:
+  max_train_steps: 5000
+  num_warmup_steps: 100
+  save_interval: 500
+  eval_interval: 100
+  eval_num_batches: 100
+  per_latency_eval_num_batches: null
+  eval_action_classification: true
+  eval_action_classification_interval: null
+  cc_f1_tolerance: 1
+  learning_rate:
+    base: 2.0e-05
+    qwen_vl_interface: 1.0e-05
+    action_model: 0.0001
+  lr_scheduler_type: cosine_with_min_lr
+  scheduler_specific_kwargs:
+    min_lr: 1.0e-06
+  freeze_modules: ''
+  freeze_llm_bottom_ratio: 0.0
+  loss_scale:
+    vla: 1.0
+    vlm: 0.1
+  max_grad_norm: 1.0
+  weight_decay: 0.0
+  logging_frequency: 1
+  gradient_clipping: 1.0
+  gradient_accumulation_steps: 4
+  distributed_backend: none
+  is_resume: false
+  pretrained_checkpoint: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
+  resume_step: 0
+  reload_modules: null
+  optimizer:
+    name: AdamW
+    betas:
+    - 0.9
+    - 0.95
+    eps: 1.0e-08
+    weight_decay: 1.0e-08
+    fused: true
+  save_format: pt
+workspace_dir: WORKSPACE_DIR
+run_root_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints
+seed: 42
+wandb_entity: saberrr-zju
+wandb_project: starVLA_rl_games
+auth:
+  env_file: null
+  hf_token_env: HF_TOKEN
+  wandb_api_key_env: WANDB_API_KEY
+paths:
+  run_root_dir: results/Checkpoints
+  dataset_local_dir: playground/Datasets/rl_games
+  dataset_cache_dir: null
+  base_model_dir: ${bridge_base_model.local_dir.${model}}
+  accelerate_config: starVLA/config/deepseeds/deepspeed_zero2.yaml
+launch:
+  use_accelerate: true
+  gpus: null
+  num_processes: 1
+  dry_run: false
+conda:
+  enabled: true
+  env_name: null
+rl_games:
+  model_alias: openvla
+  env_eval:
+    image_size: 224
+    frameskip: 1
+    seed: 42
+    fixed_episode_seeds: true
+    latency_seed_stride: 0
+    task_seed_stride: 0
+    task_description: ''
+    enabled: true
+    distributed_mode: none
+    vectorized:
+      enabled: false
+      batch_size: 1
+    latency:
+      prompt_map_path: /workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
+      mode: single
+      values:
+      - 0
+    mid_train:
+      enabled: true
+      interval_steps: 250
+      latencies:
+      - 2
+      num_episodes: 5
+      max_steps_per_episode: 3600
+    post_train:
+      enabled: false
+      latencies:
+      - 0
+      - 1
+      - 2
+      - 3
+      - 4
+      num_episodes: 5
+      max_steps_per_episode: 3600
+  task: flappy
+  initialization_mode: bridge
+  action_carrier: bridge
+model: openvla
+env: flappy
+init: bridge
+bridge_base_model:
+  repo_id:
+    openvla: Qwen/Qwen3-VL-4B-Instruct
+    pi0: StarVLA/Qwen2.5-VL-3B-Instruct-Action
+    pi05: Qwen/Qwen3-VL-4B-Instruct
+    gr00t: Qwen/Qwen3-VL-4B-Instruct
+  local_dir:
+    openvla: playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    pi0: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
+    pi05: playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    gr00t: playground/Pretrained_models/Qwen3-VL-4B-Instruct
+mode: single
+checkpoint:
+  load: auto
+  hf_repo_id: null
+  save_best_model: false
+  save_pt_file: false
+  local:
+    keep_last_n: 1
+  sync:
+    enabled: false
+    repo_id: null
+    keep_last_n: 0
+    sync_every_n_checkpoints: 1
+    resume_policy: local_latest
+run_id: flappy_fix_latency_2_200ep_full_tuning_corrected
+output_dir: null
+config_yaml: null
+is_debug: false
+version_id: '0.21'

flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/hydra.yaml ADDED Viewed

	@@ -0,0 +1,266 @@

+hydra:
+  run:
+    dir: ${run_root_dir}/${run_id}/hydra
+  sweep:
+    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+    subdir: ${hydra.job.num}
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+      Use --hydra-help to view Hydra specific help
+      '
+    template: '${hydra.help.header}
+      == Configuration groups ==
+      Compose your configuration from those groups (group=option)
+      $APP_CONFIG_GROUPS
+      == Config ==
+      Override anything in the config (foo.bar=value)
+      $CONFIG
+      ${hydra.help.footer}
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+      See https://hydra.cc for more info.
+      == Flags ==
+      $FLAGS_HELP
+      == Configuration groups ==
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+      $HYDRA_CONFIG_GROUPS
+      Use ''--cfg hydra'' to Show the Hydra config.
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=RUN
+    task:
+    - model=openvla
+    - env=flappy
+    - init=bridge
+    - mode=single
+    - run_id=flappy_fix_latency_2_200ep_full_tuning_corrected
+    - run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
+    - seed=42
+    - wandb_entity=saberrr-zju
+    - wandb_project=starVLA_rl_games
+    - rl_games.env_eval.enabled=true
+    - checkpoint.sync.enabled=false
+    - checkpoint.sync.keep_last_n=0
+    - checkpoint.local.keep_last_n=1
+    - checkpoint.save_best_model=false
+    - checkpoint.save_pt_file=false
+    - trainer.is_resume=false
+    - trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
+    - trainer.resume_step=0
+    - trainer.max_train_steps=5000
+    - trainer.num_warmup_steps=100
+    - trainer.save_interval=500
+    - trainer.eval_interval=100
+    - trainer.eval_num_batches=100
+    - trainer.eval_action_classification=true
+    - trainer.logging_frequency=1
+    - trainer.gradient_accumulation_steps=4
+    - trainer.distributed_backend=none
+    - trainer.learning_rate.base=2e-05
+    - trainer.learning_rate.qwen_vl_interface=1e-05
+    - trainer.learning_rate.action_model=0.0001
+    - trainer.lr_scheduler_type=cosine_with_min_lr
+    - trainer.scheduler_specific_kwargs.min_lr=1e-06
+    - trainer.freeze_llm_bottom_ratio=0.0
+    - trainer.loss_scale.vla=1.0
+    - trainer.loss_scale.vlm=0.1
+    - trainer.max_grad_norm=1.0
+    - trainer.weight_decay=0.0
+    - trainer.gradient_clipping=1.0
+    - trainer.optimizer.name=AdamW
+    - trainer.optimizer.betas=[0.9,0.95]
+    - trainer.optimizer.eps=1e-08
+    - trainer.optimizer.weight_decay=1e-08
+    - trainer.optimizer.fused=true
+    - trainer.save_format=pt
+    - framework.name=QwenOFT
+    - framework.qwenvl.attn_implementation=flash_attention_2
+    - framework.qwenvl.enable_gradient_checkpointing=true
+    - framework.action_model.action_dim=7
+    - framework.action_model.action_env_dim=2
+    - framework.action_model.state_dim=7
+    - framework.action_model.loss_type=discrete_ce
+    - framework.action_model.action_horizon=1
+    - framework.action_model.future_action_window_size=0
+    - framework.action_model.past_action_window_size=0
+    - datasets.vla_data.include_state=true
+    - datasets.vla_data.action_type=discrete
+    - datasets.vla_data.sequential_step_sampling=false
+    - datasets.vla_data.shuffle=true
+    - datasets.vla_data.num_workers=8
+    - datasets.vla_data.eval_num_workers=8
+    - datasets.vla_data.prefetch_factor=4
+    - datasets.vla_data.persistent_workers=true
+    - datasets.vla_data.pin_memory=true
+    - datasets.vla_data.action_balance.enabled=false
+    - datasets.vla_data.action_balance.strategy=balanced_epoch
+    - datasets.vla_data.action_balance.action_key=action_id
+    - datasets.vla_data.action_balance.target_flap_fraction=0.3
+    - datasets.vla_data.action_balance.noop_id=0
+    - datasets.vla_data.action_balance.flap_id=1
+    - datasets.vla_data.latency_curriculum.enabled=false
+    - datasets.vla_data.latency_curriculum.strategy=exclusive
+    - datasets.vla_data.per_device_batch_size=32
+    - datasets.vla_data.num_workers=8
+    - datasets.vla_data.eval_num_workers=8
+    - datasets.vla_data.prefetch_factor=4
+    - datasets.vla_data.persistent_workers=true
+    - datasets.vla_data.pin_memory=true
+    - datasets.vla_data.load_all_data_for_training=true
+    - datasets.vla_data.video_backend=torchvision_av
+    - datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
+    - datasets.vla_data.data_mix=flappy_train__bridge
+    - datasets.vla_data.eval_data_mix=flappy_train__bridge__val
+    - framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
+    - rl_games.task=flappy
+    - rl_games.model_alias=openvla
+    - rl_games.initialization_mode=bridge
+    - rl_games.action_carrier=bridge
+    - rl_games.env_eval.distributed_mode=none
+    - rl_games.env_eval.latency.mode=single
+    - rl_games.env_eval.frameskip=1
+    - rl_games.env_eval.image_size=224
+    - rl_games.env_eval.seed=42
+    - rl_games.env_eval.fixed_episode_seeds=true
+    - rl_games.env_eval.latency_seed_stride=0
+    - rl_games.env_eval.task_seed_stride=0
+    - rl_games.env_eval.latency.values=[0]
+    - rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
+    - rl_games.env_eval.mid_train.enabled=true
+    - rl_games.env_eval.mid_train.interval_steps=250
+    - rl_games.env_eval.mid_train.num_episodes=5
+    - rl_games.env_eval.mid_train.max_steps_per_episode=3600
+    - rl_games.env_eval.mid_train.latencies=[2]
+    - rl_games.env_eval.post_train.enabled=false
+    - rl_games.env_eval.post_train.num_episodes=5
+    - rl_games.env_eval.post_train.max_steps_per_episode=3600
+    - rl_games.env_eval.post_train.latencies=[0,1,2,3,4]
+  job:
+    name: train_starvla_hydra
+    chdir: false
+    override_dirname: checkpoint.local.keep_last_n=1,checkpoint.save_best_model=false,checkpoint.save_pt_file=false,checkpoint.sync.enabled=false,checkpoint.sync.keep_last_n=0,datasets.vla_data.action_balance.action_key=action_id,datasets.vla_data.action_balance.enabled=false,datasets.vla_data.action_balance.flap_id=1,datasets.vla_data.action_balance.noop_id=0,datasets.vla_data.action_balance.strategy=balanced_epoch,datasets.vla_data.action_balance.target_flap_fraction=0.3,datasets.vla_data.action_type=discrete,datasets.vla_data.data_mix=flappy_train__bridge,datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep,datasets.vla_data.eval_data_mix=flappy_train__bridge__val,datasets.vla_data.eval_num_workers=8,datasets.vla_data.eval_num_workers=8,datasets.vla_data.include_state=true,datasets.vla_data.latency_curriculum.enabled=false,datasets.vla_data.latency_curriculum.strategy=exclusive,datasets.vla_data.load_all_data_for_training=true,datasets.vla_data.num_workers=8,datasets.vla_data.num_workers=8,datasets.vla_data.per_device_batch_size=32,datasets.vla_data.persistent_workers=true,datasets.vla_data.persistent_workers=true,datasets.vla_data.pin_memory=true,datasets.vla_data.pin_memory=true,datasets.vla_data.prefetch_factor=4,datasets.vla_data.prefetch_factor=4,datasets.vla_data.sequential_step_sampling=false,datasets.vla_data.shuffle=true,datasets.vla_data.video_backend=torchvision_av,env=flappy,framework.action_model.action_dim=7,framework.action_model.action_env_dim=2,framework.action_model.action_horizon=1,framework.action_model.future_action_window_size=0,framework.action_model.loss_type=discrete_ce,framework.action_model.past_action_window_size=0,framework.action_model.state_dim=7,framework.name=QwenOFT,framework.qwenvl.attn_implementation=flash_attention_2,framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct,framework.qwenvl.enable_gradient_checkpointing=true,init=bridge,mode=single,model=openvla,rl_games.action_carrier=bridge,rl_games.env_eval.distributed_mode=none,rl_games.env_eval.enabled=true,rl_games.env_eval.fixed_episode_seeds=true,rl_games.env_eval.frameskip=1,rl_games.env_eval.image_size=224,rl_games.env_eval.latency.mode=single,rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json,rl_games.env_eval.latency.values=[0],rl_games.env_eval.latency_seed_stride=0,rl_games.env_eval.mid_train.enabled=true,rl_games.env_eval.mid_train.interval_steps=250,rl_games.env_eval.mid_train.latencies=[2],rl_games.env_eval.mid_train.max_steps_per_episode=3600,rl_games.env_eval.mid_train.num_episodes=5,rl_games.env_eval.post_train.enabled=false,rl_games.env_eval.post_train.latencies=[0,1,2,3,4],rl_games.env_eval.post_train.max_steps_per_episode=3600,rl_games.env_eval.post_train.num_episodes=5,rl_games.env_eval.seed=42,rl_games.env_eval.task_seed_stride=0,rl_games.initialization_mode=bridge,rl_games.model_alias=openvla,rl_games.task=flappy,run_id=flappy_fix_latency_2_200ep_full_tuning_corrected,run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints,seed=42,trainer.distributed_backend=none,trainer.eval_action_classification=true,trainer.eval_interval=100,trainer.eval_num_batches=100,trainer.freeze_llm_bottom_ratio=0.0,trainer.gradient_accumulation_steps=4,trainer.gradient_clipping=1.0,trainer.is_resume=false,trainer.learning_rate.action_model=0.0001,trainer.learning_rate.base=2e-05,trainer.learning_rate.qwen_vl_interface=1e-05,trainer.logging_frequency=1,trainer.loss_scale.vla=1.0,trainer.loss_scale.vlm=0.1,trainer.lr_scheduler_type=cosine_with_min_lr,trainer.max_grad_norm=1.0,trainer.max_train_steps=5000,trainer.num_warmup_steps=100,trainer.optimizer.betas=[0.9,0.95],trainer.optimizer.eps=1e-08,trainer.optimizer.fused=true,trainer.optimizer.name=AdamW,trainer.optimizer.weight_decay=1e-08,trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt,trainer.resume_step=0,trainer.save_format=pt,trainer.save_interval=500,trainer.scheduler_specific_kwargs.min_lr=1e-06,trainer.weight_decay=0.0,wandb_entity=saberrr-zju,wandb_project=starVLA_rl_games
+    id: ???
+    num: ???
+    config_name: train
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.3
+    version_base: '1.1'
+    cwd: /workspace/latency-sensitive-bench/starVLA
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /workspace/latency-sensitive-bench/starVLA/examples/rl_games/config
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/hydra
+    choices:
+      cross_task_setup: null
+      checkpoint: default
+      mode: single
+      init: bridge
+      env: flappy
+      model: openvla
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false

flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/.hydra/overrides.yaml ADDED Viewed

	@@ -0,0 +1,106 @@

+- model=openvla
+- env=flappy
+- init=bridge
+- mode=single
+- run_id=flappy_fix_latency_2_200ep_full_tuning_corrected
+- run_root_dir=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints
+- seed=42
+- wandb_entity=saberrr-zju
+- wandb_project=starVLA_rl_games
+- rl_games.env_eval.enabled=true
+- checkpoint.sync.enabled=false
+- checkpoint.sync.keep_last_n=0
+- checkpoint.local.keep_last_n=1
+- checkpoint.save_best_model=false
+- checkpoint.save_pt_file=false
+- trainer.is_resume=false
+- trainer.pretrained_checkpoint=/workspace/latency-sensitive-bench/starVLA/results/Checkpoints/flappy_fix_latency_2_200ep_full_tuning_corrected/checkpoints/_initialization/StarVLA_Qwen3VL-OFT-Bridge-RT-1/checkpoints/steps_5000_pytorch_model.pt
+- trainer.resume_step=0
+- trainer.max_train_steps=5000
+- trainer.num_warmup_steps=100
+- trainer.save_interval=500
+- trainer.eval_interval=100
+- trainer.eval_num_batches=100
+- trainer.eval_action_classification=true
+- trainer.logging_frequency=1
+- trainer.gradient_accumulation_steps=4
+- trainer.distributed_backend=none
+- trainer.learning_rate.base=2e-05
+- trainer.learning_rate.qwen_vl_interface=1e-05
+- trainer.learning_rate.action_model=0.0001
+- trainer.lr_scheduler_type=cosine_with_min_lr
+- trainer.scheduler_specific_kwargs.min_lr=1e-06
+- trainer.freeze_llm_bottom_ratio=0.0
+- trainer.loss_scale.vla=1.0
+- trainer.loss_scale.vlm=0.1
+- trainer.max_grad_norm=1.0
+- trainer.weight_decay=0.0
+- trainer.gradient_clipping=1.0
+- trainer.optimizer.name=AdamW
+- trainer.optimizer.betas=[0.9,0.95]
+- trainer.optimizer.eps=1e-08
+- trainer.optimizer.weight_decay=1e-08
+- trainer.optimizer.fused=true
+- trainer.save_format=pt
+- framework.name=QwenOFT
+- framework.qwenvl.attn_implementation=flash_attention_2
+- framework.qwenvl.enable_gradient_checkpointing=true
+- framework.action_model.action_dim=7
+- framework.action_model.action_env_dim=2
+- framework.action_model.state_dim=7
+- framework.action_model.loss_type=discrete_ce
+- framework.action_model.action_horizon=1
+- framework.action_model.future_action_window_size=0
+- framework.action_model.past_action_window_size=0
+- datasets.vla_data.include_state=true
+- datasets.vla_data.action_type=discrete
+- datasets.vla_data.sequential_step_sampling=false
+- datasets.vla_data.shuffle=true
+- datasets.vla_data.num_workers=8
+- datasets.vla_data.eval_num_workers=8
+- datasets.vla_data.prefetch_factor=4
+- datasets.vla_data.persistent_workers=true
+- datasets.vla_data.pin_memory=true
+- datasets.vla_data.action_balance.enabled=false
+- datasets.vla_data.action_balance.strategy=balanced_epoch
+- datasets.vla_data.action_balance.action_key=action_id
+- datasets.vla_data.action_balance.target_flap_fraction=0.3
+- datasets.vla_data.action_balance.noop_id=0
+- datasets.vla_data.action_balance.flap_id=1
+- datasets.vla_data.latency_curriculum.enabled=false
+- datasets.vla_data.latency_curriculum.strategy=exclusive
+- datasets.vla_data.per_device_batch_size=32
+- datasets.vla_data.num_workers=8
+- datasets.vla_data.eval_num_workers=8
+- datasets.vla_data.prefetch_factor=4
+- datasets.vla_data.persistent_workers=true
+- datasets.vla_data.pin_memory=true
+- datasets.vla_data.load_all_data_for_training=true
+- datasets.vla_data.video_backend=torchvision_av
+- datasets.vla_data.data_root_dir=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep
+- datasets.vla_data.data_mix=flappy_train__bridge
+- datasets.vla_data.eval_data_mix=flappy_train__bridge__val
+- framework.qwenvl.base_vlm=/workspace/latency-sensitive-bench/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
+- rl_games.task=flappy
+- rl_games.model_alias=openvla
+- rl_games.initialization_mode=bridge
+- rl_games.action_carrier=bridge
+- rl_games.env_eval.distributed_mode=none
+- rl_games.env_eval.latency.mode=single
+- rl_games.env_eval.frameskip=1
+- rl_games.env_eval.image_size=224
+- rl_games.env_eval.seed=42
+- rl_games.env_eval.fixed_episode_seeds=true
+- rl_games.env_eval.latency_seed_stride=0
+- rl_games.env_eval.task_seed_stride=0
+- rl_games.env_eval.latency.values=[0]
+- rl_games.env_eval.latency.prompt_map_path=/workspace/latency-sensitive-bench/starVLA/data/flappy_fix_latency_2_200ep/flappy_train__bridge/latency_prompt_map.json
+- rl_games.env_eval.mid_train.enabled=true
+- rl_games.env_eval.mid_train.interval_steps=250
+- rl_games.env_eval.mid_train.num_episodes=5
+- rl_games.env_eval.mid_train.max_steps_per_episode=3600
+- rl_games.env_eval.mid_train.latencies=[2]
+- rl_games.env_eval.post_train.enabled=false
+- rl_games.env_eval.post_train.num_episodes=5
+- rl_games.env_eval.post_train.max_steps_per_episode=3600
+- rl_games.env_eval.post_train.latencies=[0,1,2,3,4]

flappy_fix_latency_2_200ep_full_tuning_corrected/hydra/train_starvla_hydra.log ADDED Viewed

The diff for this file is too large to render. See raw diff

flappy_fix_latency_2_200ep_full_tuning_corrected/summary.jsonl ADDED Viewed

	@@ -0,0 +1,10 @@

+{"steps": 500}
+{"steps": 1000}
+{"steps": 1500}
+{"steps": 2000}
+{"steps": 2500}
+{"steps": 3000}
+{"steps": 3500}
+{"steps": 4000}
+{"steps": 4500}
+{"steps": 5000}