{ "run_id": "LIBERO", "run_root_dir": "checkpoints", "seed": 42, "trackers": [ "json" ], "is_debug": false, "framework": { "name": "VLA_JEPA", "qwenvl": { "base_vlm": "/home/dataset-local/models/Qwen3-VL-2B-Instruct", "attn_implementation": "flash_attention_2", "vl_hidden_dim": 2048 }, "action_model": { "action_model_type": "DiT-B", "action_hidden_dim": 1024, "hidden_size": 1024, "add_pos_embed": true, "max_seq_len": 1024, "action_dim": 7, "state_dim": 8, "future_action_window_size": 6, "action_horizon": 7, "past_action_window_size": 0, "repeated_diffusion_steps": 8, "noise_beta_alpha": 1.5, "noise_beta_beta": 1.0, "noise_s": 0.999, "num_timestep_buckets": 1000, "num_inference_timesteps": 4, "num_target_vision_tokens": 32, "diffusion_model_cfg": { "cross_attention_dim": 2048, "dropout": 0.2, "final_dropout": true, "interleave_self_attention": true, "norm_type": "ada_norm", "num_layers": 16, "output_dim": 1024, "positional_embeddings": null } }, "vj2_model": { "base_encoder": "/home/dataset-local/models/vjepa2-vitl-fpc64-256", "depth": 12, "num_heads": 8, "special_action_token": "<|action_{}|>", "num_action_tokens_per_timestep": 8, "embodied_action_token": "<|embodied_action|>", "num_embodied_action_tokens_per_instruction": 32, "num_frames": 8 }, "reduce_in_full_precision": true }, "datasets": { "vla_data": { "dataset_py": "lerobot_datasets", "data_root_dir": "/home/dataset-local/datasets/LeRobot/LEROBOT_LIBERO_DATA", "data_mix": "libero_all", "action_type": "delta_qpos", "CoT_prompt": "Your task is {instruction}. \nInfer the temporal dynamics from frames {actions} and produce the corresponding policy actions {e_actions}.", "resolution_size": 224, "per_device_batch_size": 32, "video_resolution_size": 256, "load_all_data_for_training": true, "with_state": true } }, "trainer": { "epochs": 100, "max_train_steps": 30000, "num_warmup_steps": 5000, "save_interval": 10000, "eval_interval": 100, "learning_rate": { "base": 3e-05, "qwen_vl_interface": 1e-05, "action_model": 0.0001 }, "lr_scheduler_type": "cosine_with_min_lr", "scheduler_specific_kwargs": { "min_lr": 1e-06 }, "freeze_modules": "", "loss_scale": { "vla": 1.0, "vlm": 0.1 }, "max_grad_norm": 1.0, "warmup_ratio": 0.1, "weight_decay": 0.0, "logging_frequency": 10, "gradient_clipping": 1.0, "gradient_accumulation_steps": 1, "pretrained_checkpoint": "/home/dataset-local/VLA_JEPA/checkpoints/pretrain/VLA-JEPA-pretrain.pt", "optimizer": { "name": "AdamW", "betas": [ 0.9, 0.95 ], "eps": 1e-08, "weight_decay": 1e-08 }, "is_resume": false, "resume_epoch": null, "resume_step": null, "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true }, "output_dir": "checkpoints/LIBERO" }