VLA-JEPA / LIBERO /config.json
ginwind's picture
Upload folder using huggingface_hub
ccfcdff verified
{
"run_id": "LIBERO",
"run_root_dir": "checkpoints",
"seed": 42,
"trackers": [
"json"
],
"is_debug": false,
"framework": {
"name": "VLA_JEPA",
"qwenvl": {
"base_vlm": "/home/dataset-local/models/Qwen3-VL-2B-Instruct",
"attn_implementation": "flash_attention_2",
"vl_hidden_dim": 2048
},
"action_model": {
"action_model_type": "DiT-B",
"action_hidden_dim": 1024,
"hidden_size": 1024,
"add_pos_embed": true,
"max_seq_len": 1024,
"action_dim": 7,
"state_dim": 8,
"future_action_window_size": 6,
"action_horizon": 7,
"past_action_window_size": 0,
"repeated_diffusion_steps": 8,
"noise_beta_alpha": 1.5,
"noise_beta_beta": 1.0,
"noise_s": 0.999,
"num_timestep_buckets": 1000,
"num_inference_timesteps": 4,
"num_target_vision_tokens": 32,
"diffusion_model_cfg": {
"cross_attention_dim": 2048,
"dropout": 0.2,
"final_dropout": true,
"interleave_self_attention": true,
"norm_type": "ada_norm",
"num_layers": 16,
"output_dim": 1024,
"positional_embeddings": null
}
},
"vj2_model": {
"base_encoder": "/home/dataset-local/models/vjepa2-vitl-fpc64-256",
"depth": 12,
"num_heads": 8,
"special_action_token": "<|action_{}|>",
"num_action_tokens_per_timestep": 8,
"embodied_action_token": "<|embodied_action|>",
"num_embodied_action_tokens_per_instruction": 32,
"num_frames": 8
},
"reduce_in_full_precision": true
},
"datasets": {
"vla_data": {
"dataset_py": "lerobot_datasets",
"data_root_dir": "/home/dataset-local/datasets/LeRobot/LEROBOT_LIBERO_DATA",
"data_mix": "libero_all",
"action_type": "delta_qpos",
"CoT_prompt": "Your task is {instruction}. Infer the temporal dynamics from frames {actions} and produce the corresponding policy actions {e_actions}.",
"resolution_size": 224,
"per_device_batch_size": 32,
"video_resolution_size": 256,
"load_all_data_for_training": true,
"with_state": true
}
},
"trainer": {
"epochs": 100,
"max_train_steps": 30000,
"num_warmup_steps": 5000,
"save_interval": 10000,
"eval_interval": 100,
"learning_rate": {
"base": 3e-05,
"qwen_vl_interface": 1e-05,
"action_model": 0.0001
},
"lr_scheduler_type": "cosine_with_min_lr",
"scheduler_specific_kwargs": {
"min_lr": 1e-06
},
"freeze_modules": "",
"loss_scale": {
"vla": 1.0,
"vlm": 0.1
},
"max_grad_norm": 1.0,
"warmup_ratio": 0.1,
"weight_decay": 0.0,
"logging_frequency": 10,
"gradient_clipping": 1.0,
"gradient_accumulation_steps": 1,
"pretrained_checkpoint": "/home/dataset-local/VLA_JEPA/checkpoints/pretrain/VLA-JEPA-pretrain.pt",
"optimizer": {
"name": "AdamW",
"betas": [
0.9,
0.95
],
"eps": 1e-08,
"weight_decay": 1e-08
},
"is_resume": false,
"resume_epoch": null,
"resume_step": null,
"enable_gradient_checkpointing": true,
"enable_mixed_precision_training": true
},
"output_dir": "checkpoints/LIBERO"
}