ginwind
/

VLA-JEPA

Model card Files Files and versions

VLA-JEPA / LIBERO /config.json

ginwind's picture

Upload folder using huggingface_hub

ccfcdff verified about 1 month ago

history blame contribute delete

3.25 kB

	{
	"run_id": "LIBERO",
	"run_root_dir": "checkpoints",
	"seed": 42,
	"trackers": [
	"json"
	],
	"is_debug": false,
	"framework": {
	"name": "VLA_JEPA",
	"qwenvl": {
	"base_vlm": "/home/dataset-local/models/Qwen3-VL-2B-Instruct",
	"attn_implementation": "flash_attention_2",
	"vl_hidden_dim": 2048
	},
	"action_model": {
	"action_model_type": "DiT-B",
	"action_hidden_dim": 1024,
	"hidden_size": 1024,
	"add_pos_embed": true,
	"max_seq_len": 1024,
	"action_dim": 7,
	"state_dim": 8,
	"future_action_window_size": 6,
	"action_horizon": 7,
	"past_action_window_size": 0,
	"repeated_diffusion_steps": 8,
	"noise_beta_alpha": 1.5,
	"noise_beta_beta": 1.0,
	"noise_s": 0.999,
	"num_timestep_buckets": 1000,
	"num_inference_timesteps": 4,
	"num_target_vision_tokens": 32,
	"diffusion_model_cfg": {
	"cross_attention_dim": 2048,
	"dropout": 0.2,
	"final_dropout": true,
	"interleave_self_attention": true,
	"norm_type": "ada_norm",
	"num_layers": 16,
	"output_dim": 1024,
	"positional_embeddings": null
	}
	},
	"vj2_model": {
	"base_encoder": "/home/dataset-local/models/vjepa2-vitl-fpc64-256",
	"depth": 12,
	"num_heads": 8,
	"special_action_token": "<|action_{}|>",
	"num_action_tokens_per_timestep": 8,
	"embodied_action_token": "<|embodied_action|>",
	"num_embodied_action_tokens_per_instruction": 32,
	"num_frames": 8
	},
	"reduce_in_full_precision": true
	},
	"datasets": {
	"vla_data": {
	"dataset_py": "lerobot_datasets",
	"data_root_dir": "/home/dataset-local/datasets/LeRobot/LEROBOT_LIBERO_DATA",
	"data_mix": "libero_all",
	"action_type": "delta_qpos",
	"CoT_prompt": "Your task is {instruction}. Infer the temporal dynamics from frames {actions} and produce the corresponding policy actions {e_actions}.",
	"resolution_size": 224,
	"per_device_batch_size": 32,
	"video_resolution_size": 256,
	"load_all_data_for_training": true,
	"with_state": true
	}
	},
	"trainer": {
	"epochs": 100,
	"max_train_steps": 30000,
	"num_warmup_steps": 5000,
	"save_interval": 10000,
	"eval_interval": 100,
	"learning_rate": {
	"base": 3e-05,
	"qwen_vl_interface": 1e-05,
	"action_model": 0.0001
	},
	"lr_scheduler_type": "cosine_with_min_lr",
	"scheduler_specific_kwargs": {
	"min_lr": 1e-06
	},
	"freeze_modules": "",
	"loss_scale": {
	"vla": 1.0,
	"vlm": 0.1
	},
	"max_grad_norm": 1.0,
	"warmup_ratio": 0.1,
	"weight_decay": 0.0,
	"logging_frequency": 10,
	"gradient_clipping": 1.0,
	"gradient_accumulation_steps": 1,
	"pretrained_checkpoint": "/home/dataset-local/VLA_JEPA/checkpoints/pretrain/VLA-JEPA-pretrain.pt",
	"optimizer": {
	"name": "AdamW",
	"betas": [
	0.9,
	0.95
	],
	"eps": 1e-08,
	"weight_decay": 1e-08
	},
	"is_resume": false,
	"resume_epoch": null,
	"resume_step": null,
	"enable_gradient_checkpointing": true,
	"enable_mixed_precision_training": true
	},
	"output_dir": "checkpoints/LIBERO"
	}