MagicBot-VGA-Base / train_config.json
zaleni's picture
Upload folder using huggingface_hub
6a0b8f8 verified
Raw
History Blame Contribute Delete
9.63 kB
{
"dataset": {
"type": "cubev2",
"repo_id": "multidata_from_file",
"repo_id_file": "outputs/cubev2/_repo_id_files/cubev2-multidata-delta-pretrain-2026_04_07_07_42_16.txt",
"root": null,
"episodes": null,
"image_transforms": {
"enable": false,
"max_num_transforms": 3,
"random_order": false,
"tfs": {
"brightness": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"brightness": [
0.8,
1.2
]
}
},
"contrast": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"contrast": [
0.8,
1.2
]
}
},
"saturation": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"saturation": [
0.5,
1.5
]
}
},
"hue": {
"weight": 1.0,
"type": "ColorJitter",
"kwargs": {
"hue": [
-0.05,
0.05
]
}
},
"sharpness": {
"weight": 1.0,
"type": "SharpnessJitter",
"kwargs": {
"sharpness": [
0.5,
1.5
]
}
},
"affine": {
"weight": 1.0,
"type": "RandomAffine",
"kwargs": {
"degrees": [
-5.0,
5.0
],
"translate": [
0.05,
0.05
]
}
}
}
},
"revision": null,
"use_imagenet_stats": true,
"use_external_stats": true,
"external_stats_path": null,
"external_stats_root": "norm_stats",
"weight_rules_path": "configs/weight_rules_cubev2_multi.yaml",
"video_backend": "pyav",
"streaming": false,
"dist_loading": true,
"buffer_size": 1024,
"action_mode": "delta",
"repack_transforms": {
"inputs": [],
"outputs": []
},
"data_transforms": {
"inputs": [
{
"type": "delta_action",
"mask": null,
"mapping": {}
},
{
"type": "resize_with_pad",
"height": 224,
"width": 224,
"mode": "bilinear"
},
{
"type": "remap_image_key",
"mapping": {}
},
{
"type": "normalize",
"selected_keys": null,
"mode": "mean_std",
"norm_stats": {}
},
{
"type": "compose_fields",
"mapping": {}
},
{
"type": "pad_state_and_action",
"max_state_dim": 32,
"max_action_dim": 32
},
{
"type": "cubev2_processor",
"pretrained_model_name_or_path": "Qwen/Qwen3-VL-2B-Instruct",
"max_length": 48,
"task_key": "task",
"padding_side": "right",
"padding": "max_length",
"truncation": true,
"spatial_merge_size": 2,
"vision_start_token_id": 151652,
"vision_end_token_id": 151653,
"image_token_id": 151655,
"process": null
},
{
"type": "unify_cubev2_inputs"
}
],
"outputs": []
},
"model_transforms": {
"inputs": [],
"outputs": []
},
"height": 224,
"width": 224,
"max_state_dim": 32,
"max_action_dim": 32,
"qwen3_vl_processor_path": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/Qwen3-VL-2B-Instruct"
},
"env": null,
"policy": {
"type": "cubev2",
"n_obs_steps": 1,
"input_features": {
"observation.state": {
"type": "STATE",
"shape": [
32
]
}
},
"output_features": {
"action": {
"type": "ACTION",
"shape": [
32
]
}
},
"device": "cuda:0",
"use_amp": false,
"push_to_hub": false,
"repo_id": "lerobot_lab/cubev2",
"private": null,
"tags": null,
"license": null,
"pretrained_path": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/InternVLA-A1-3B",
"qwen3_vl_variant": "qwen3_vl_28l",
"action_expert_variant": "qwen3_28l",
"qwen3_vl_pretrained_path": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/Qwen3-VL-2B-Instruct",
"dtype": "bfloat16",
"chunk_size": 50,
"n_action_steps": 50,
"max_state_dim": 32,
"max_action_dim": 32,
"num_inference_steps": 10,
"time_sampling_beta_alpha": 1.5,
"time_sampling_beta_beta": 1.0,
"time_sampling_scale": 0.999,
"time_sampling_offset": 0.001,
"min_period": 0.004,
"max_period": 4.0,
"image_resolution": [
224,
224
],
"empty_cameras": 0,
"normalization_mapping": {
"VISUAL": "IDENTITY",
"STATE": "IDENTITY",
"ACTION": "IDENTITY"
},
"gradient_checkpointing": false,
"compile_model": false,
"compile_mode": "max-autotune",
"optimizer_lr": 5e-05,
"optimizer_betas": [
0.9,
0.95
],
"optimizer_eps": 1e-08,
"optimizer_weight_decay": 0.01,
"optimizer_grad_clip_norm": 1.0,
"scheduler_warmup_steps": 2000,
"scheduler_decay_steps": 300000,
"scheduler_decay_lr": 1e-05,
"tokenizer_max_length": 48,
"freeze_vision_encoder": false,
"train_expert_only": false,
"train_vlm_only": false,
"scale_factor": 8,
"lambda_gen": 0.01,
"cosmos_tokenizer_path_or_name": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/Cosmos-Tokenizer-CI8x8",
"enable_3d_queries": true,
"num_3d_query_tokens": 432,
"da3_alignment_mode": "query_decoder",
"da3_query_resampler_layers": 1,
"da3_query_resampler_ff_mult": 1,
"query_layer_indices": [
13,
19,
23,
27
],
"da3_variant": "auto",
"da3_teacher_layers": [
11,
15,
19,
23
],
"da3_query_dim": 2048,
"da3_tokens_per_view": 1296,
"da3_num_views": 3,
"lambda_3d": 0.01,
"da3_model_path_or_name": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/DA3-LARGE-1-1",
"da3_model_name": null,
"da3_code_root": null,
"da3_teacher_process_res": 504,
"da3_layer_weights": [
1.0,
1.2,
1.4,
1.6
],
"future_query_init_std": 0.02,
"log_da3_teacher_timing": true
},
"output_dir": "outputs/cubev2/cubev2-multidata-delta-pretrain-2026_04_07_07_42_16",
"job_name": "cubev2-multidata-delta-pretrain-2026_04_07_07_42_16",
"resume": false,
"seed": 42,
"num_workers": 12,
"batch_size": 12,
"steps": 300000,
"eval_freq": 20000,
"log_freq": 25,
"save_checkpoint": true,
"save_freq": 10000,
"use_policy_training_preset": true,
"optimizer": {
"type": "adamw",
"lr": 5e-05,
"weight_decay": 0.01,
"grad_clip_norm": 1.0,
"betas": [
0.9,
0.95
],
"eps": 1e-08
},
"scheduler": {
"type": "cosine_decay_with_warmup",
"num_warmup_steps": 2000,
"num_decay_steps": 300000,
"peak_lr": 5e-05,
"decay_lr": 1e-05
},
"eval": {
"n_episodes": 50,
"batch_size": 50,
"use_async_envs": false
},
"wandb": {
"enable": true,
"disable_artifact": false,
"project": "CUBEv2",
"entity": null,
"notes": null,
"run_id": "3dhym6sa",
"mode": "offline"
},
"checkpoint_path": null,
"rename_map": {}
}