{ "dataset": { "type": "cubev2", "repo_id": "multidata_from_file", "repo_id_file": "outputs/cubev2/_repo_id_files/cubev2-multidata-delta-pretrain-2026_04_07_07_42_16.txt", "root": null, "episodes": null, "image_transforms": { "enable": false, "max_num_transforms": 3, "random_order": false, "tfs": { "brightness": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "brightness": [ 0.8, 1.2 ] } }, "contrast": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "contrast": [ 0.8, 1.2 ] } }, "saturation": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "saturation": [ 0.5, 1.5 ] } }, "hue": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "hue": [ -0.05, 0.05 ] } }, "sharpness": { "weight": 1.0, "type": "SharpnessJitter", "kwargs": { "sharpness": [ 0.5, 1.5 ] } }, "affine": { "weight": 1.0, "type": "RandomAffine", "kwargs": { "degrees": [ -5.0, 5.0 ], "translate": [ 0.05, 0.05 ] } } } }, "revision": null, "use_imagenet_stats": true, "use_external_stats": true, "external_stats_path": null, "external_stats_root": "norm_stats", "weight_rules_path": "configs/weight_rules_cubev2_multi.yaml", "video_backend": "pyav", "streaming": false, "dist_loading": true, "buffer_size": 1024, "action_mode": "delta", "repack_transforms": { "inputs": [], "outputs": [] }, "data_transforms": { "inputs": [ { "type": "delta_action", "mask": null, "mapping": {} }, { "type": "resize_with_pad", "height": 224, "width": 224, "mode": "bilinear" }, { "type": "remap_image_key", "mapping": {} }, { "type": "normalize", "selected_keys": null, "mode": "mean_std", "norm_stats": {} }, { "type": "compose_fields", "mapping": {} }, { "type": "pad_state_and_action", "max_state_dim": 32, "max_action_dim": 32 }, { "type": "cubev2_processor", "pretrained_model_name_or_path": "Qwen/Qwen3-VL-2B-Instruct", "max_length": 48, "task_key": "task", "padding_side": "right", "padding": "max_length", "truncation": true, "spatial_merge_size": 2, "vision_start_token_id": 151652, "vision_end_token_id": 151653, "image_token_id": 151655, "process": null }, { "type": "unify_cubev2_inputs" } ], "outputs": [] }, "model_transforms": { "inputs": [], "outputs": [] }, "height": 224, "width": 224, "max_state_dim": 32, "max_action_dim": 32, "qwen3_vl_processor_path": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/Qwen3-VL-2B-Instruct" }, "env": null, "policy": { "type": "cubev2", "n_obs_steps": 1, "input_features": { "observation.state": { "type": "STATE", "shape": [ 32 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 32 ] } }, "device": "cuda:0", "use_amp": false, "push_to_hub": false, "repo_id": "lerobot_lab/cubev2", "private": null, "tags": null, "license": null, "pretrained_path": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/InternVLA-A1-3B", "qwen3_vl_variant": "qwen3_vl_28l", "action_expert_variant": "qwen3_28l", "qwen3_vl_pretrained_path": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/Qwen3-VL-2B-Instruct", "dtype": "bfloat16", "chunk_size": 50, "n_action_steps": 50, "max_state_dim": 32, "max_action_dim": 32, "num_inference_steps": 10, "time_sampling_beta_alpha": 1.5, "time_sampling_beta_beta": 1.0, "time_sampling_scale": 0.999, "time_sampling_offset": 0.001, "min_period": 0.004, "max_period": 4.0, "image_resolution": [ 224, 224 ], "empty_cameras": 0, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "IDENTITY", "ACTION": "IDENTITY" }, "gradient_checkpointing": false, "compile_model": false, "compile_mode": "max-autotune", "optimizer_lr": 5e-05, "optimizer_betas": [ 0.9, 0.95 ], "optimizer_eps": 1e-08, "optimizer_weight_decay": 0.01, "optimizer_grad_clip_norm": 1.0, "scheduler_warmup_steps": 2000, "scheduler_decay_steps": 300000, "scheduler_decay_lr": 1e-05, "tokenizer_max_length": 48, "freeze_vision_encoder": false, "train_expert_only": false, "train_vlm_only": false, "scale_factor": 8, "lambda_gen": 0.01, "cosmos_tokenizer_path_or_name": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/Cosmos-Tokenizer-CI8x8", "enable_3d_queries": true, "num_3d_query_tokens": 432, "da3_alignment_mode": "query_decoder", "da3_query_resampler_layers": 1, "da3_query_resampler_ff_mult": 1, "query_layer_indices": [ 13, 19, 23, 27 ], "da3_variant": "auto", "da3_teacher_layers": [ 11, 15, 19, 23 ], "da3_query_dim": 2048, "da3_tokens_per_view": 1296, "da3_num_views": 3, "lambda_3d": 0.01, "da3_model_path_or_name": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/model/DA3-LARGE-1-1", "da3_model_name": null, "da3_code_root": null, "da3_teacher_process_res": 504, "da3_layer_weights": [ 1.0, 1.2, 1.4, 1.6 ], "future_query_init_std": 0.02, "log_da3_teacher_timing": true }, "output_dir": "outputs/cubev2/cubev2-multidata-delta-pretrain-2026_04_07_07_42_16", "job_name": "cubev2-multidata-delta-pretrain-2026_04_07_07_42_16", "resume": false, "seed": 42, "num_workers": 12, "batch_size": 12, "steps": 300000, "eval_freq": 20000, "log_freq": 25, "save_checkpoint": true, "save_freq": 10000, "use_policy_training_preset": true, "optimizer": { "type": "adamw", "lr": 5e-05, "weight_decay": 0.01, "grad_clip_norm": 1.0, "betas": [ 0.9, 0.95 ], "eps": 1e-08 }, "scheduler": { "type": "cosine_decay_with_warmup", "num_warmup_steps": 2000, "num_decay_steps": 300000, "peak_lr": 5e-05, "decay_lr": 1e-05 }, "eval": { "n_episodes": 50, "batch_size": 50, "use_async_envs": false }, "wandb": { "enable": true, "disable_artifact": false, "project": "CUBEv2", "entity": null, "notes": null, "run_id": "3dhym6sa", "mode": "offline" }, "checkpoint_path": null, "rename_map": {} }