{
    "dataset": {
        "select_dataset": "HumanoidStation_in_jushen",
        "repo_ids": null,
        "roots": null,
        "repo_id": null,
        "root": null,
        "episodes": null,
        "image_transforms": {
            "enable": false,
            "max_num_transforms": 3,
            "random_order": false,
            "tfs": {
                "brightness": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "brightness": [
                            0.8,
                            1.2
                        ]
                    }
                },
                "contrast": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "contrast": [
                            0.8,
                            1.2
                        ]
                    }
                },
                "saturation": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "saturation": [
                            0.5,
                            1.5
                        ]
                    }
                },
                "hue": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "hue": [
                            -0.05,
                            0.05
                        ]
                    }
                },
                "sharpness": {
                    "weight": 1.0,
                    "type": "SharpnessJitter",
                    "kwargs": {
                        "sharpness": [
                            0.5,
                            1.5
                        ]
                    }
                }
            }
        },
        "local_files_only": false,
        "use_imagenet_stats": true,
        "video_backend": "pyav"
    },
    "env": null,
    "policy": {
        "type": "crossvla_stage2_plus",
        "n_obs_steps": 2,
        "action_sample_factor": 1,
        "normalization_mapping": {
            "VISUAL": "IDENTITY",
            "STATE": "MEAN_STD",
            "ACTION": "MEAN_STD"
        },
        "input_features": {
            "observation.state": {
                "type": "STATE",
                "shape": [
                    32
                ]
            },
            "observation.images.image_0": {
                "type": "VISUAL",
                "shape": [
                    224,
                    3,
                    224
                ]
            },
            "observation.images.image_1": {
                "type": "VISUAL",
                "shape": [
                    224,
                    3,
                    224
                ]
            },
            "observation.images.image_2": {
                "type": "VISUAL",
                "shape": [
                    224,
                    3,
                    224
                ]
            },
            "observation.images.image_3": {
                "type": "VISUAL",
                "shape": [
                    224,
                    3,
                    224
                ]
            },
            "observation.images.image_wrist_0": {
                "type": "VISUAL",
                "shape": [
                    224,
                    3,
                    224
                ]
            },
            "observation.images.image_wrist_1": {
                "type": "VISUAL",
                "shape": [
                    224,
                    3,
                    224
                ]
            },
            "observation.state_is_pad": {
                "type": "STATE",
                "shape": [
                    2
                ]
            },
            "observation.images.image_0_is_pad": {
                "type": "STATE",
                "shape": [
                    2
                ]
            },
            "observation.images.image_1_is_pad": {
                "type": "STATE",
                "shape": [
                    2
                ]
            },
            "observation.images.image_2_is_pad": {
                "type": "STATE",
                "shape": [
                    2
                ]
            },
            "observation.images.image_3_is_pad": {
                "type": "STATE",
                "shape": [
                    2
                ]
            },
            "observation.images.image_wrist_0_is_pad": {
                "type": "STATE",
                "shape": [
                    2
                ]
            },
            "observation.images.image_wrist_1_is_pad": {
                "type": "STATE",
                "shape": [
                    2
                ]
            }
        },
        "output_features": {
            "action": {
                "type": "ACTION",
                "shape": [
                    32
                ]
            },
            "action_is_pad": {
                "type": "ACTION",
                "shape": [
                    50
                ]
            }
        },
        "stage1_pretrained_path": "/media/jushen/bamboo-fan/Save/crossvla/stage1_4dataset_10_8_bs12_kl_cb256_0528_stat/checkpoints/275000/pretrained_model/",
        "stage2_pretrained_path": "None",
        "stage2_latent_image_token_check": false,
        "dataset_stats_generate": true,
        "heterogeneous": true,
        "split_dataset": true,
        "real_robot_dev": false,
        "image_interval_steps": 50,
        "action_latent_token_num": 13,
        "mformer_hidden_size": 768,
        "decoder_hidden_size": 768,
        "codebook_embed_dim": 256,
        "codebook_k_size": 256,
        "action_chunk_size": 50,
        "chunk_size": 50,
        "n_action_steps": 50,
        "resampler": true,
        "resampler_dim": 2048,
        "resampler_depth": 3,
        "resampler_dim_head": 128,
        "resampler_heads": 4,
        "resampler_num_media_embeds": 1,
        "resampler_num_latents": 9,
        "max_state_dim": 32,
        "max_action_dim": 32,
        "resize_imgs_with_padding": [
            224,
            224
        ],
        "empty_cameras": 0,
        "adapt_to_pi_aloha": false,
        "use_delta_joint_actions_aloha": false,
        "tokenizer_max_length": 48,
        "proj_width": 1024,
        "num_steps": 10,
        "use_cache": true,
        "attention_implementation": "eager",
        "freeze_vision_encoder": false,
        "freeze_language_encoder": true,
        "train_expert_only": false,
        "train_state_proj": true,
        "optimizer_lr": 0.0001,
        "optimizer_betas": [
            0.9,
            0.95
        ],
        "optimizer_eps": 1e-08,
        "optimizer_weight_decay": 1e-10,
        "scheduler_warmup_steps": 5000,
        "scheduler_decay_steps": 300000,
        "scheduler_decay_lr": 1e-06
    },
    "output_dir": "/media/jushen/bamboo-fan/Save/crossvla/dev/stage2_dev_pretrain_10_8_bs8_stage1_275k",
    "job_name": "stage2_dev_pretrain_10_8_bs8_stage1_275k",
    "resume": true,
    "device": "cuda",
    "use_amp": false,
    "seed": 1000,
    "num_workers": 4,
    "batch_size": 8,
    "gradient_accumulation_steps": 1,
    "steps": 300000,
    "eval_freq": 20000,
    "log_freq": 200,
    "save_checkpoint": true,
    "save_freq": 10000,
    "use_policy_training_preset": true,
    "optimizer": {
        "type": "adamw",
        "lr": 0.0001,
        "weight_decay": 1e-10,
        "grad_clip_norm": 10.0,
        "betas": [
            0.9,
            0.95
        ],
        "eps": 1e-08
    },
    "scheduler": {
        "type": "cosine_decay_with_warmup",
        "num_warmup_steps": 5000,
        "num_decay_steps": 300000,
        "peak_lr": 0.0001,
        "decay_lr": 1e-06
    },
    "eval": {
        "n_episodes": 50,
        "batch_size": 50,
        "use_async_envs": false
    },
    "wandb": {
        "enable": true,
        "disable_artifact": true,
        "project": "crossvla_dev",
        "entity": null,
        "notes": null
    }
}