# PI0 training config using the unified 37D action representation.
# The action/state projection layers (hard-coded to 32D in upstream openpi)
# are automatically replaced with 37D ones when PI0Framework is initialised.
# The corresponding 32D checkpoint parameters are skipped on load; all other
# backbone parameters are reused as usual.
#
# NOTE(review): this file had been flattened onto single lines, which turned
# every key into part of one long comment. The nesting below is reconstructed
# from key names and comment grouping -- confirm against the consumer's schema.

run_id: pi0_unified_37d
run_root_dir: ./runs
seed: 42
trackers: [jsonl, wandb]
wandb_entity: timsty
wandb_project: vla_jepa
is_debug: false

framework:
  name: PI0

  # PI0 model configuration.
  # action_dim follows this project's convention (unified 37D action
  # representation). In the PI0Pytorch source, action_in_proj /
  # action_out_proj / state_proj are hard-coded to 32D;
  # PI0Framework.__init__ calls _replace_pi0_projection_layers to swap them
  # for 37D versions. When the checkpoint is loaded these layers are skipped
  # because of the shape mismatch (they stay randomly initialised).
  # All other VLM backbone layers (PaliGemma, action expert transformer, ...)
  # are still loaded from the checkpoint normally.
  pi0:
    paligemma_variant: "gemma_2b"
    action_expert_variant: "gemma_300m"
    pi05: false
    # Project-wide action dimension; the projection layers are replaced
    # automatically and the matching checkpoint parameters are skipped on load.
    action_dim: 37
    # Unified state dimension; state_proj is replaced with Linear(74, width),
    # independent of action_dim.
    state_dim: 74
    action_horizon: 15  # kept aligned with chunk_size
    dtype: "bfloat16"

    # Path to the pretrained weights (pi05_libero etc.; loaded partially with
    # strict=False when action_dim does not match).
    # NOTE(review): the comment mentions pi05_libero, but this path points at
    # pi0_base_torch and pi05 is false -- confirm which checkpoint is intended.
    pi0_checkpoint: /mnt/data/fangyu/model/openpi/openpi-assets/checkpoints/pi0_base_torch/model.pt

    # PaliGemma tokenizer
    tokenizer_path: /root/.cache/openpi/big_vision/paligemma_tokenizer.model

    # Image key names, matching openpi's three-view format; for single-view
    # gr1 data, use together with replicate_single_view.
    image_keys:
      - "base_0_rgb"
      - "left_wrist_0_rgb"
      - "right_wrist_0_rgb"

    # When the dataset provides only one image, replicate it to all 3 views
    # (e.g. fourier_gr1 video.ego_view).
    replicate_single_view: true

    use_state: true

    # If true, use the first N entries of image_keys according to the actual
    # number of images; otherwise always use all keys and zero-pad the
    # missing ones.
    dynamic_image_keys: false

    num_inference_steps: 10

    # Output truncation dimension; null means emit the full action_dim.
    effective_action_dim: null

datasets:
  vla_data:
    dataset_py: lerobot_datasets
    data_root_dir: /mnt/data/fangyu/dataset/IPEC-COMMUNITY
    data_mix: cross_embodiedment_simulator
    default_image_resolution: [3, 224, 224]
    per_device_batch_size: 32
    load_all_data_for_training: true
    obs: ["image_0"]
    image_size: [224, 224]
    video_backend: torchcodec
    load_video: true
    chunk_size: 15
    state_use_action_chunk: false
    num_history_steps: 0
    # State is not used when training PI0.
    # NOTE(review): framework.pi0.use_state is true while this flag is false;
    # confirm the two settings are meant to differ.
    include_state: false
# Training-loop settings: schedule length, learning rates, LR scheduler,
# optimizer and checkpoint/resume behaviour.
trainer:
  epochs: 100
  max_train_steps: 20000
  # NOTE(review): both num_warmup_steps and warmup_ratio (below) are set;
  # which one the scheduler honours is not visible from this file -- confirm
  # and drop the unused one.
  num_warmup_steps: 5000
  num_stable_steps: 0
  save_interval: 5000
  max_checkpoints_to_keep: 20

  # Learning rates; presumably `base` is the default and the other keys are
  # per-module overrides -- verify against the trainer code.
  learning_rate:
    base: 2.5e-5
    pi0_model: 2.5e-5

  lr_scheduler_type: warmup_stable_cosine
  scheduler_specific_kwargs:
    min_lr_ratio: 0.001

  freeze_modules: ""
  warmup_ratio: 0.1
  # NOTE(review): weight_decay also appears under `optimizer` with a
  # different value (1.0e-08); confirm which one is actually applied.
  weight_decay: 0.0
  logging_frequency: 10
  gradient_clipping: 5.0
  gradient_accumulation_steps: 1

  optimizer:
    name: AdamW
    betas: [0.9, 0.95]
    eps: 1.0e-08
    weight_decay: 1.0e-08

  is_resume: false
  pretrained_checkpoint: null
  enable_gradient_checkpointing: false
  enable_mixed_precision_training: true