# File: cross13tasks/code/config/training/starvla_train_pi0.yaml
# Hugging Face commit e94400c (verified), by Timsty: "Upload folder using huggingface_hub"
# PI0 training configuration using the unified 37-D action representation.
# The action/state projection layers (hard-coded to 32-D in the original openpi)
# are replaced automatically with 37-D layers when PI0Framework is initialized.
# The corresponding 32-D parameters in the checkpoint are skipped on load; all
# other backbone parameters are reused as usual.
run_id: pi0_unified_37d
run_root_dir: ./runs
seed: 42
trackers:
  - jsonl
  - wandb
wandb_entity: timsty
wandb_project: vla_jepa
is_debug: false
framework:
  name: PI0
  # PI0 model configuration.
  # action_dim follows this project's convention (unified 37-D action representation).
  # The PI0Pytorch source hard-codes action_in_proj / action_out_proj / state_proj to 32-D;
  # PI0Framework.__init__ calls _replace_pi0_projection_layers to replace them with 37-D layers.
  # When the checkpoint is loaded, those layers are skipped because of the shape
  # mismatch and keep their random initialization.
  # The remaining VLM backbone layers (PaliGemma, the action-expert transformer, ...)
  # are still loaded from the checkpoint as usual.
  # NOTE(review): the nesting below was reconstructed from a copy that had lost its
  # indentation. Keys from pi0_checkpoint onward are assumed to belong to `pi0`
  # (rather than directly to `framework`) - confirm against PI0Framework.
  pi0:
    paligemma_variant: "gemma_2b"
    action_expert_variant: "gemma_300m"
    pi05: false
    # Project-wide unified dimension; the projection layers are replaced automatically
    # and the corresponding checkpoint parameters are skipped on load.
    action_dim: 37
    # Unified state dimension; state_proj becomes Linear(74, width), independent of action_dim.
    state_dim: 74
    action_horizon: 15  # kept equal to chunk_size
    dtype: "bfloat16"
    # Path to the pretrained weights (pi0_base_torch here). When action_dim does not
    # match the checkpoint, the weights are loaded partially with strict=False.
    pi0_checkpoint: /mnt/data/fangyu/model/openpi/openpi-assets/checkpoints/pi0_base_torch/model.pt
    # PaliGemma tokenizer
    tokenizer_path: /root/.cache/openpi/big_vision/paligemma_tokenizer.model
    # Image key names, matching openpi's three-view format; for single-view gr1 data
    # this is used together with replicate_single_view.
    image_keys:
      - "base_0_rgb"
      - "left_wrist_0_rgb"
      - "right_wrist_0_rgb"
    # When the dataset provides only one image, copy it to all three views
    # (e.g. fourier_gr1 video.ego_view).
    replicate_single_view: true
    # NOTE(review): the dataset section sets `include_state: false` while use_state
    # is true here - confirm this combination is intended.
    use_state: true
    # If true, use the first N image_keys according to the number of images actually
    # provided; otherwise always use all keys and zero-pad the missing views.
    dynamic_image_keys: false
    num_inference_steps: 10
    # Output truncation dimension; null means the full action_dim is output.
    effective_action_dim: null
# Dataset configuration. All loader options live under datasets.vla_data.
datasets:
  vla_data:
    dataset_py: lerobot_datasets
    data_root_dir: /mnt/data/fangyu/dataset/IPEC-COMMUNITY
    data_mix: cross_embodiedment_simulator
    default_image_resolution: [3, 224, 224]
    per_device_batch_size: 32
    load_all_data_for_training: true
    obs: ["image_0"]
    image_size: [224, 224]
    video_backend: torchcodec
    load_video: true
    chunk_size: 15
    state_use_action_chunk: false
    num_history_steps: 0
    include_state: false  # state is not used when training PI0
# Trainer configuration: schedule, learning rates, optimizer and checkpointing.
trainer:
  epochs: 100
  max_train_steps: 20000
  # NOTE(review): num_warmup_steps (5000) and warmup_ratio (0.1, i.e. 2000 of
  # 20000 steps) disagree - confirm which one the scheduler actually uses.
  num_warmup_steps: 5000
  num_stable_steps: 0
  save_interval: 5000
  max_checkpoints_to_keep: 20
  # Learning rates: `base` plus one entry for the pi0_model group.
  learning_rate:
    base: 2.5e-5
    pi0_model: 2.5e-5
  lr_scheduler_type: warmup_stable_cosine
  scheduler_specific_kwargs:
    min_lr_ratio: 0.001
  freeze_modules: ""
  warmup_ratio: 0.1
  weight_decay: 0.0
  logging_frequency: 10
  gradient_clipping: 5.0
  gradient_accumulation_steps: 1
  # Optimizer settings; this weight_decay is a separate key from trainer.weight_decay.
  optimizer:
    name: AdamW
    betas: [0.9, 0.95]
    eps: 1.0e-08
    weight_decay: 1.0e-08
  is_resume: false
  pretrained_checkpoint: null
  enable_gradient_checkpointing: false
  enable_mixed_precision_training: true