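# PI0 training config - uses the unified 37D action representation.
# The action/state projection layers (hard-coded to 32D in upstream openpi) are replaced automatically
# when PI0Framework is initialized: the action projections become 37D (action_dim) and state_proj takes
# the 74D unified state (state_dim). The corresponding 32D parameters in the checkpoint are skipped on
# load; all other backbone parameters are reused as usual.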

# Run identity, output directory, RNG seed, and experiment-tracking settings.
run_id: pi0_unified_37d
run_root_dir: ./runs
seed: 42
trackers: [jsonl, wandb]
# NOTE(review): wandb_entity looks like a personal account name — confirm it is
# the intended entity before sharing or reusing this config.
wandb_entity: timsty
wandb_project: vla_jepa
is_debug: false

framework:
  name: PI0
  # PI0 model configuration.
  # action_dim follows this project's convention (the unified 37D action representation).
  # In the PI0Pytorch source, action_in_proj / action_out_proj / state_proj are hard-coded to 32D;
  # PI0Framework.__init__ calls _replace_pi0_projection_layers to replace them with project-sized
  # layers (action projections at action_dim, state_proj at state_dim — see the comments on those keys).
  # When the checkpoint is loaded, these layers are skipped because of the shape mismatch
  # (they stay randomly initialized).
  # The remaining backbone layers (PaliGemma, action expert transformer, etc.) are still loaded
  # from the checkpoint as usual.
  pi0:
    paligemma_variant: "gemma_2b"
    action_expert_variant: "gemma_300m"
    pi05: false
    action_dim: 37          # project-wide action dimension; projection layers are replaced automatically and the corresponding checkpoint parameters are skipped on load
    state_dim: 74           # unified state dimension; state_proj is replaced with Linear(74, width), independent of action_dim
    action_horizon: 15      # kept equal to datasets.vla_data.chunk_size
    dtype: "bfloat16"

  # Pretrained weights path (e.g. pi05_libero; when action_dim does not match, the weights are
  # partially loaded with strict=False).
  # NOTE(review): the path below points at pi0_base_torch and pi05 is false above, so the
  # "pi05_libero" example does not describe the value actually configured here.
  pi0_checkpoint: /mnt/data/fangyu/model/openpi/openpi-assets/checkpoints/pi0_base_torch/model.pt

  # PaliGemma tokenizer
  tokenizer_path: /root/.cache/openpi/big_vision/paligemma_tokenizer.model

  # Image key names, matching openpi's three-view format; for single-view gr1 data,
  # use together with replicate_single_view.
  image_keys:
    - "base_0_rgb"
    - "left_wrist_0_rgb"
    - "right_wrist_0_rgb"

  # When the dataset provides only one image, replicate it to all 3 views
  # (e.g. fourier_gr1 video.ego_view).
  replicate_single_view: true

  # NOTE(review): datasets.vla_data.include_state is false in this file (its comment says state
  # is not used when training PI0), while use_state is true here — confirm which is intended.
  use_state: true

  # If true, dynamically use the first N entries of image_keys based on the actual number of
  # images; otherwise always use all keys and zero-pad the missing ones.
  dynamic_image_keys: false

  num_inference_steps: 10

  # Output truncation dimension; null means output the full action_dim.
  effective_action_dim: null

# Dataset settings for the VLA training data (source, image handling, batching, action chunking).
datasets:
  vla_data:
    dataset_py: lerobot_datasets
    data_root_dir: /mnt/data/fangyu/dataset/IPEC-COMMUNITY
    # NOTE(review): "embodiedment" looks like a misspelling of "embodiment", but this is
    # presumably a registered mixture name — verify against the mixture registry before renaming.
    data_mix: cross_embodiedment_simulator
    default_image_resolution: [3, 224, 224]
    per_device_batch_size: 32
    load_all_data_for_training: true
    # Only one observation image is requested here; framework.replicate_single_view is true
    # in this file, which is described there as copying a single image to all 3 views.
    obs: ["image_0"]
    image_size: [224, 224]
    video_backend: torchcodec
    load_video: true
    # Same value as framework.pi0.action_horizon (15).
    chunk_size: 15
    state_use_action_chunk: false
    num_history_steps: 0
    # NOTE(review): framework.use_state is true and framework.pi0.state_dim is 74 in this file,
    # which appears to conflict with disabling state here — confirm which is intended.
    include_state: false   # state is not used when training PI0

# Training loop settings: duration, checkpointing, learning-rate schedule, optimizer, and resume options.
trainer:
  # NOTE(review): both epochs and max_train_steps are set; which one bounds training is decided
  # by the trainer code, not visible here — confirm.
  epochs: 100
  max_train_steps: 20000
  # NOTE(review): num_warmup_steps (5000) and warmup_ratio (0.1 further below; 0.1 x 20000 = 2000)
  # describe different warmup lengths — confirm which one the scheduler actually uses.
  num_warmup_steps: 5000
  num_stable_steps: 0
  save_interval: 5000
  max_checkpoints_to_keep: 20

  # presumably per-parameter-group learning rates (a default plus one for the PI0 model) —
  # verify against the optimizer construction code.
  learning_rate:
    base: 2.5e-5
    pi0_model: 2.5e-5

  lr_scheduler_type: warmup_stable_cosine
  scheduler_specific_kwargs:
    min_lr_ratio: 0.001

  freeze_modules: ""
  warmup_ratio: 0.1
  # NOTE(review): weight_decay is also set under optimizer below (1.0e-08) with a different
  # value — confirm which one takes effect.
  weight_decay: 0.0
  logging_frequency: 10
  gradient_clipping: 5.0
  gradient_accumulation_steps: 1

  optimizer:
    name: AdamW
    betas: [0.9, 0.95]
    eps: 1.0e-08
    weight_decay: 1.0e-08

  is_resume: false
  pretrained_checkpoint: null
  enable_gradient_checkpointing: false
  enable_mixed_precision_training: true