cross13tasks / code /config /training /starvla_train_pi0.yaml

Upload folder using huggingface_hub

e94400c verified 19 days ago

3.38 kB

	# PI0 training configuration using the unified 37-D action representation.
	# The action/state projection layers (hard-coded to 32-D in upstream openpi)
	# are replaced automatically when PI0Framework is initialised, and the
	# matching 32-D checkpoint parameters are skipped on load; the remaining
	# backbone parameters are reused as-is.
	# NOTE(review): the original note says the projections become 37-D, but
	# state_dim is set to 74 further down, so presumably only the action
	# projections are 37-D - confirm.

	# Run-level settings.
	run_id: pi0_unified_37d
	# Relative path - presumably resolved against the launch directory; verify.
	run_root_dir: ./runs
	seed: 42
	trackers: [jsonl, wandb]
	wandb_entity: timsty
	wandb_project: vla_jepa
	is_debug: false

	# Model / framework section.
	# NOTE(review): every line in this file starts with a tab and all nesting has
	# been flattened. YAML forbids tabs for indentation, so the hierarchy (which
	# keys belong under `framework` and which under `pi0`) must be restored with
	# spaces before this file can be parsed - confirm against the config loader.
	framework:
	name: PI0
	# PI0 model configuration.
	# action_dim follows this project's convention (unified 37-D action
	# representation).
	# In the PI0Pytorch source, action_in_proj / action_out_proj / state_proj are
	# hard-coded to 32-D; PI0Framework.__init__ calls
	# _replace_pi0_projection_layers to replace them.
	# When the checkpoint is loaded, those layers are skipped automatically
	# because of the shape mismatch (they keep their random initialisation).
	# All other backbone layers (PaliGemma, action expert transformer, etc.) are
	# still loaded from the checkpoint as usual.
	pi0:
	paligemma_variant: "gemma_2b"
	action_expert_variant: "gemma_300m"
	pi05: false
	action_dim: 37 # project-wide unified dimension; projection layers are replaced automatically and the checkpoint's projection parameters are skipped on load
	state_dim: 74 # unified state dimension; state_proj is replaced with Linear(74, width), independent of action_dim
	action_horizon: 15 # kept aligned with chunk_size
	dtype: "bfloat16"

	# Path to the pretrained weights (pi05_libero etc.; when action_dim does not
	# match, the weights are partially loaded with strict=False).
	# NOTE(review): the example name above is pi05_libero, but the path below
	# points at pi0_base_torch (which agrees with `pi05: false`) - confirm the
	# intended checkpoint.
	pi0_checkpoint: /mnt/data/fangyu/model/openpi/openpi-assets/checkpoints/pi0_base_torch/model.pt

	# PaliGemma tokenizer
	tokenizer_path: /root/.cache/openpi/big_vision/paligemma_tokenizer.model

	# Image key names, matching openpi's three-view format; for single-view gr1
	# data, use together with replicate_single_view.
	image_keys:
	- "base_0_rgb"
	- "left_wrist_0_rgb"
	- "right_wrist_0_rgb"

	# When the dataset provides only one image, copy it to all three views
	# (e.g. fourier_gr1 video.ego_view).
	replicate_single_view: true

	# NOTE(review): the datasets section sets `include_state: false` with the
	# remark that state is not used when training PI0, which appears to conflict
	# with use_state: true here - confirm which one is intended.
	use_state: true

	# If true, use only the first N entries of image_keys according to the actual
	# number of images; otherwise always use all keys and zero-pad missing views.
	dynamic_image_keys: false

	num_inference_steps: 10

	# Output truncation dimension; null means the full action_dim is output.
	effective_action_dim: null

	# Dataset section.
	# NOTE(review): nesting has been flattened (tab-prefixed lines); presumably
	# every key below `vla_data:` is a child of it - restore space indentation
	# and confirm against the loader.
	datasets:
	vla_data:
	dataset_py: lerobot_datasets
	data_root_dir: /mnt/data/fangyu/dataset/IPEC-COMMUNITY
	# NOTE(review): "embodiedment" looks like a misspelling, but this is a runtime
	# key - it must match the registered mixture name, so verify before changing.
	data_mix: cross_embodiedment_simulator
	default_image_resolution: [3, 224, 224]
	per_device_batch_size: 32
	load_all_data_for_training: true
	# A single observation key is listed here, while the framework section lists
	# three image_keys and sets replicate_single_view: true.
	obs: ["image_0"]
	image_size: [224, 224]
	video_backend: torchcodec
	load_video: true
	# Same value as framework action_horizon (15), which is documented as being
	# kept aligned with chunk_size.
	chunk_size: 15
	state_use_action_chunk: false
	num_history_steps: 0
	# NOTE(review): the framework section sets use_state: true and
	# state_dim: 74, which appears to conflict with this flag - confirm.
	include_state: false # state is not used when training PI0

	# Trainer section: schedule, learning rates, optimizer and checkpointing.
	# NOTE(review): nesting has been flattened (tab-prefixed lines). Presumably
	# `base` / `pi0_model` belong under `learning_rate`, `min_lr_ratio` under
	# `scheduler_specific_kwargs`, and `name` / `betas` / `eps` / the second
	# `weight_decay` under `optimizer` - restore space indentation and confirm.
	trainer:
	# NOTE(review): both an epoch count and a step budget are given; which one
	# ends training depends on the trainer implementation - verify.
	epochs: 100
	max_train_steps: 20000
	# NOTE(review): 5000 warmup steps is 25% of max_train_steps, whereas
	# warmup_ratio below is 0.1 - confirm which setting the scheduler reads.
	num_warmup_steps: 5000
	num_stable_steps: 0
	save_interval: 5000
	max_checkpoints_to_keep: 20

	# Learning rates; both entries currently carry the same value.
	learning_rate:
	base: 2.5e-5
	pi0_model: 2.5e-5

	lr_scheduler_type: warmup_stable_cosine
	scheduler_specific_kwargs:
	min_lr_ratio: 0.001

	# Empty string - presumably means no modules are frozen; verify.
	freeze_modules: ""
	warmup_ratio: 0.1
	# NOTE(review): `weight_decay` appears again below with a different value
	# (1.0e-08). With the nesting flattened the two collide as duplicate keys;
	# presumably the second one is optimizer-scoped - confirm which is applied.
	weight_decay: 0.0
	logging_frequency: 10
	gradient_clipping: 5.0
	gradient_accumulation_steps: 1

	optimizer:
	name: AdamW
	betas: [0.9, 0.95]
	eps: 1.0e-08
	weight_decay: 1.0e-08

	# Resume / checkpoint and memory-precision switches.
	is_resume: false
	pretrained_checkpoint: null
	enable_gradient_checkpointing: false
	enable_mixed_precision_training: true