# chore: publish rSkill OpenRAL/rskill-act-aloha v0.1.0

# d4442f6 verified 7 days ago

# 4.77 kB

	# rSkill manifest — OpenRAL packaging format V1 (CLAUDE.md §6.4)
	# Wraps: lerobot/act_aloha_sim_transfer_cube_human (MIT)
	# Paper: Zhao et al., 2023 — Action Chunking Transformer.
	#
	# LEGACY PROCESSOR PATH: this checkpoint pre-dates lerobot's
	# PolicyProcessorPipeline migration and ships its norm stats inside
	# model.safetensors. The schema's processors block is therefore omitted;
	# the ACT adapter dispatches on manifest.processors is None and falls
	# back to the snapshot_download + _try_load_act_norm_stats path. Migrating
	# to per-file URIs would require re-publishing the upstream checkpoint
	# and is tracked as a follow-up.

	# ── Identity ───────────────────────────────────────────────────────────────
	schema_version: "0.1"
	name: "OpenRAL/rskill-act-aloha"
	version: "0.1.0"
	license: "mit"
	role: "s1"
	kind: "vla" # ADR-00XX: rSkill kind discriminator. "vla" = learnable Vision-Language-Action policy.

	# ── Policy identity ────────────────────────────────────────────────────────
	model_family: "act"

	# ── Compatibility contract ─────────────────────────────────────────────────
	# Bimanual ALOHA (2 × 7-DoF arms = 14-DoF action space). Used by
	# tests/sim/test_aloha_bimanual_act_aloha.py (gym-aloha MuJoCo).
	embodiment_tags:
	- "aloha"

	# ACT for ALOHA cube-transfer ships with a single top-down 480×640 RGB stream.
	sensors_required:
	- modality: "rgb"
	vla_feature_key: "observation.images.top"
	min_width: 640
	min_height: 480

	# Output side (ADR-0013). For the canonical aloha bimanual embodiment the
	# loader auto-fills n_dof (14) + vla_action_key from
	# robots/aloha_bimanual/robot.yaml.
	actuators_required:
	- kind: "joint_position"
	control_mode_semantics:
	mode: "absolute"

	# ── Runtime / weights ──────────────────────────────────────────────────────
	runtime: "pytorch"
	quantization:
	dtype: "fp32"
	backend: "pytorch"
	weights_uri: "hf://lerobot/act_aloha_sim_transfer_cube_human"

	# ── Preprocessing (all knobs needed to interpret IO) ───────────────────────
	# processors omitted — legacy path; norm stats live inside model.safetensors.
	# ACT manages its own preprocessing / state contract inside the lerobot
	# ACTPolicy so nothing else needs to move.

	# ── Execution semantics ────────────────────────────────────────────────────
	chunk_size: 100
	# n_action_steps omitted — ACT default is 1 (per-step re-inference +
	# temporal ensembling, paper-faithful).
	latency_budget:
	# Reference-host measurement (RTX 4070 Laptop, CUDA 12.8, PyTorch 2.10)
	# of the warm full-chunk inference is 16 ms; bf16 autocast is ~12 ms.
	# We pin per_chunk_ms to 25 ms to keep the canonical
	# "tolerance_pct=100 → 2× ceiling" pattern (giving a 50 ms test ceiling,
	# matching the previous _WARM_CHUNK_CEILING_S = 0.050).
	per_chunk_ms: 25.0

	# ── Provenance ─────────────────────────────────────────────────────────────
	# Headline success rate from skills/act-aloha/eval/aloha_transfer_cube.json
	# (50 episodes via `openral benchmark run`).
	benchmarks:
	aloha_transfer_cube: 0.82

	paper_url: "https://arxiv.org/abs/2304.13705"
	source_repo: "hf://lerobot/act_aloha_sim_transfer_cube_human"

	description: >
	Action Chunking Transformer (~52M-param encoder-decoder) finetuned on
	the ALOHA bimanual cube-transfer demonstration set. Action chunks of
	length 100. The published checkpoint predates lerobot's
	PolicyProcessorPipeline migration and ships without normalisation
	buffers — see tests/sim/test_aloha_bimanual_act_aloha.py for the
	resulting numerical-contract caveats.

	# ADR-0022 — action vocabulary surfaced to the reasoner LLM tool
	# palette so it can pick this skill by what it does (action verb +
	# object + scene), not just by its slug.
	actions:
	- "transfer"
	- "pick"
	- "place"
	objects:
	- "cube"
	scenes:
	- "tabletop"

	# ADR-0019 — per-checkpoint action contract (consumed by the dataset bridge
	# to bind the LeRobot v3 `action` feature shape).
	action_contract:
	dim: 14

	# ADR-0019 — per-checkpoint state contract.
	state_contract:
	dim: 14