# rskill-act-aloha / rskill.yaml
# AdrianLlopart's picture
# chore: publish rSkill OpenRAL/rskill-act-aloha v0.1.0
# d4442f6 verified
# rSkill manifest — OpenRAL packaging format V1 (CLAUDE.md §6.4)
# Wraps: lerobot/act_aloha_sim_transfer_cube_human (MIT)
# Paper: Zhao et al., 2023 β€” Action Chunking Transformer.
#
# LEGACY PROCESSOR PATH: this checkpoint pre-dates lerobot's
# PolicyProcessorPipeline migration and ships its norm stats inside
# model.safetensors. The schema's processors block is therefore omitted;
# the ACT adapter dispatches on manifest.processors is None and falls
# back to the snapshot_download + _try_load_act_norm_stats path. Migrating
# to per-file URIs would require re-publishing the upstream checkpoint
# and is tracked as a follow-up.
# ── Identity ───────────────────────────────────────────────────────────────
schema_version: "0.1"
name: "OpenRAL/rskill-act-aloha"
version: "0.1.0"
license: "mit"
role: "s1"
# rSkill kind discriminator (ADR-00XX): "vla" marks a learnable
# Vision-Language-Action policy.
kind: "vla"

# ── Policy identity ────────────────────────────────────────────────────────
model_family: "act"
# ── Compatibility contract ─────────────────────────────────────────────────
# Target embodiment: bimanual ALOHA (2 × 7-DoF arms = 14-DoF action space).
# Exercised by tests/sim/test_aloha_bimanual_act_aloha.py (gym-aloha MuJoCo).
embodiment_tags:
  - "aloha"
# ACT for ALOHA cube-transfer ships with a single top-down 480×640 RGB stream.
# All four keys below describe that one stream, so they form a single list
# entry rather than separate top-level keys.
sensors_required:
  - modality: "rgb"
    vla_feature_key: "observation.images.top"
    min_width: 640
    min_height: 480
# Output side (ADR-0013). For the canonical aloha bimanual embodiment the
# loader auto-fills n_dof (14) + vla_action_key from
# robots/aloha_bimanual/robot.yaml.
# NOTE(review): nesting reconstructed after indentation loss —
# control_mode_semantics is assumed to belong to the joint_position entry;
# confirm against the rSkill schema.
actuators_required:
  - kind: "joint_position"
    control_mode_semantics:
      mode: "absolute"
# ── Runtime / weights ──────────────────────────────────────────────────────
runtime: "pytorch"
# NOTE(review): nesting reconstructed after indentation loss — dtype and
# backend are assumed to be quantization fields; confirm against the rSkill
# schema.
quantization:
  dtype: "fp32"
  backend: "pytorch"
weights_uri: "hf://lerobot/act_aloha_sim_transfer_cube_human"
# ── Preprocessing (all knobs needed to interpret IO) ───────────────────────
# processors omitted — legacy path; norm stats live inside model.safetensors.
# ACT manages its own preprocessing / state contract inside the lerobot
# ACTPolicy so nothing else needs to move.
# ── Execution semantics ────────────────────────────────────────────────────
chunk_size: 100
# n_action_steps omitted — ACT default is 1 (per-step re-inference +
# temporal ensembling, paper-faithful).
latency_budget:
  # Reference-host measurement (RTX 4070 Laptop, CUDA 12.8, PyTorch 2.10)
  # of the warm full-chunk inference is 16 ms; bf16 autocast is ~12 ms.
  # We pin per_chunk_ms to 25 ms to keep the canonical
  # "tolerance_pct=100 → 2× ceiling" pattern (giving a 50 ms test ceiling,
  # matching the previous _WARM_CHUNK_CEILING_S = 0.050).
  per_chunk_ms: 25.0
# ── Provenance ─────────────────────────────────────────────────────────────
# Headline success rate from skills/act-aloha/eval/aloha_transfer_cube.json
# (50 episodes via `openral benchmark run`).
benchmarks:
  aloha_transfer_cube: 0.82
paper_url: "https://arxiv.org/abs/2304.13705"
source_repo: "hf://lerobot/act_aloha_sim_transfer_cube_human"
# Folded scalar: the indented lines below are joined with spaces into one
# paragraph.
# NOTE(review): this text says the checkpoint ships without normalisation
# buffers, while the header comment of this file says the norm stats live
# inside model.safetensors — confirm which statement is accurate.
description: >
  Action Chunking Transformer (~52M-param encoder-decoder) finetuned on
  the ALOHA bimanual cube-transfer demonstration set. Action chunks of
  length 100. The published checkpoint predates lerobot's
  PolicyProcessorPipeline migration and ships without normalisation
  buffers — see tests/sim/test_aloha_bimanual_act_aloha.py for the
  resulting numerical-contract caveats.
# ADR-0022 — action vocabulary surfaced to the reasoner LLM tool palette,
# so the skill can be picked by what it does (action verb + object +
# scene) rather than only by its slug.
actions:
  - "transfer"
  - "pick"
  - "place"

objects:
  - "cube"

scenes:
  - "tabletop"
# ADR-0019 — per-checkpoint action contract (consumed by the dataset bridge
# to bind the LeRobot v3 `action` feature shape).
action_contract:
  dim: 14

# ADR-0019 — per-checkpoint state contract.
# `dim` is nested under each contract; left at top level, the two `dim`
# entries would be duplicate keys.
state_contract:
  dim: 14