# rSkill manifest — OpenRAL packaging format V1 (CLAUDE.md §6.4)
# Wraps: lerobot/act_aloha_sim_transfer_cube_human (MIT)
# Paper: Zhao et al., 2023 — Action Chunking Transformer.
#
# LEGACY PROCESSOR PATH: this checkpoint pre-dates lerobot's
# PolicyProcessorPipeline migration and ships its norm stats inside
# model.safetensors. The schema's `processors` block is therefore omitted;
# the ACT adapter dispatches on `manifest.processors is None` and falls
# back to the snapshot_download + _try_load_act_norm_stats path. Migrating
# to per-file URIs would require re-publishing the upstream checkpoint
# and is tracked as a follow-up.

# ── Identity ───────────────────────────────────────────────────────────────
# Manifest schema revision plus the skill's published identity.
# Version-like values stay quoted so they load as strings, not floats.
schema_version: '0.1'
name: 'OpenRAL/rskill-act-aloha'
version: '0.1.0'
license: 'mit'
role: 's1'
# rSkill kind discriminator (ADR-00XX): 'vla' marks a learnable
# Vision-Language-Action policy.
# NOTE(review): "ADR-00XX" looks like a placeholder — confirm the final
# ADR number.
kind: 'vla'

# ── Policy identity ────────────────────────────────────────────────────────
# Policy architecture family: this skill wraps an Action Chunking
# Transformer (ACT) checkpoint.
model_family: 'act'

# ── Compatibility contract ─────────────────────────────────────────────────
# Targets the bimanual ALOHA embodiment: two 7-DoF arms, i.e. a 14-DoF
# action space. Exercised by tests/sim/test_aloha_bimanual_act_aloha.py
# (gym-aloha MuJoCo).
embodiment_tags:
  - 'aloha'

# The ALOHA cube-transfer ACT checkpoint ships with a single top-down RGB
# stream at 480×640 (height × width), hence the minimum resolution below.
sensors_required:
  - modality: 'rgb'
    vla_feature_key: 'observation.images.top'
    min_width: 640
    min_height: 480

# Output side (ADR-0013). n_dof (14) and vla_action_key are not spelled
# out here: for the canonical aloha bimanual embodiment the loader
# auto-fills them from robots/aloha_bimanual/robot.yaml.
actuators_required:
  - kind: 'joint_position'
    control_mode_semantics:
      mode: 'absolute'

# ── Runtime / weights ──────────────────────────────────────────────────────
# PyTorch runtime with unquantised fp32 weights; weights_uri points at the
# wrapped upstream lerobot checkpoint.
runtime: 'pytorch'
quantization:
  dtype: 'fp32'
  backend: 'pytorch'
weights_uri: 'hf://lerobot/act_aloha_sim_transfer_cube_human'

# ── Preprocessing (all knobs needed to interpret IO) ───────────────────────
# processors omitted — legacy path; norm stats live inside model.safetensors.
# ACT manages its own preprocessing / state contract inside the lerobot
# ACTPolicy so nothing else needs to move.

# ── Execution semantics ────────────────────────────────────────────────────
# Length of the action chunk this checkpoint works with.
chunk_size: 100
# n_action_steps is omitted on purpose — the ACT default is 1 (per-step
# re-inference + temporal ensembling, paper-faithful).
latency_budget:
  # Warm full-chunk inference on the reference host (RTX 4070 Laptop,
  # CUDA 12.8, PyTorch 2.10) measures 16 ms; bf16 autocast is ~12 ms.
  # per_chunk_ms is pinned to 25 ms to keep the canonical
  # "tolerance_pct=100 → 2× ceiling" pattern, which yields a 50 ms test
  # ceiling — the same limit as the previous
  # _WARM_CHUNK_CEILING_S = 0.050.
  per_chunk_ms: 25.0

# ── Provenance ─────────────────────────────────────────────────────────────
# Headline success rate, taken from
# skills/act-aloha/eval/aloha_transfer_cube.json (50 episodes via
# `openral benchmark run`).
benchmarks:
  aloha_transfer_cube: 0.82

# Upstream references: the ACT paper and the wrapped checkpoint.
paper_url: 'https://arxiv.org/abs/2304.13705'
source_repo: 'hf://lerobot/act_aloha_sim_transfer_cube_human'

# Human-readable summary of the skill. The folded scalar (`>`) joins the
# wrapped lines into one paragraph. The normalisation wording matches the
# legacy-processor note in the file header: the stats ship inside
# model.safetensors, not as separate processor files.
description: >
  Action Chunking Transformer (~52M-param encoder-decoder) finetuned on
  the ALOHA bimanual cube-transfer demonstration set. Action chunks of
  length 100. The published checkpoint predates lerobot's
  PolicyProcessorPipeline migration and ships its normalisation
  statistics inside model.safetensors rather than as separate processor
  files — see tests/sim/test_aloha_bimanual_act_aloha.py for the
  resulting numerical-contract caveats.

# ADR-0022 — action vocabulary surfaced in the reasoner LLM's tool
# palette, letting it select this skill by what it does (action verb +
# object + scene) rather than by its slug alone.
actions:
  - 'transfer'
  - 'pick'
  - 'place'
objects:
  - 'cube'
scenes:
  - 'tabletop'

# ADR-0019 — per-checkpoint action contract. The dataset bridge consumes
# it to bind the shape of the LeRobot v3 `action` feature; dim matches the
# 14-DoF bimanual action space.
action_contract:
  dim: 14

# ADR-0019 — per-checkpoint state contract.
state_contract:
  dim: 14