# rSkill manifest — openral packaging format V1 (CLAUDE.md §6.4)
# Wraps: katefgroup/3d_diffuser_actor (diffuser_actor_peract.pth)
# Paper: Ke et al., 2024 — "3D Diffuser Actor: Policy Diffusion with 3D Scene
#        Representations" (arXiv:2402.10885). RLBench PerAct 18-task setup.
#
# LICENSE: MIT (code + released checkpoints) — commercially permissive. No
# license guard needed (unlike RVT/RVT-2, which are NVIDIA non-commercial).
#
# RUNTIME: auto-managed out-of-process sidecar (ZMQ + msgpack), ADR-0062. The
# policy AND the CoppeliaSim/PyRep RLBench scene run in their own externally-
# provisioned py3.10 venv (CoppeliaSim is proprietary, free-EDU, NEVER vendored).
# The openral adapter (openral_sim.policies.rlbench_3dda) forks
# tools/rlbench_3dda_sidecar.py on first use; user workflow is one command:
#
#   openral benchmark scene --config scenes/benchmark/rlbench_open_drawer.yaml \
#       --rskill rskills/3d-diffuser-actor-rlbench
#
# Verified live on an 8 GB Ada GPU host (2026-06-19): open_drawer 4/4,
# meat_off_grill 3/3, close_jar solved. Inference VRAM peak ~0.43 GB.
# ── Identity ────────────────────────────────────────────────────────────────
schema_version: "0.1"
name: "OpenRAL/rskill-3d-diffuser-actor-rlbench"
# ADR-0060: the benchmark tasks this checkpoint is validated for (gate). The
# released PerAct checkpoint covers all 18 PerAct tasks; we ship + declare the
# three live-verified starter tasks here (the rest are a follow-up).
evaluated_tasks:
  - "rlbench/open_drawer"
  - "rlbench/meat_off_grill"
  - "rlbench/close_jar"
# Version strings stay quoted so they are never re-typed as floats.
version: "0.1.0"
license: "mit"
role: "s1"
kind: "vla"
# ── Policy identity ─────────────────────────────────────────────────────────
model_family: "diffuser_actor"
# ── Compatibility contract ──────────────────────────────────────────────────
embodiment_tags:
  - "franka_panda"
# RLBench renders four fixed cameras (the PerAct set: left_shoulder /
# right_shoulder / wrist / front) and the policy fuses their RGB-D point clouds
# into a 3D scene representation. Those four are supplied by the SCENE backend
# (openral_sim.backends.rlbench), NOT by the robot's real sensor list — so the
# robot-capability gate uses a coarse modality-count requirement ("an RGB-vision
# embodiment") rather than keyed camera1..4 the franka_panda manifest doesn't
# declare. The per-camera 3D fusion happens inside the policy sidecar.
sensors_required:
  # Single coarse requirement: at least one RGB sensor of >= 128x128.
  - modality: "rgb"
    count: 1
    min_width: 128
    min_height: 128
# The policy emits next-keyframe end-effector poses; RLBench executes each via
# its sampling-based motion planner (EndEffectorPoseViaPlanning). Absolute EE
# pose targets, not deltas.
actuators_required:
  - kind: "cartesian_pose"
    # NOTE(review): nesting reconstructed from a flattened source — assumed to
    # be a field of this actuator requirement rather than a top-level key;
    # confirm against the rSkill manifest schema.
    control_mode_semantics:
      mode: "absolute"
      reference_frame: "panda_link0"
# ── Runtime / weights ───────────────────────────────────────────────────────
runtime: "pytorch"
# Minimum VRAM (GB) declared per precision; both precisions declare 2.0.
min_vram_gb:
  bf16: 2.0
  fp32: 2.0
weights_uri: "hf://katefgroup/3d_diffuser_actor"
# ── Execution semantics ─────────────────────────────────────────────────────
# One macro-keypose per step (the scene's mover plans + executes it). 100 DDIM
# denoising steps per keypose; ~1.2 s/keypose on an 8 GB Ada GPU.
chunk_size: 1
latency_budget:
  per_chunk_ms: 3000.0
# ── IO contract ─────────────────────────────────────────────────────────────
# 8-D keyframe action: [x y z qx qy qz qw gripper_open] (world frame). The scene
# sidecar appends the peract fork's ignore_collisions channel + plans the motion.
# NOTE(review): the comment above says "world frame" while the pose slot below
# declares frame "panda_link0" — confirm which frame the sidecar expects.
action_contract:
  dim: 8
  slots:
    # Indices 0-6: position (x y z) followed by quaternion (qx qy qz qw).
    # Range endpoints appear to be inclusive (7 + 1 entries == dim 8).
    - range: [0, 6]
      control_mode: "cartesian_pose"
      ee: "panda_hand"
      frame: "panda_link0"
    # Index 7: the gripper_open channel.
    - range: [7, 7]
      control_mode: "gripper_position"
      ee: "panda_gripper"
# ── Provenance ──────────────────────────────────────────────────────────────
paper_url: "https://arxiv.org/abs/2402.10885"
source_repo: "hf://katefgroup/3d_diffuser_actor"
# Folded scalar: the lines below are joined with spaces into one paragraph
# (a single trailing newline is kept).
description: >
  3D Diffuser Actor (Ke et al., 2024) — a diffusion policy over end-effector
  keyposes fusing multi-view RGB-D into a 3D scene representation, on the RLBench
  PerAct 18-task benchmark. Shares the out-of-process CoppeliaSim/PyRep sidecar
  with the rlbench scene backend (ADR-0062). MIT code + checkpoints. The PerAct
  checkpoint is loaded verbatim; ships three live-verified starter tasks.
# ADR-0022 — action vocabulary surfaced to the reasoner LLM tool palette.
actions:
  - "generalist"
  - "open"
  - "close"
  - "pick"
  - "place"
# Explicit empty list (a bare `objects:` would parse as null instead).
objects: []
scenes:
  - "tabletop"