AdrianLlopart committed on
Commit
59e3f25
·
verified ·
1 Parent(s): 73940ec

chore: publish rSkill OpenRAL/rskill-diffusion-pusht v0.1.0

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. eval/README.md +1 -1
  3. eval/pusht.json +3 -3
  4. rskill.yaml +38 -18
README.md CHANGED
@@ -54,7 +54,7 @@ exposes the raw key `observation.image`.
54
 
55
  | Field | Value |
56
  | --- | --- |
57
- | `name` | `AdrianLlopart/rskill-diffusion-pusht` |
58
  | `version` | `0.1.0` |
59
  | `license` | `apache-2.0` |
60
  | `role` | `s1` |
@@ -72,13 +72,13 @@ Full schema: `openral_core.RSkillManifest` —
72
  ## Reproduction
73
 
74
  ```bash
75
- git clone https://github.com/AdrianLlopart/openral && cd OpenRAL
76
  just bootstrap && uv sync --all-packages --group sim
77
 
78
  # End-to-end via the canonical SimEnvironment config (CPU is enough):
79
  just sim-diffusion-pusht
80
  # which runs:
81
- # ral sim run --config examples/sim/diffusion_pusht.yaml --save-video
82
 
83
  # Sim test (gym_pusht + pymunk):
84
  uv run pytest tests/sim/test_pusht_2d_diffusion_pusht.py -v -m sim
@@ -93,5 +93,5 @@ match the upstream weights. Commercial use is allowed
93
  ## See also
94
 
95
  - [`robots/pusht_2d/README.md`](../../robots/pusht_2d/README.md) — RobotDescription manifest.
96
- - [`examples/sim/diffusion_pusht.yaml`](../../examples/sim/diffusion_pusht.yaml) — paired SimEnvironment config.
97
  - [`docs/reference/vla_compatibility.md`](../../docs/reference/vla_compatibility.md) — VLA × Robot × Sim matrix.
 
54
 
55
  | Field | Value |
56
  | --- | --- |
57
+ | `name` | `OpenRAL/rskill-diffusion-pusht` |
58
  | `version` | `0.1.0` |
59
  | `license` | `apache-2.0` |
60
  | `role` | `s1` |
 
72
  ## Reproduction
73
 
74
  ```bash
75
+ git clone https://github.com/OpenRAL/openral && cd OpenRAL
76
  just bootstrap && uv sync --all-packages --group sim
77
 
78
  # End-to-end via the canonical SimEnvironment config (CPU is enough):
79
  just sim-diffusion-pusht
80
  # which runs:
81
+ # openral sim run --config scenes/benchmarks/diffusion_pusht.yaml --save-video
82
 
83
  # Sim test (gym_pusht + pymunk):
84
  uv run pytest tests/sim/test_pusht_2d_diffusion_pusht.py -v -m sim
 
93
  ## See also
94
 
95
  - [`robots/pusht_2d/README.md`](../../robots/pusht_2d/README.md) — RobotDescription manifest.
96
+ - [`scenes/benchmarks/diffusion_pusht.yaml`](../../scenes/benchmarks/diffusion_pusht.yaml) — paired SimEnvironment config.
97
  - [`docs/reference/vla_compatibility.md`](../../docs/reference/vla_compatibility.md) — VLA × Robot × Sim matrix.
eval/README.md CHANGED
@@ -3,7 +3,7 @@
3
  `pusht.json` is the PushT mean-coverage-IoU benchmark result block for this
4
  rSkill. Validated against
5
  [`openral_core.RSkillEvalResult`](../../../docs/reference/schemas/RSkillEvalResult.json)
6
- at load time by the `rSkill` loader and surfaced by `ral benchmark report`.
7
 
8
  | Field | Value |
9
  | --- | --- |
 
3
  `pusht.json` is the PushT mean-coverage-IoU benchmark result block for this
4
  rSkill. Validated against
5
  [`openral_core.RSkillEvalResult`](../../../docs/reference/schemas/RSkillEvalResult.json)
6
+ at load time by the `rSkill` loader and surfaced by `openral benchmark report`.
7
 
8
  | Field | Value |
9
  | --- | --- |
eval/pusht.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "schema_version": "1",
3
  "source": {
4
  "paper": "https://arxiv.org/abs/2303.04137",
5
  "arxiv": "https://arxiv.org/abs/2303.04137",
6
  "model_variant": "diffusion",
7
- "evaluated_by": "OpenRAL:ral benchmark run",
8
  "reproduced_locally": true,
9
  "reproduction_planned": null,
10
- "reproduction_cli": "ral benchmark run --suite pusht --rskill rskill://diffusion-pusht",
11
  "table": null,
12
  "status": "reproduced"
13
  },
 
1
  {
2
+ "schema_version": "0.1",
3
  "source": {
4
  "paper": "https://arxiv.org/abs/2303.04137",
5
  "arxiv": "https://arxiv.org/abs/2303.04137",
6
  "model_variant": "diffusion",
7
+ "evaluated_by": "OpenRAL:openral benchmark run",
8
  "reproduced_locally": true,
9
  "reproduction_planned": null,
10
+ "reproduction_cli": "openral benchmark run --suite pusht --rskill rskill://diffusion-pusht",
11
  "table": null,
12
  "status": "reproduced"
13
  },
rskill.yaml CHANGED
@@ -2,22 +2,23 @@
2
  # Wraps: lerobot/diffusion_pusht (Apache-2.0)
3
  # Paper: Chi et al., 2023 — Diffusion Policy.
4
 
5
- schema_version: "1"
6
-
7
- name: "AdrianLlopart/rskill-diffusion-pusht"
8
  version: "0.1.0"
9
  license: "apache-2.0"
10
  role: "s1"
 
11
 
 
12
  model_family: "diffusion"
13
 
 
14
  # 2-D PushT pseudo-robot (single end-effector pushing a T block). Used by
15
  # tests/sim/test_pusht_2d_diffusion_pusht.py against gym_pusht/PushT-v0.
16
  embodiment_tags:
17
  - "pusht"
18
 
19
- capabilities_required: {}
20
-
21
  # PushT exposes a single 96×96 RGB top-down stream (named
22
  # observation.image, not images.cameraN — PushT predates the multi-cam
23
  # convention used by SmolVLA/ACT).
@@ -32,19 +33,32 @@ sensors_required:
32
  # `cartesian_pose` as its supported control mode (the codebase
33
  # convention for the PushT 2-D action regardless of dimensionality).
34
  # The loader auto-fills n_dof (2) + vla_action_key from the robot YAML.
 
35
  actuators_required:
36
  - kind: "cartesian_pose"
 
 
 
37
 
 
38
  runtime: "pytorch"
39
-
40
  quantization:
41
  dtype: "fp32"
42
  backend: "pytorch"
43
-
44
  weights_uri: "hf://lerobot/diffusion_pusht"
45
 
46
- chunk_size: 8
 
 
 
 
 
 
 
47
 
 
 
 
48
  latency_budget:
49
  # Reference-host measurement (RTX 4070 Laptop, CUDA 12.8, PyTorch 2.10)
50
  # of the warm full-chunk inference is 1756 ms — Diffusion Policy runs
@@ -52,21 +66,12 @@ latency_budget:
52
  # Pinning per_chunk_ms to 1250 ms with tolerance_pct=100 yields the
53
  # previous 2.5 s ceiling (_WARM_CHUNK_CEILING_S in the sim test).
54
  per_chunk_ms: 1250.0
55
- warmup_ms: 10000.0
56
- load_ms: 30000.0
57
-
58
- fallback_skill_id: null
59
 
 
60
  # Headline success rate from skills/diffusion-pusht/eval/pusht.json.
61
  benchmarks:
62
  pusht: 0.60
63
 
64
- # PushT is a 2-DoF planar pushing benchmark; proprio state is 2-D
65
- # (x, y) of the end effector.
66
- policy_id: "diffusion"
67
- state_contract:
68
- dim: 2
69
-
70
  paper_url: "https://arxiv.org/abs/2303.04137"
71
  source_repo: "hf://lerobot/diffusion_pusht"
72
 
@@ -76,3 +81,18 @@ description: >
76
  horizon of 16. The chunk inference cost is dominated by the denoising
77
  loop, so cached pops are essentially free — this is the extreme test
78
  of the queue-drain contract.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  # Wraps: lerobot/diffusion_pusht (Apache-2.0)
3
  # Paper: Chi et al., 2023 — Diffusion Policy.
4
 
5
+ # ── Identity ───────────────────────────────────────────────────────────────
6
+ schema_version: "0.1"
7
+ name: "OpenRAL/rskill-diffusion-pusht"
8
  version: "0.1.0"
9
  license: "apache-2.0"
10
  role: "s1"
11
+ kind: "vla" # ADR-00XX: rSkill kind discriminator. "vla" = learnable Vision-Language-Action policy.
12
 
13
+ # ── Policy identity ────────────────────────────────────────────────────────
14
  model_family: "diffusion"
15
 
16
+ # ── Compatibility contract ─────────────────────────────────────────────────
17
  # 2-D PushT pseudo-robot (single end-effector pushing a T block). Used by
18
  # tests/sim/test_pusht_2d_diffusion_pusht.py against gym_pusht/PushT-v0.
19
  embodiment_tags:
20
  - "pusht"
21
 
 
 
22
  # PushT exposes a single 96×96 RGB top-down stream (named
23
  # observation.image, not images.cameraN — PushT predates the multi-cam
24
  # convention used by SmolVLA/ACT).
 
33
  # `cartesian_pose` as its supported control mode (the codebase
34
  # convention for the PushT 2-D action regardless of dimensionality).
35
  # The loader auto-fills n_dof (2) + vla_action_key from the robot YAML.
36
+ # PushT actions are absolute (x, y) targets in the world/scene frame.
37
  actuators_required:
38
  - kind: "cartesian_pose"
39
+ control_mode_semantics:
40
+ mode: "absolute"
41
+ reference_frame: "world"
42
 
43
+ # ── Runtime / weights ──────────────────────────────────────────────────────
44
  runtime: "pytorch"
 
45
  quantization:
46
  dtype: "fp32"
47
  backend: "pytorch"
 
48
  weights_uri: "hf://lerobot/diffusion_pusht"
49
 
50
+ # ── Preprocessing (all knobs needed to interpret IO) ───────────────────────
51
+ processors:
52
+ preprocessor_uri: "hf://lerobot/diffusion_pusht/policy_preprocessor.json"
53
+ postprocessor_uri: "hf://lerobot/diffusion_pusht/policy_postprocessor.json"
54
+ # PushT is a 2-DoF planar pushing benchmark; proprio state is 2-D
55
+ # (x, y) of the end effector.
56
+ state_contract:
57
+ dim: 2
58
 
59
+ # ── Execution semantics ────────────────────────────────────────────────────
60
+ chunk_size: 8
61
+ # n_action_steps omitted — equals chunk_size (Diffusion Policy default).
62
  latency_budget:
63
  # Reference-host measurement (RTX 4070 Laptop, CUDA 12.8, PyTorch 2.10)
64
  # of the warm full-chunk inference is 1756 ms — Diffusion Policy runs
 
66
  # Pinning per_chunk_ms to 1250 ms with tolerance_pct=100 yields the
67
  # previous 2.5 s ceiling (_WARM_CHUNK_CEILING_S in the sim test).
68
  per_chunk_ms: 1250.0
 
 
 
 
69
 
70
+ # ── Provenance ─────────────────────────────────────────────────────────────
71
  # Headline success rate from skills/diffusion-pusht/eval/pusht.json.
72
  benchmarks:
73
  pusht: 0.60
74
 
 
 
 
 
 
 
75
  paper_url: "https://arxiv.org/abs/2303.04137"
76
  source_repo: "hf://lerobot/diffusion_pusht"
77
 
 
81
  horizon of 16. The chunk inference cost is dominated by the denoising
82
  loop, so cached pops are essentially free — this is the extreme test
83
  of the queue-drain contract.
84
+
85
+ # ADR-0022 — action vocabulary surfaced to the reasoner LLM tool
86
+ # palette so it can pick this skill by what it does (action verb +
87
+ # object + scene), not just by its slug.
88
+ actions:
89
+ - "push"
90
+ objects:
91
+ - "t_shape"
92
+ scenes:
93
+ - "tabletop_2d"
94
+
95
+ # ADR-0019 — per-checkpoint action contract (consumed by the dataset bridge
96
+ # to bind the LeRobot v3 `action` feature shape).
97
+ action_contract:
98
+ dim: 2