lilkm HF Staff committed on
Commit
41ca747
·
verified ·
1 Parent(s): 08f586f

Add files using upload-large-folder tool

Browse files
Files changed (3) hide show
  1. README.md +60 -0
  2. config.json +58 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets: robometer/RBM-1M
3
+ library_name: lerobot
4
+ license: apache-2.0
5
+ model_name: robometer
6
+ pipeline_tag: robotics
7
+ tags:
8
+ - lerobot
9
+ - zero-shot
10
+ - robotics
11
+ - robometer
12
+ - qwen3-vl
13
+ - reward-model
14
+ - vision-language
15
+ ---
16
+
17
+ # Reward Model Card for robometer
18
+
19
+ <!-- Provide a quick summary of what the reward model is/does. -->
20
+
21
+
22
+ Robometer is a zero-shot, general-purpose robotic reward model built on a fine-tuned Qwen3-VL backbone with progress, preference, and success heads. Given a video and a task description, it outputs a per-frame progress signal in [0, 1] and a per-frame success probability — suitable for offline reward labelling and for low-frequency reward signals during RL fine-tuning of robot policies.
23
+
24
+
25
+ This reward model has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
26
+ See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index).
27
+
28
+ ---
29
+
30
+ ## How to Get Started with the Reward Model
31
+
32
+ ### Train from scratch
33
+
34
+ ```bash
35
+ lerobot-train \
36
+ --dataset.repo_id=${HF_USER}/<dataset> \
37
+ --reward_model.type=robometer \
38
+ --output_dir=outputs/train/<desired_reward_model_repo_id> \
39
+ --job_name=lerobot_reward_training \
40
+ --reward_model.device=cuda \
41
+ --reward_model.repo_id=${HF_USER}/<desired_reward_model_repo_id> \
42
+ --wandb.enable=true
43
+ ```
44
+
45
+ _Writes checkpoints to `outputs/train/<desired_reward_model_repo_id>/checkpoints/`._
46
+
47
+ ### Load the reward model in Python
48
+
49
+ ```python
50
+ from lerobot.rewards import make_reward_model
51
+
52
+ reward_model = make_reward_model(pretrained_path="<hf_user>/<reward_model_repo_id>")
53
+ reward = reward_model.compute_reward(batch)
54
+ ```
55
+
56
+ ---
57
+
58
+ ## Model Details
59
+
60
+ - **License:** apache-2.0
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "robometer",
3
+ "input_features": {
4
+ "observation.images.top": {
5
+ "type": "VISUAL",
6
+ "shape": [
7
+ 3,
8
+ 224,
9
+ 224
10
+ ]
11
+ }
12
+ },
13
+ "output_features": {
14
+ "progress": {
15
+ "type": "REWARD",
16
+ "shape": [
17
+ 1
18
+ ]
19
+ },
20
+ "success": {
21
+ "type": "REWARD",
22
+ "shape": [
23
+ 1
24
+ ]
25
+ }
26
+ },
27
+ "device": "cpu",
28
+ "pretrained_path": "robometer/Robometer-4B",
29
+ "push_to_hub": false,
30
+ "repo_id": null,
31
+ "license": "apache-2.0",
32
+ "tags": [
33
+ "reward-model",
34
+ "vision-language",
35
+ "qwen3-vl",
36
+ "zero-shot"
37
+ ],
38
+ "private": null,
39
+ "image_key": "observation.images.top",
40
+ "task_key": "task",
41
+ "default_task": null,
42
+ "max_frames": 8,
43
+ "reward_output": "progress",
44
+ "success_threshold": 0.5,
45
+ "base_model_id": "Qwen/Qwen3-VL-4B-Instruct",
46
+ "torch_dtype": "bfloat16",
47
+ "use_multi_image": true,
48
+ "use_per_frame_progress_token": true,
49
+ "average_temporal_patches": true,
50
+ "frame_pooling": "mean",
51
+ "frame_pooling_attn_temperature": 1.0,
52
+ "progress_loss_type": "discrete",
53
+ "progress_discrete_bins": 10,
54
+ "normalization_mapping": {
55
+ "VISUAL": "IDENTITY",
56
+ "REWARD": "IDENTITY"
57
+ }
58
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37245b849c3b85e757919d6d14b9af7f9342f213b793ebf9b01606e1b4ad7e73
3
+ size 8894103800