---
# rSkill manifest: Robometer-4B reward / progress monitor, NF4-quantized.

schema_version: "0.1"
name: "OpenRAL/rskill-robometer-4b-nf4"
version: "0.1.0"
license: "apache-2.0"
role: "s2"
kind: "reward"

# Embodiment-agnostic: a reward monitor scores any rollout video + task
# instruction and is exempt from the rSkill<->robot embodiment gate.
embodiment_tags: ["any"]  # explicit embodiment-agnostic wildcard (ADR-0071)

# Consumes the same RGB camera stream the co-active VLA uses. No actuators.
sensors_required:
  - modality: "rgb"
    min_width: 224
    min_height: 224
actuators_required: []

runtime: "pytorch"

# NF4 bitsandbytes quantization (empirically validated, ADR-0057 Phase 2):
# 236 Linear modules -> Linear4bit, 8.91 GB bf16 -> 3.33 GB resident,
# 3.56 GB peak incl. an 8-frame forward (4.44 GB headroom on an 8 GB GPU).
quantization:
  dtype: "int4"
  backend: "pytorch"
  # NOTE(review): the keys from `loader` down to `robometer_pin` are assumed
  # to live under `extra` (loader-specific settings) - confirm against the
  # manifest schema.
  extra:
    scheme: "nf4"
    quantizer: "bitsandbytes"
    compute_dtype: "bfloat16"
    min_params_to_quantize: 4000000
    # RBM cannot be loaded by vanilla AutoModel (config advertises
    # architectures:["RFM"] with no auto_map). The sidecar loads via the
    # pinned robometer package: robometer.utils.save.load_model_from_hf.
    loader: "robometer.utils.save.load_model_from_hf"
    # transformers MUST be pinned to 4.57.1 (5.x changes processor kwargs and
    # drops input_ids). robometer package pinned to a669dffc.
    transformers_pin: "4.57.1"
    robometer_pin: "a669dffc241d7d76bec12f36efd4084d914d017c"

# Minimum GPU memory (GB) per weight dtype.
min_vram_gb:
  fp32: 18.0
  bf16: 9.0
  int4: 3.6

# Pre-quantized NF4 checkpoint (ADR-0057): the sidecar loads the packed weights
# DIRECTLY on the meta device via Params4bit.from_prequantized — no bf16
# materialization, no requantize (~25 s to ready vs ~110 s + a 19 GB CPU spike).
# Bit-identical to the bf16+quantize path. Built by
# tools/build_robometer_nf4_checkpoint.py from the SHA-pinned upstream below.
weights_uri: "hf://OpenRAL/rskill-robometer-4b-nf4"
chunk_size: 1
latency_budget:
  # S2-cadence monitor over a frame window; not a per-control-step signal.
  per_chunk_ms: 3000.0
source_repo: "hf://robometer/Robometer-4B@beef63bc914c5c189329d49c6d712d96d632aa34"

# Reward / progress-monitor contract (ADR-0057). Discrete mode yields
# per-frame normalized progress in [0,1] + per-frame success probability.
reward:
  progress_range: [0.0, 1.0]
  success_threshold: 0.5
  preference: false
  frame_window_s: 8.0
  target_fps: 3.0
  num_bins: 100
  instruction_required: true

description: >
  Robometer-4B (Qwen3-VL-4B robotic reward foundation model, arXiv 2603.02115)
  as an NF4 reward rSkill. Runs parallel to a VLA: given rollout frames + the
  task instruction it emits per-frame normalized progress (0-1) and success
  probability, queried on demand by the Reasoner. Advisory-only — never gates
  motors. Embodiment-agnostic. Apache-2.0. ADR-0057.

actions:
  - "monitor"
objects:
  - "task progress"
  - "task success"
scenes:
  - "tabletop"
  - "kitchen"
  - "indoor"
  - "manipulation"