# Manifest identity for the Robometer-4B NF4 reward rSkill.
# schema_version is quoted so it stays the string "0.1" rather than a float.
schema_version: "0.1"
# Matches the repository id used in weights_uri.
name: "OpenRAL/rskill-robometer-4b-nf4"
version: "0.1.0"
license: "apache-2.0"
# NOTE(review): "s2" presumably selects the S2 cadence tier mentioned in the
# latency_budget comment — confirm against the manifest schema.
role: "s2"
# Reward / progress monitor; its contract is spelled out under the `reward` key.
kind: "reward"

# A reward monitor scores any rollout video + task instruction, so it is not
# tied to a particular robot and is exempt from the rSkill<->robot embodiment
# gate. "any" is the explicit embodiment-agnostic wildcard (ADR-0071).
embodiment_tags:
  - "any"

# Input side: consumes the same RGB camera stream the co-active VLA uses.
sensors_required:
  - modality: "rgb"
    # Minimum accepted frame size (presumably in pixels — confirm against the
    # manifest schema).
    min_width: 224
    min_height: 224

# Output side: a reward monitor drives no actuators. The list is written as an
# explicit `[]` because a bare `actuators_required:` would parse as null.
actuators_required: []

# Inference runtime; quantization.backend below names the same runtime.
runtime: "pytorch"

# NF4 bitsandbytes quantization (empirically validated, ADR-0057 Phase 2):
#   - 236 Linear modules are converted to Linear4bit
#   - footprint drops from 8.91 GB in bf16 to 3.33 GB resident
#   - peak is 3.56 GB including an 8-frame forward, which leaves 4.44 GB of
#     headroom on an 8 GB GPU
quantization:
  dtype: "int4"
  backend: "pytorch"
  extra:
    scheme: "nf4"
    quantizer: "bitsandbytes"
    compute_dtype: "bfloat16"
    # NOTE(review): presumably a size cutoff below which a module is left
    # unquantized — confirm against the sidecar loader.
    min_params_to_quantize: 4000000
    # RBM cannot be loaded by vanilla AutoModel (config advertises
    # architectures:["RFM"] with no auto_map). The sidecar therefore loads it
    # through the pinned robometer package:
    # robometer.utils.save.load_model_from_hf.
    loader: "robometer.utils.save.load_model_from_hf"
    # transformers MUST be pinned to 4.57.1 (5.x changes processor kwargs and
    # drops input_ids). Quoted so the version stays a string.
    transformers_pin: "4.57.1"
    # robometer package pin: full commit SHA (short form a669dffc).
    robometer_pin: "a669dffc241d7d76bec12f36efd4084d914d017c"

# Minimum VRAM in GB, keyed by weight precision.
# NOTE(review): whether the loader enforces these or treats them as advisory is
# not visible in this manifest.
min_vram_gb:
  # NOTE(review): presumably 2x the bf16 figure — no measurement is recorded
  # in this file.
  fp32: 18.0
  # Rounded up from the 8.91 GB bf16 footprint.
  bf16: 9.0
  # Rounded up from the 3.56 GB NF4 peak (includes an 8-frame forward).
  int4: 3.6

# Pre-quantized NF4 checkpoint (ADR-0057). The sidecar loads the packed weights
# DIRECTLY on the meta device via Params4bit.from_prequantized, so there is no
# bf16 materialization and no requantize step: ~25 s to ready, versus ~110 s
# plus a 19 GB CPU spike on the bf16+quantize path. The loaded weights are
# bit-identical to that path. Built by tools/build_robometer_nf4_checkpoint.py
# from the SHA-pinned upstream recorded in source_repo.
weights_uri: "hf://OpenRAL/rskill-robometer-4b-nf4"

# NOTE(review): presumably one output chunk per query — confirm against the
# manifest schema.
chunk_size: 1
latency_budget:
  # S2-cadence monitor over a frame window; not a per-control-step signal.
  # The budget is therefore expressed per chunk, in milliseconds (3 s here).
  per_chunk_ms: 3000.0

# Upstream model that the NF4 checkpoint in weights_uri is built from, pinned
# to a full commit SHA after the "@".
source_repo: "hf://robometer/Robometer-4B@beef63bc914c5c189329d49c6d712d96d632aa34"

# Reward / progress-monitor contract (ADR-0057). Discrete mode yields
# per-frame normalized progress in [0,1] + per-frame success probability.
reward:
  # Closed interval of the normalized progress output.
  progress_range: [0.0, 1.0]
  # NOTE(review): presumably the success-probability cutoff above which a
  # frame counts as "success" — confirm against the consumer.
  success_threshold: 0.5
  # NOTE(review): presumably disables a pairwise-preference output mode —
  # confirm against the schema.
  preference: false
  # Frame window length in seconds and sampling rate in frames per second.
  # NOTE(review): 8.0 s x 3.0 fps is 24 frames nominal, while the quantization
  # measurement in this file used an 8-frame forward — confirm how many frames
  # the sidecar actually batches.
  frame_window_s: 8.0
  target_fps: 3.0
  # NOTE(review): presumably the number of discrete progress bins used in
  # discrete mode — confirm.
  num_bins: 100
  # The monitor scores frames against a task instruction, so one must be given.
  instruction_required: true

# Human-readable summary. This is a folded scalar (`>`): the line breaks below
# are joined with spaces and a single trailing newline is kept in the value.
description: >
  Robometer-4B (Qwen3-VL-4B robotic reward foundation model, arXiv 2603.02115)
  as an NF4 reward rSkill. Runs parallel to a VLA: given rollout frames + the
  task instruction it emits per-frame normalized progress (0-1) and success
  probability, queried on demand by the Reasoner. Advisory-only — never gates
  motors. Embodiment-agnostic. Apache-2.0. ADR-0057.

# Descriptive tag lists.
# NOTE(review): presumably used for skill discovery / matching — how they are
# consumed is not visible in this manifest.

# The only action offered is monitoring; the skill requires no actuators.
actions:
  - "monitor"

# What the monitor reports on.
objects:
  - "task progress"
  - "task success"

# Scene tags this skill is labeled with.
scenes:
  - "tabletop"
  - "kitchen"
  - "indoor"
  - "manipulation"