"""Stub reward function for OPD: always returns 0. verl's AgentLoop still calls compute_score() after every rollout even when `distillation.distillation_loss.use_task_rewards=False`. We don't have a task reward for video captioning / open-ended QA, so we return 0 and let the distillation loss drive learning. Signature matches verl's `verl.utils.reward_score.default_compute_score`. """ from __future__ import annotations from typing import Any def compute_score( data_source: str, solution_str: str, ground_truth: str, extra_info: dict[str, Any] | None = None, **kwargs, ) -> float: """Return 0 — the distillation loss carries the signal in OPD.""" return 0.0