"""Stub reward function for OPD: always returns 0.

verl's AgentLoop still calls compute_score() after every rollout even when
`distillation.distillation_loss.use_task_rewards=False`. We don't have a task
reward for video captioning / open-ended QA, so we return 0 and let the
distillation loss drive learning.

Signature matches verl's `verl.utils.reward_score.default_compute_score`.
"""
| from __future__ import annotations | |
| from typing import Any | |
def compute_score(
    data_source: str,
    solution_str: str,
    ground_truth: str,
    extra_info: dict[str, Any] | None = None,
    **kwargs: Any,
) -> float:
    """Return a constant reward of 0.0 for every rollout.

    This is a stub: no task reward is computed, so the distillation loss
    carries the learning signal in OPD. All arguments are accepted only so
    the function can be called with the expected reward-function signature;
    none of them is read.

    Args:
        data_source: Ignored.
        solution_str: Ignored.
        ground_truth: Ignored.
        extra_info: Ignored; may be ``None``.
        **kwargs: Ignored; any extra keyword arguments are tolerated so the
            caller can pass additional fields without raising ``TypeError``.

    Returns:
        Always ``0.0``.
    """
    # Deliberately constant: the inputs are never inspected.
    return 0.0