"""Stub reward function for OPD: always returns 0.

verl's AgentLoop still calls compute_score() after every rollout even when
`distillation.distillation_loss.use_task_rewards=False`. We don't have a task
reward for video captioning / open-ended QA, so we return 0 and let the
distillation loss drive learning.

Signature matches verl's `verl.utils.reward_score.default_compute_score`.
"""
from __future__ import annotations

from typing import Any


def compute_score(
    data_source: str,
    solution_str: str,
    ground_truth: str,
    extra_info: dict[str, Any] | None = None,
    **kwargs,
) -> float:
    """Return 0 — the distillation loss carries the signal in OPD."""
    return 0.0