| from __future__ import annotations | |
| from typing import Dict | |
| from .models import RewardModel | |
| STEP_PENALTY = -0.01 | |
| def build_reward(components: Dict[str, float], rationale: str) -> RewardModel: | |
| value = round(sum(components.values()), 4) | |
| return RewardModel(value=value, components=components, rationale=rationale) | |