"""Episode grading utilities.

This module centralizes "final score" computation so benchmark runs and
OpenEnv runs report the same episode score.
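
Example (illustrative caller-side sketch; ``task_id``, ``final_state`` and
``episode_rewards`` are hypothetical names supplied by the run loop):

    score = grade_episode(task_id, final_state, episode_rewards)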
"""

from __future__ import annotations

from src.models import State


def grade_episode(task_id: str, state: State | None, rewards: list[float]) -> float:
    """Compute a final episode score in [0.0, 1.0].

    Args:
        task_id: Task identifier.
        state: Final (or current) state.
        rewards: Per-step rewards.

    Returns:
        Normalized score in [0.0, 1.0].
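
    Example (illustrative; an unrecognized ``task_id`` with non-empty rewards
    takes the mean-of-rewards fallback, so no task grader module is imported):

        >>> grade_episode("unknown_task", None, [0.5, 1.0])
        0.75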
    """

    if not rewards:
        return 0.0

    # Lazy imports avoid circular dependencies (task graders import src.rewards).
    if task_id == "single_incident":
        from src.tasks.single_incident import SingleIncidentGrader

        return float(SingleIncidentGrader().grade(state, rewards) if state is not None else 0.0)

    if task_id == "multi_incident":
        from src.tasks.multi_incident import MultiIncidentGrader

        return float(MultiIncidentGrader().grade(state, rewards) if state is not None else 0.0)

    if task_id == "mass_casualty":
        from src.tasks.mass_casualty import MassCasualtyGrader

        return float(MassCasualtyGrader().grade(state, rewards) if state is not None else 0.0)

    if task_id == "shift_surge":
        from src.tasks.shift_surge import ShiftSurgeGrader

        return float(ShiftSurgeGrader().grade(state, rewards) if state is not None else 0.0)

    # Fallback: mean of rewards (legacy behavior). The empty-rewards case was
    # handled above, so dividing by len(rewards) is safe.
    return float(sum(rewards) / len(rewards))