File size: 3,788 Bytes
9ba4f8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from __future__ import annotations

from dataclasses import dataclass, asdict
from typing import Any


@dataclass(frozen=True)
class GraderResult:
    task_id: str
    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float
    completed_orders: int | None = None
    on_time_orders: int | None = None
    late_orders: int | None = None
    expired_orders: int | None = None
    rejected_orders: int | None = None
    invalid_actions: int | None = None
    service_rate: float | None = None
    on_time_rate: float | None = None

    def to_dict(self) -> dict[str, Any]:
        return asdict(self)


def clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
    return max(low, min(high, value))


def normalized_score(raw_reward: float, baseline_reward: float, target_reward: float) -> float:
    if target_reward <= baseline_reward:
        return 1.0 if raw_reward >= target_reward else 0.0
    return clamp((raw_reward - baseline_reward) / (target_reward - baseline_reward))


def grade_task(
    task_id: str,
    raw_reward: float,
    baseline_reward: float,
    target_reward: float,
    *,
    completed_orders: int | None = None,
    on_time_orders: int | None = None,
    late_orders: int | None = None,
    expired_orders: int | None = None,
    rejected_orders: int | None = None,
    invalid_actions: int | None = None,
) -> GraderResult:
    total_resolved = (completed_orders or 0) + (expired_orders or 0) + (rejected_orders or 0)
    service_rate = ((completed_orders or 0) / total_resolved) if total_resolved else None
    on_time_rate = ((on_time_orders or 0) / (completed_orders or 1)) if completed_orders else None
    return GraderResult(
        task_id=task_id,
        raw_reward=float(raw_reward),
        baseline_reward=float(baseline_reward),
        target_reward=float(target_reward),
        score=normalized_score(raw_reward, baseline_reward, target_reward),
        completed_orders=completed_orders,
        on_time_orders=on_time_orders,
        late_orders=late_orders,
        expired_orders=expired_orders,
        rejected_orders=rejected_orders,
        invalid_actions=invalid_actions,
        service_rate=service_rate,
        on_time_rate=on_time_rate,
    )


def grade_trajectory(
    task_id: str,
    trajectory_reward: float,
    baseline_reward: float,
    target_reward: float,
    stats: dict[str, int] | None = None,
) -> GraderResult:
    stats = stats or {}
    return grade_task(
        task_id=task_id,
        raw_reward=trajectory_reward,
        baseline_reward=baseline_reward,
        target_reward=target_reward,
        completed_orders=stats.get("completed_orders"),
        on_time_orders=stats.get("on_time_orders"),
        late_orders=stats.get("late_orders"),
        expired_orders=stats.get("expired_orders"),
        rejected_orders=stats.get("rejected_orders"),
        invalid_actions=stats.get("invalid_actions"),
    )


def weighted_mean(results: list[GraderResult], weights: dict[str, float]) -> float:
    if not results:
        return 0.0
    weighted_total = 0.0
    weight_total = 0.0
    for result in results:
        weight = float(weights.get(result.task_id, 1.0))
        weighted_total += result.score * weight
        weight_total += weight
    return 0.0 if weight_total == 0 else weighted_total / weight_total


def summarize_results(results: list[GraderResult], weights: dict[str, float] | None = None) -> dict[str, Any]:
    weights = weights or {}
    return {
        "tasks": [result.to_dict() for result in results],
        "overall_score": weighted_mean(results, weights),
    }


__all__ = [
    "GraderResult",
    "clamp",
    "grade_task",
    "grade_trajectory",
    "normalized_score",
    "summarize_results",
    "weighted_mean",
]