# Header text captured from the hosting page (not part of the module):
#   Rishav - Initial Fleetmind environment - 9ba4f8a - 3.79 kB
from __future__ import annotations

import math
from dataclasses import dataclass, asdict
from typing import Any
@dataclass(frozen=True)
class GraderResult:
    """Immutable record describing how a single task was graded.

    The first five fields are required. Every order counter and both
    derived rates are optional and default to ``None``.
    """

    # Identifier of the graded task.
    task_id: str
    # Reward values stored alongside the resulting score.
    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float

    # Optional order/action counters; ``None`` when not supplied.
    completed_orders: int | None = None
    on_time_orders: int | None = None
    late_orders: int | None = None
    expired_orders: int | None = None
    rejected_orders: int | None = None
    invalid_actions: int | None = None

    # Optional rates; ``None`` when not supplied.
    service_rate: float | None = None
    on_time_rate: float | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return every field of this record as a plain ``dict``."""
        as_mapping: dict[str, Any] = asdict(self)
        return as_mapping
def clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
    """Restrict ``value`` to the closed interval ``[low, high]``.

    Args:
        value: Number to restrict.
        low: Lower bound (default ``0.0``).
        high: Upper bound (default ``1.0``).

    Returns:
        ``low`` if ``value`` is below it, ``high`` if ``value`` is above it,
        otherwise ``value`` itself.

    Note:
        NaN is not propagated: every comparison against NaN is false, so
        ``min(high, nan)`` keeps ``high``. Callers that may see NaN should
        check for it before clamping.
    """
    return max(low, min(high, value))


def normalized_score(raw_reward: float, baseline_reward: float, target_reward: float) -> float:
    """Rescale a raw reward to ``[0, 1]`` relative to a baseline and a target.

    ``baseline_reward`` maps to ``0.0`` and ``target_reward`` maps to ``1.0``;
    values in between are interpolated linearly and values outside are
    clamped.

    Args:
        raw_reward: Reward actually obtained.
        baseline_reward: Reward that corresponds to a score of ``0.0``.
        target_reward: Reward that corresponds to a score of ``1.0``.

    Returns:
        A score in ``[0, 1]``. When ``target_reward <= baseline_reward`` the
        scale is degenerate and the result is pass/fail: ``1.0`` if
        ``raw_reward >= target_reward``, else ``0.0``. If the computed ratio
        is NaN (a NaN input, or an ``inf - inf`` difference) the score is
        ``0.0``.
    """
    if target_reward <= baseline_reward:
        return 1.0 if raw_reward >= target_reward else 0.0

    ratio = (raw_reward - baseline_reward) / (target_reward - baseline_reward)
    # Without this guard, clamp() would turn NaN into a perfect 1.0 score,
    # because min(1.0, nan) evaluates to 1.0. An undefined result must not
    # be graded as a success; it also keeps this branch consistent with the
    # degenerate branch above, where a NaN reward already yields 0.0.
    if math.isnan(ratio):
        return 0.0
    return clamp(ratio)
def grade_task(
    task_id: str,
    raw_reward: float,
    baseline_reward: float,
    target_reward: float,
    *,
    completed_orders: int | None = None,
    on_time_orders: int | None = None,
    late_orders: int | None = None,
    expired_orders: int | None = None,
    rejected_orders: int | None = None,
    invalid_actions: int | None = None,
) -> GraderResult:
    """Grade one task and bundle the outcome into a ``GraderResult``.

    Args:
        task_id: Identifier of the task being graded.
        raw_reward: Reward obtained on the task.
        baseline_reward: Baseline reward passed to ``normalized_score``.
        target_reward: Target reward passed to ``normalized_score``.
        completed_orders: Optional count of completed orders.
        on_time_orders: Optional count of orders completed on time.
        late_orders: Optional count of late orders (stored, not used in rates).
        expired_orders: Optional count of expired orders.
        rejected_orders: Optional count of rejected orders.
        invalid_actions: Optional count of invalid actions (stored as-is).

    Returns:
        A ``GraderResult`` holding the float-converted rewards, the normalized
        score, the counters exactly as given, and two derived rates:

        * ``service_rate`` = completed / (completed + expired + rejected),
          or ``None`` when that total is zero.
        * ``on_time_rate`` = on_time / completed, or ``None`` when
          ``completed_orders`` is ``None`` or zero.
    """
    # A missing (None) counter counts as zero in the resolved total.
    done = completed_orders or 0
    resolved = done + (expired_orders or 0) + (rejected_orders or 0)

    service_rate: float | None
    if resolved:
        service_rate = done / resolved
    else:
        service_rate = None

    on_time_rate: float | None
    if completed_orders:
        # completed_orders is non-zero here, so it is a safe divisor.
        on_time_rate = (on_time_orders or 0) / completed_orders
    else:
        on_time_rate = None

    return GraderResult(
        task_id=task_id,
        raw_reward=float(raw_reward),
        baseline_reward=float(baseline_reward),
        target_reward=float(target_reward),
        score=normalized_score(raw_reward, baseline_reward, target_reward),
        completed_orders=completed_orders,
        on_time_orders=on_time_orders,
        late_orders=late_orders,
        expired_orders=expired_orders,
        rejected_orders=rejected_orders,
        invalid_actions=invalid_actions,
        service_rate=service_rate,
        on_time_rate=on_time_rate,
    )
def grade_trajectory(
    task_id: str,
    trajectory_reward: float,
    baseline_reward: float,
    target_reward: float,
    stats: dict[str, int] | None = None,
) -> GraderResult:
    """Grade a trajectory by forwarding its reward and stats to ``grade_task``.

    Args:
        task_id: Identifier of the task being graded.
        trajectory_reward: Reward of the trajectory; passed as ``raw_reward``.
        baseline_reward: Baseline reward forwarded unchanged.
        target_reward: Target reward forwarded unchanged.
        stats: Optional mapping of counter name to value. Counters that are
            absent are forwarded as ``None``; ``None`` or an empty mapping
            means no counters at all.

    Returns:
        The ``GraderResult`` produced by ``grade_task``.
    """
    source = stats or {}
    # Look up each counter grade_task accepts; missing keys become None.
    counters = {
        key: source.get(key)
        for key in (
            "completed_orders",
            "on_time_orders",
            "late_orders",
            "expired_orders",
            "rejected_orders",
            "invalid_actions",
        )
    }
    return grade_task(
        task_id=task_id,
        raw_reward=trajectory_reward,
        baseline_reward=baseline_reward,
        target_reward=target_reward,
        **counters,
    )
def weighted_mean(results: list[GraderResult], weights: dict[str, float]) -> float:
    """Return the weighted average of the ``score`` of each result.

    Args:
        results: Graded results to average.
        weights: Mapping of ``task_id`` to weight. A task with no entry
            gets weight ``1.0``.

    Returns:
        ``sum(score * weight) / sum(weight)``, or ``0.0`` when ``results`` is
        empty or the weights sum to zero.
    """
    if not results:
        return 0.0

    numerator = 0.0
    denominator = 0.0
    for entry in results:
        task_weight = float(weights.get(entry.task_id, 1.0))
        numerator = numerator + entry.score * task_weight
        denominator = denominator + task_weight

    # Avoid dividing by zero when all weights cancel out or are zero.
    if denominator == 0:
        return 0.0
    return numerator / denominator
def summarize_results(results: list[GraderResult], weights: dict[str, float] | None = None) -> dict[str, Any]:
    """Build a summary of per-task results plus one overall score.

    Args:
        results: Graded results to summarize.
        weights: Optional mapping of ``task_id`` to weight, passed to
            ``weighted_mean``. ``None`` or an empty mapping is passed on as
            an empty mapping.

    Returns:
        A dict with two keys: ``"tasks"``, the list of each result's
        ``to_dict()`` output in input order, and ``"overall_score"``, the
        value returned by ``weighted_mean``.
    """
    effective_weights = weights or {}
    per_task = [entry.to_dict() for entry in results]
    overall = weighted_mean(results, effective_weights)
    return {"tasks": per_task, "overall_score": overall}
# Public API of this module: the names exported by a star-import.
__all__ = [
"GraderResult",
"clamp",
"grade_task",
"grade_trajectory",
"normalized_score",
"summarize_results",
"weighted_mean",
]