Rishav
Harden v3 scoring contract
9850bda
Raw
History Blame Contribute Delete
3.09 kB
from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
# Closed set of world-regime identifiers. Any field annotated with
# ``WorldRegime`` accepts exactly one of these four strings.
WorldRegime = Literal[
    "visible_ramp",
    "decoy_then_shift",
    "premium_late_surge",
    "congested_pivot",
]
class DifficultyProfile(BaseModel):
    """Immutable bundle of difficulty settings for a single task.

    The model is frozen, so attribute assignment after construction is
    rejected by pydantic. It declares typed fields only; no custom
    validators are defined in this class.
    """

    model_config = ConfigDict(frozen=True)

    task_id: str

    # Scenario sizing.
    zone_count: int
    courier_count: int
    total_rounds: int
    max_repositions_per_round: int

    # Penalty / weight parameters. How they enter the score is not defined
    # in this class.
    missed_order_penalty: float
    move_cost_weight: float

    # Presumably milliseconds, going by the ``_ms`` suffix -- confirm with
    # the code that consumes it.
    runtime_budget_ms: float
class ZoneSpec(BaseModel):
    """Immutable description of one zone: identifier, label and position."""

    model_config = ConfigDict(frozen=True)

    zone_id: str
    label: str
    # Pair of integers; the coordinate system is not specified here.
    position: tuple[int, int]
class RoundTemplate(BaseModel):
    """Immutable per-round data, stored as variable-length tuples.

    NOTE(review): the three ``*_by_zone`` tuples are presumably aligned by
    zone index and of equal length; nothing in this class enforces that.
    """

    model_config = ConfigDict(frozen=True)

    round_index: int

    visible_orders_by_zone: tuple[int, ...]
    reward_per_order_by_zone: tuple[float, ...]
    congestion_multiplier_by_zone: tuple[float, ...]
class HiddenRecipe(BaseModel):
    """Immutable recipe tying a task and seed to its full scenario data.

    Combines a ``DifficultyProfile``, a ``WorldRegime`` value, four zone
    indices, the zone specs, the initial courier counts and the round
    templates. No cross-field checks are defined in this class.
    """

    model_config = ConfigDict(frozen=True)

    task_id: str
    seed: int
    profile: DifficultyProfile
    world_regime: WorldRegime

    # Presumably indices into ``zone_specs`` -- confirm; bounds are not
    # validated here.
    hot_zone_index: int
    decoy_zone_index: int
    support_zone_index: int
    premium_zone_index: int

    zone_specs: tuple[ZoneSpec, ...]
    # Presumably one entry per zone, in ``zone_specs`` order -- confirm.
    initial_courier_counts: tuple[int, ...]
    rounds: tuple[RoundTemplate, ...]
class ZoneSnapshot(BaseModel):
    """Immutable record of one zone's identity and current figures."""

    model_config = ConfigDict(frozen=True)

    # Identity.
    zone_id: str
    label: str

    # Figures reported for the zone.
    courier_count: int
    visible_orders: int
    reward_per_order: float
    congestion_multiplier: float
class ZoneAllocation(BaseModel):
    """Immutable pairing of a zone identifier with a courier count."""

    model_config = ConfigDict(frozen=True)

    zone_id: str
    courier_count: int
class V3Action(BaseModel):
    """Action payload: a list of per-zone target allocations.

    The list defaults to a fresh empty list for every instance.
    """

    target_allocations: list[ZoneAllocation] = Field(default_factory=list)
class V3Reward(BaseModel):
    """Reward pair: the value for one step and the running total."""

    step_reward: float
    cumulative_reward: float
class V3Feedback(BaseModel):
    """Feedback record; every field has a default, so ``V3Feedback()`` is valid."""

    # Numeric feedback, zero until set.
    last_step_reward: float = 0.0
    cumulative_reward: float = 0.0

    # Textual feedback: a fresh empty list per instance, and an empty string.
    recent_events: list[str] = Field(default_factory=list)
    current_pressure: str = ""
class V3ScenarioInfo(BaseModel):
    """Scenario summary: required identifiers and counts, optional briefs."""

    # Required fields.
    task_id: str
    used_seed: int
    total_rounds: int
    total_couriers: int
    max_repositions_per_round: int

    # Free-text briefs; each defaults to the empty string.
    objective_brief: str = ""
    action_brief: str = ""
    episode_brief: str = ""
class V3Observation(BaseModel):
    """Observation: round counters, task id, zone snapshots, feedback and scenario info.

    All fields are required.
    """

    round_index: int
    remaining_rounds: int
    task_id: str

    zones: list[ZoneSnapshot]
    feedback: V3Feedback
    scenario_info: V3ScenarioInfo
class V3StepResult(BaseModel):
    """Result of one step: observation, reward, done flag and an info mapping."""

    observation: V3Observation
    reward: V3Reward
    done: bool
    # String-keyed mapping with values of any type; its schema is not
    # defined in this class.
    info: dict[str, Any]
class V3TaskResult(BaseModel):
    """Per-task result: raw, baseline and target rewards plus a score.

    How ``score`` is derived from the reward fields is not defined in this
    class.
    """

    task_id: str

    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float

    # Optional; ``None`` when no heuristic reward is supplied.
    heuristic_reward: float | None = None
class SeedMetadata(BaseModel):
    """Flat metadata record for one (task, seed) pair.

    All fields are required. Zones are recorded as plain strings here.
    """

    task_id: str
    seed: int

    # NOTE(review): typed as plain ``str`` here, whereas ``HiddenRecipe``
    # uses the ``WorldRegime`` literal -- confirm whether this should be
    # narrowed to match.
    world_regime: str

    hot_zone: str
    decoy_zone: str
    premium_zone: str

    # Reward figures and gaps; the gap definitions are not visible in this
    # class.
    baseline_reward: float
    heuristic_reward: float
    target_reward: float
    score_gap: float
    heuristic_gap: float

    # Presumably milliseconds, going by the ``_ms`` suffix -- confirm.
    solver_runtime_ms: float
    runtime_budget_ms: float

    admissible: bool