import json
import random
from pathlib import Path
from typing import Dict, Any, List

from ..models import (
    AvigilanceObservation,
    AvigilanceAction,
    AvigilanceReward,
    FTOProfile,
    IncidentReport,
)
from ..graders.grader3 import grade_task3
from ..scoring import normalize_open_score


class Task3ResourceAllocator:
    """Scenario sampler, observation builder and grader for ``task3``.

    FTO profiles and incident reports are loaded once from JSON files in
    ``data_dir``; each scenario is a random subset of both pools plus a
    randomly drawn inspector capacity and weekly hour budget.
    """

    # Value reported as ``max_steps`` in every observation.
    MAX_STEPS = 2
    # A graded score strictly above this marks the episode as done.
    DONE_THRESHOLD = 0.85

    def __init__(self, data_dir: Path, rng: random.Random):
        """Store the data directory and RNG, then load both data pools.

        Args:
            data_dir: Directory containing ``fto_profiles.json`` and
                ``incident_reports.json``.
            rng: Random source used for every draw in ``sample_scenario``.

        Raises:
            OSError: If either data file cannot be opened.
            json.JSONDecodeError: If either data file is not valid JSON.
        """
        self.data_dir = data_dir
        self.rng = rng
        self._ftos = self._load_ftos()
        self._incidents = self._load_incidents()

    def _load_json(self, filename: str) -> List[Dict[str, Any]]:
        """Parse ``data_dir / filename`` as JSON.

        The file is read explicitly as UTF-8 so that parsing does not depend
        on the platform's default locale encoding.
        """
        with open(self.data_dir / filename, "r", encoding="utf-8") as f:
            return json.load(f)

    def _load_ftos(self) -> List[Dict[str, Any]]:
        """Return the raw FTO profile records from ``fto_profiles.json``."""
        return self._load_json("fto_profiles.json")

    def _load_incidents(self) -> List[Dict[str, Any]]:
        """Return the raw incident records from ``incident_reports.json``."""
        return self._load_json("incident_reports.json")

    @staticmethod
    def _fto_profiles(scenario: Dict[str, Any]) -> List[FTOProfile]:
        """Build ``FTOProfile`` models from the scenario's audit queue.

        Keys starting with an underscore are dropped before construction.
        """
        return [
            FTOProfile(**{k: v for k, v in record.items() if not k.startswith("_")})
            for record in scenario["fto_audit_queue"]
        ]

    @staticmethod
    def _incident_reports(scenario: Dict[str, Any]) -> List[IncidentReport]:
        """Build ``IncidentReport`` models from the scenario's incident queue.

        Records are passed through unfiltered (unlike FTO records).
        """
        return [IncidentReport(**record) for record in scenario["incident_queue"]]

    def sample_scenario(self) -> Dict[str, Any]:
        """Draw a new random scenario.

        Returns:
            A dict with keys ``scenario_id``, ``fto_audit_queue``,
            ``incident_queue``, ``inspector_capacity`` and
            ``week_budget_hours``.
        """
        # Adjusted for solvability: 2-3 FTOs, 8-12 incidents
        fto_count = self.rng.randint(2, 3)
        incident_count = self.rng.randint(8, 12)
        inspectors = self.rng.randint(2, 3)
        # Tighter budget: 30-70 hours
        budget = self.rng.randint(30, 70)

        # random.sample raises ValueError when asked for more items than the
        # population holds; clamp so small data files still yield a scenario.
        # Clamping happens after the randint draws above, so the RNG stream
        # is unchanged whenever the pools are large enough.
        fto_count = min(fto_count, len(self._ftos))
        incident_count = min(incident_count, len(self._incidents))

        # Dict entries are evaluated top to bottom; the order of RNG calls is
        # kept as id -> FTO sample -> incident sample for seed reproducibility.
        return {
            "scenario_id": f"task3_{self.rng.randint(1000, 9999)}",
            "fto_audit_queue": self.rng.sample(self._ftos, fto_count),
            "incident_queue": self.rng.sample(self._incidents, incident_count),
            "inspector_capacity": inspectors,
            "week_budget_hours": budget,
        }

    def build_observation(
        self,
        scenario: Dict[str, Any],
        step_count: int,
        terminal: bool = False,
    ) -> AvigilanceObservation:
        """Wrap a scenario dict in an ``AvigilanceObservation``.

        Args:
            scenario: A dict as produced by ``sample_scenario``.
            step_count: Reported as ``episode_step``.
            terminal: Accepted for interface compatibility; not used here.

        Returns:
            The observation for ``task3`` with both queues converted to
            model objects.
        """
        capacity = scenario["inspector_capacity"]
        return AvigilanceObservation(
            task_id="task3",
            episode_step=step_count,
            max_steps=self.MAX_STEPS,
            fto_audit_queue=self._fto_profiles(scenario),
            incident_queue=self._incident_reports(scenario),
            inspector_capacity=capacity,
            week_budget_hours=scenario["week_budget_hours"],
            context_note=f"Allocate {capacity} inspectors to the provided audit and incident queues.",
        )

    def grade(self, action: AvigilanceAction, scenario: Dict[str, Any]) -> AvigilanceReward:
        """Score an action against a scenario.

        When the action carries no ``resource_allocation_action`` every
        component is the normalized value of 0.0 and the episode is marked
        done. Otherwise ``grade_task3`` supplies the score and the remaining
        components are derived from fixed score thresholds.

        Args:
            action: The agent's action.
            scenario: The scenario dict the action responds to.

        Returns:
            The reward, with ``done`` set when the score exceeds
            ``DONE_THRESHOLD`` (or when no allocation was provided).
        """
        allocation = action.resource_allocation_action
        if allocation is None:
            zero = normalize_open_score(0.0)
            return AvigilanceReward(
                score=zero,
                accuracy_component=zero,
                consistency_component=zero,
                safety_alignment_component=zero,
                justification_quality=zero,
                safety_principle_p1_transparency=zero,
                safety_principle_p2_compliance=zero,
                safety_principle_p3_consistency=zero,
                feedback="No resource_allocation_action provided",
                done=True,
            )

        score = grade_task3(
            allocation,
            self._fto_profiles(scenario),
            self._incident_reports(scenario),
            scenario["inspector_capacity"],
            scenario["week_budget_hours"],
        )

        return AvigilanceReward(
            # The score from grade_task3 is passed through as-is.
            score=score,
            # Accuracy is the score capped at 0.4.
            accuracy_component=normalize_open_score(0.4 if score > 0.4 else score),
            consistency_component=normalize_open_score(0.2 if score > 0.6 else 0.1),
            safety_alignment_component=normalize_open_score(0.2 if score > 0.8 else 0.1),
            justification_quality=normalize_open_score(
                0.2 if allocation.priority_rationale else 0.0
            ),
            safety_principle_p1_transparency=normalize_open_score(
                1.0 if not allocation.abstain else 0.5
            ),
            safety_principle_p2_compliance=normalize_open_score(1.0 if score > 0.3 else 0.0),
            safety_principle_p3_consistency=normalize_open_score(1.0 if score > 0.7 else 0.5),
            feedback=f"Resource Allocation Action score: {score}",
            done=score > self.DONE_THRESHOLD,
        )

    def advance_scenario(self, scenario: Dict[str, Any]) -> Dict[str, Any]:
        """Return the scenario for the next step.

        The current ``scenario`` is ignored; a completely fresh scenario is
        sampled instead.
        """
        # Simple advance: sample new items for step 2
        return self.sample_scenario()