File size: 4,637 Bytes
fe073e2
 
 
 
 
 
aa0019f
fe073e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa0019f
 
 
 
 
 
 
 
 
 
fe073e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa0019f
 
 
 
 
 
 
fe073e2
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import json
import random
from pathlib import Path
from typing import Dict, Any, List
from ..models import AvigilanceObservation, AvigilanceAction, AvigilanceReward, FTOProfile, IncidentReport
from ..graders.grader3 import grade_task3
from ..scoring import normalize_open_score

class Task3ResourceAllocator:
    """Sample, present and grade ``task3`` resource-allocation scenarios.

    FTO profiles and incident reports are read once from JSON files inside
    ``data_dir`` when the object is constructed.  Scenarios are then drawn
    from those pools with the injected random generator, so a seeded ``rng``
    produces a reproducible sequence of scenarios.
    """

    # Step limit reported to the agent in every observation.
    MAX_STEPS = 2
    # An episode is flagged as done once the graded score exceeds this value.
    DONE_SCORE_THRESHOLD = 0.85

    def __init__(self, data_dir: Path, rng: random.Random):
        """Store the collaborators and eagerly load both data pools.

        Args:
            data_dir: Directory containing ``fto_profiles.json`` and
                ``incident_reports.json``.
            rng: Random generator used for every sampling decision.

        Raises:
            OSError: If either data file cannot be opened.
            json.JSONDecodeError: If either data file is not valid JSON.
        """
        self.data_dir = data_dir
        self.rng = rng
        self._ftos = self._load_ftos()
        self._incidents = self._load_incidents()

    def _load_json(self, filename: str) -> List[Dict[str, Any]]:
        """Parse ``data_dir / filename`` as JSON and return the result."""
        # JSON is UTF-8 by specification; do not depend on the locale default
        # encoding that a bare open() would otherwise pick up.
        with open(self.data_dir / filename, "r", encoding="utf-8") as f:
            return json.load(f)

    def _load_ftos(self) -> List[Dict[str, Any]]:
        """Return the raw FTO profile records from ``fto_profiles.json``."""
        return self._load_json("fto_profiles.json")

    def _load_incidents(self) -> List[Dict[str, Any]]:
        """Return the raw incident records from ``incident_reports.json``."""
        return self._load_json("incident_reports.json")

    @staticmethod
    def _fto_profiles(scenario: Dict[str, Any]) -> List[FTOProfile]:
        """Build ``FTOProfile`` models from the scenario's raw audit queue.

        Keys starting with an underscore are dropped before the records are
        handed to the model constructor.
        """
        return [
            FTOProfile(**{k: v for k, v in raw.items() if not k.startswith("_")})
            for raw in scenario["fto_audit_queue"]
        ]

    @staticmethod
    def _incident_reports(scenario: Dict[str, Any]) -> List[IncidentReport]:
        """Build ``IncidentReport`` models from the scenario's raw incident queue."""
        return [IncidentReport(**raw) for raw in scenario["incident_queue"]]

    def sample_scenario(self) -> Dict[str, Any]:
        """Draw a fresh random scenario from the loaded data pools.

        Returns:
            A dict with the keys ``scenario_id``, ``fto_audit_queue``,
            ``incident_queue``, ``inspector_capacity`` and
            ``week_budget_hours``.  The two queues hold raw records (dicts)
            taken from the loaded pools, not copies.
        """
        # Adjusted for solvability: 2-3 FTOs, 8-12 incidents
        fto_count = self.rng.randint(2, 3)
        incident_count = self.rng.randint(8, 12)
        inspectors = self.rng.randint(2, 3)
        # Tighter budget: 30-70 hours
        budget = self.rng.randint(30, 70)

        # random.sample() raises ValueError when asked for more items than the
        # population holds; clamp so a small data file yields a smaller
        # scenario instead of a crash.  Clamping happens after every randint
        # draw so the RNG stream is consumed in the same order regardless of
        # pool size.
        fto_count = min(fto_count, len(self._ftos))
        incident_count = min(incident_count, len(self._incidents))

        # Dict values are evaluated top to bottom; keep this order so seeded
        # generators keep producing the same scenarios.
        return {
            "scenario_id": f"task3_{self.rng.randint(1000, 9999)}",
            "fto_audit_queue": self.rng.sample(self._ftos, fto_count),
            "incident_queue": self.rng.sample(self._incidents, incident_count),
            "inspector_capacity": inspectors,
            "week_budget_hours": budget,
        }

    def build_observation(self, scenario: Dict[str, Any], step_count: int, terminal: bool = False) -> AvigilanceObservation:
        """Wrap a scenario dict in an ``AvigilanceObservation``.

        Args:
            scenario: A dict shaped like the return value of
                ``sample_scenario``.
            step_count: Value reported as ``episode_step``.
            terminal: Accepted for interface compatibility; it is not read
                when building the observation.

        Returns:
            The observation for ``task3`` with ``max_steps`` set to
            ``MAX_STEPS``.
        """
        capacity = scenario["inspector_capacity"]
        return AvigilanceObservation(
            task_id="task3",
            episode_step=step_count,
            max_steps=self.MAX_STEPS,
            fto_audit_queue=self._fto_profiles(scenario),
            incident_queue=self._incident_reports(scenario),
            inspector_capacity=capacity,
            week_budget_hours=scenario["week_budget_hours"],
            context_note=f"Allocate {capacity} inspectors to the provided audit and incident queues.",
        )

    def grade(self, action: AvigilanceAction, scenario: Dict[str, Any]) -> AvigilanceReward:
        """Score an action against a scenario.

        When the action carries no ``resource_allocation_action`` every
        component is set to ``normalize_open_score(0.0)`` and the episode is
        marked done.  Otherwise the overall score comes from ``grade_task3``
        and the remaining components are fixed values selected by thresholds
        on that score (plus whether a rationale was given and whether the
        agent abstained).

        Args:
            action: The agent's action.
            scenario: The scenario dict the action responds to.

        Returns:
            The reward; ``done`` is true when no allocation was provided or
            when the score exceeds ``DONE_SCORE_THRESHOLD``.
        """
        allocation = action.resource_allocation_action
        if allocation is None:
            # NOTE(review): computed once instead of per field — assumes
            # normalize_open_score is a pure function of its argument.
            zero = normalize_open_score(0.0)
            return AvigilanceReward(
                score=zero,
                accuracy_component=zero,
                consistency_component=zero,
                safety_alignment_component=zero,
                justification_quality=zero,
                safety_principle_p1_transparency=zero,
                safety_principle_p2_compliance=zero,
                safety_principle_p3_consistency=zero,
                feedback="No resource_allocation_action provided",
                done=True,
            )

        score = grade_task3(
            allocation,
            self._fto_profiles(scenario),
            self._incident_reports(scenario),
            scenario["inspector_capacity"],
            scenario["week_budget_hours"],
        )

        return AvigilanceReward(
            score=score,
            # Accuracy mirrors the score but is capped at 0.4.
            accuracy_component=normalize_open_score(min(score, 0.4)),
            consistency_component=normalize_open_score(0.2 if score > 0.6 else 0.1),
            safety_alignment_component=normalize_open_score(0.2 if score > 0.8 else 0.1),
            justification_quality=normalize_open_score(0.2 if allocation.priority_rationale else 0.0),
            safety_principle_p1_transparency=normalize_open_score(1.0 if not allocation.abstain else 0.5),
            safety_principle_p2_compliance=normalize_open_score(1.0 if score > 0.3 else 0.0),
            safety_principle_p3_consistency=normalize_open_score(1.0 if score > 0.7 else 0.5),
            feedback=f"Resource Allocation Action score: {score}",
            done=bool(score > self.DONE_SCORE_THRESHOLD),
        )

    def advance_scenario(self, scenario: Dict[str, Any]) -> Dict[str, Any]:
        """Return the scenario for the next step.

        The current ``scenario`` is not consulted: the next step is an
        independently sampled scenario.
        """
        # Simple advance: sample new items for step 2
        return self.sample_scenario()