File size: 4,378 Bytes
8cd3fa7
8520614
 
8cd3fa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6aa8acb
8cd3fa7
 
 
 
 
 
 
 
 
 
 
6aa8acb
8cd3fa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6aa8acb
8cd3fa7
 
 
8520614
 
 
 
8cd3fa7
 
6aa8acb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8cd3fa7
 
 
 
 
6aa8acb
 
 
8cd3fa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d78cfdc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from __future__ import annotations
from typing import Optional, List, Dict, Literal, Union, Annotated
from pydantic import BaseModel, Field, Discriminator, RootModel
from enum import Enum
import uuid


class PolicyActionType(str, Enum):
    """Discriminator values for the three policy-action payloads defined below."""
    PROPOSE_CLARIFICATION = "propose_clarification"  # easy task (ProposeClarificationAction)
    PROPOSE_NEW_RULE = "propose_new_rule"            # medium task (ProposeNewRuleAction)
    EVOLVE_POLICY = "evolve_policy"                  # hard task (EvolveProcessAction)


class ProposeClarificationAction(BaseModel):
    """Easy task: identify an ambiguous policy term and clarify it.

    One arm of the discriminated ``Action`` union; ``action_type`` is the
    tag pydantic dispatches on.
    """
    # Fixed discriminator value — do not change; the union relies on it.
    action_type: Literal[PolicyActionType.PROPOSE_CLARIFICATION] = PolicyActionType.PROPOSE_CLARIFICATION
    ambiguous_term: str = Field(description="The exact ambiguous term found in policies")
    suggested_definition: str = Field(description="A specific, actionable definition")
    affected_policy_ids: List[str] = Field(default_factory=list, description="Policy IDs this affects")
    justification: str = Field(description="Why this term is ambiguous")
    # Optional free-form reasoning; the description says it earns a scoring bonus.
    think: Optional[str] = Field(default=None, description="Chain-of-thought reasoning (earns +0.1 bonus)")
    # Accept (and keep) unknown keys instead of rejecting the payload.
    model_config = {"extra": "allow"}


class ProposeNewRuleAction(BaseModel):
    """Medium task: detect a policy gap and propose a new rule.

    One arm of the discriminated ``Action`` union; ``action_type`` is the
    tag pydantic dispatches on.
    """
    # Fixed discriminator value — do not change; the union relies on it.
    action_type: Literal[PolicyActionType.PROPOSE_NEW_RULE] = PolicyActionType.PROPOSE_NEW_RULE
    rule_domain: str = Field(description="Domain the rule covers, e.g. 'content_moderation'")
    new_rule: str = Field(description="The new rule text — must be clear and actionable")
    scope: List[str] = Field(description="List of scenario types this rule covers")
    integration_points: List[str] = Field(default_factory=list, description="How it connects to existing policies")
    justification: str = Field(description="Why a gap exists and why this rule fills it")
    # Optional free-form reasoning; the description says it earns a scoring bonus.
    think: Optional[str] = Field(default=None, description="Chain-of-thought reasoning (earns +0.1 bonus)")
    # Accept (and keep) unknown keys instead of rejecting the payload.
    model_config = {"extra": "allow"}


class PolicyModification(BaseModel):
    """One concrete edit to an existing policy; listed in EvolveProcessAction.policy_modifications."""
    policy_id: str  # ID of the policy being changed
    change_type: Literal["enhance", "restrict", "add", "remove"]  # kind of edit being applied
    new_text: str   # the resulting policy text for this change
    reason: str     # justification for this specific modification


class EvolveProcessAction(BaseModel):
    """Hard task: holistically evolve the policy framework.

    One arm of the discriminated ``Action`` union; ``action_type`` is the
    tag pydantic dispatches on. NOTE(review): class is named
    ``EvolveProcessAction`` while its tag is ``EVOLVE_POLICY`` — naming is
    inconsistent but renaming would break external references.
    """
    # Fixed discriminator value — do not change; the union relies on it.
    action_type: Literal[PolicyActionType.EVOLVE_POLICY] = PolicyActionType.EVOLVE_POLICY
    policy_modifications: List[PolicyModification] = Field(description="Specific changes to make")
    expected_outcomes: Dict[str, float] = Field(description="Metric name → expected delta (0.0–1.0)")
    rollback_conditions: List[str] = Field(default_factory=list, description="When to revert")
    justification: str = Field(description="Comprehensive reasoning")
    # Optional free-form reasoning; the description says it earns a scoring bonus.
    think: Optional[str] = Field(default=None, description="Chain-of-thought reasoning (earns +0.1 bonus)")
    # Accept (and keep) unknown keys instead of rejecting the payload.
    model_config = {"extra": "allow"}


class Action(RootModel):
    """Tagged union over the three action payloads.

    ``Discriminator("action_type")`` makes pydantic read the ``action_type``
    field of incoming JSON and validate against exactly one concrete model,
    instead of trying each union member in turn.
    """
    root: Annotated[
        Union[ProposeClarificationAction, ProposeNewRuleAction, EvolveProcessAction],
        Discriminator("action_type")
    ]


class TaskInfo(BaseModel):
    """Returned by /tasks endpoint."""
    task_id: str         # unique identifier used to select the task
    difficulty: str      # presumably "easy"/"medium"/"hard" per the action docstrings — TODO confirm
    description: str     # human-readable task description
    action_schema: dict  # JSON schema of the action payload expected for this task


class CorpusIncident(BaseModel):
    """One item of the data corpus presented in Observation.data_corpus."""
    id: str       # incident identifier
    content: str  # raw incident text for the agent to analyze
    # What the system did with this incident; "pending" until acted upon.
    system_action: str = "pending"
    # Keep any extra keys carried by the raw corpus data.
    model_config = {"extra": "allow"}


class Observation(BaseModel):
    """What the agent sees after reset() or step()."""
    task_id: str      # which task this episode belongs to
    episode_id: str   # identifier of the running episode
    step_count: int   # steps taken so far in the episode
    corpus_size: int = 0   # total number of corpus items available
    corpus_shown: int = 0  # how many of them are included in data_corpus
    data_corpus: List[CorpusIncident] = Field(description="Scenarios/posts/actions for the agent to analyze")
    current_policies: List[Dict] = Field(description="The existing policy set")
    policy_outcomes: Optional[List[Dict]] = Field(default=None, description="Historical outcome data (hard task)")
    system_metrics: Dict[str, float] = Field(default_factory=dict)  # metric name → current value
    identified_issues: List[Dict] = Field(default_factory=list)     # issues flagged so far
    reward: float = 0.0  # reward for the step that produced this observation
    done: bool = False   # True once the episode has terminated
    info: Dict = Field(default_factory=dict)  # free-form auxiliary data


class State(BaseModel):
    """Episode metadata — returned by state() endpoint."""
    # Fresh random UUID per State instance (string form, for JSON friendliness).
    episode_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    task_id: str = ""        # empty until a task is selected
    step_count: int = 0      # steps taken so far
    max_steps: int = 5       # episode length limit
    current_score: float = 0.0  # score of the most recent step
    best_score: float = 0.0     # best score seen this episode
    actions_taken: List[str] = Field(default_factory=list)      # action identifiers, in order
    rewards_history: List[float] = Field(default_factory=list)  # per-step rewards, in order