# File size: 1,701 Bytes
# 9e245c9 7eba7ef
from __future__ import annotations
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
class ActionType(str, Enum):
    """Closed set of action kinds, each member doubling as its string value.

    Mixing in ``str`` means a member compares equal to its literal value
    (``ActionType.allow == "allow"``).
    """

    allow = "allow"
    flag = "flag"
    remove = "remove"
    escalate = "escalate"
class StepType(str, Enum):
    """Closed set of step kinds, each member doubling as its string value.

    Members are declared in the order analyze, retrieve_policy, decide,
    review, finalize; iteration over the enum follows that order.
    """

    analyze = "analyze"
    retrieve_policy = "retrieve_policy"
    decide = "decide"
    review = "review"
    finalize = "finalize"
class Content(BaseModel):
    """A piece of content: text plus optional image information.

    Every field has a default, so ``Content()`` is valid and yields empty
    text with no image data.
    """

    # Textual body; defaults to the empty string.
    text: str = ""
    # Optional image location; None when no image is attached.
    image_url: Optional[str] = None
    # Optional textual description of the image; None when absent.
    image_description: Optional[str] = None
class PolicyChunk(BaseModel):
    """A fragment of policy text with an identifier and a numeric score.

    Every field has a default, so ``PolicyChunk()`` is valid.
    """

    # Identifier of the policy this chunk belongs to; defaults to "".
    policy_id: str = ""
    # The chunk's text; defaults to "".
    text: str = ""
    # Numeric score for the chunk; defaults to 0.0.
    # NOTE(review): presumably a retrieval relevance score -- confirm with
    # the code that produces these chunks.
    score: float = 0.0
class Action(BaseModel):
    """An action: a required ``ActionType`` plus an optional free-text reason."""

    # Required; has no default, so it must be supplied at construction.
    action_type: ActionType
    # Free-text justification; defaults to the empty string.
    reason: str = ""
class Observation(BaseModel):
    """Observation record: content, policy chunks, and step/reward status.

    Every field has a default, so ``Observation()`` is valid.
    """

    # Content under consideration; None when not provided.
    content: Optional[Content] = None
    # Policy chunks attached to this observation. A default_factory is used
    # so each instance gets its own fresh list.
    policy: List[PolicyChunk] = Field(default_factory=list)
    # Step kind associated with this observation; defaults to ``analyze``.
    step_type: StepType = StepType.analyze
    # Step counter; defaults to 0.
    step_count: int = 0
    # Free-text message; defaults to "".
    message: str = ""
    # Reward value; defaults to 0.0.
    reward: float = 0.0
    # Completion flag; defaults to False.
    done: bool = False
class State(BaseModel):
    """State record: identifiers, progress flags, rewards, and history.

    Every field has a default, so ``State()`` is valid. Mutable defaults
    (the reward dict and both lists) are built through ``default_factory``
    so that instances never share the same container object.
    """

    # Identifier of the episode; defaults to "".
    episode_id: str = ""
    # Step counter; defaults to 0.
    step_count: int = 0
    # Completion flag; defaults to False.
    done: bool = False
    # Identifier of the selected case; None when nothing is selected.
    selected_case_id: Optional[str] = None
    # Named reward components, all starting at 0.0. The factory returns a
    # new dict on every call, so each State gets an independent mapping.
    reward_breakdown: Dict[str, float] = Field(
        default_factory=lambda: {
            "analysis_step": 0.0,
            "retrieval_step": 0.0,
            "correct_decision": 0.0,
            "reviewer_agreement": 0.0,
            "unsafe_penalty": 0.0,
        }
    )
    # Final action as a plain string; None until set.
    # NOTE(review): typed as str rather than ActionType -- presumably holds
    # an ActionType value; confirm against the code that writes it.
    final_action: Optional[str] = None
    # Optional reviewer note; None when absent.
    reviewer_note: Optional[str] = None
    # List of action records as free-form dicts; starts empty.
    action_history: List[Dict[str, Any]] = Field(default_factory=list)
    # Policy chunks retrieved so far; starts empty.
    retrieved_policy_chunks: List[PolicyChunk] = Field(default_factory=list)