| from __future__ import annotations | |
| from enum import Enum | |
| from typing import Any, Dict, List, Optional | |
| from pydantic import BaseModel, Field | |
class ActionType(str, Enum):
    """Closed set of action kinds accepted by ``Action.action_type``.

    The ``str`` mix-in makes every member a real string: a member compares
    equal to its plain value (``ActionType.allow == "allow"``) and can be
    recovered from that value (``ActionType("allow")``).
    """

    # NOTE(review): the names read like content-moderation outcomes; what
    # each one triggers is decided by the consuming code -- confirm there.
    allow = "allow"
    flag = "flag"
    remove = "remove"
    escalate = "escalate"
class StepType(str, Enum):
    """Closed set of step kinds used for ``Observation.step_type``.

    Members are ``str`` instances, so each one compares equal to its plain
    value and can be looked up from it (``StepType("decide")``).
    """

    # NOTE(review): declaration order looks like a pipeline
    # (analyze -> retrieve_policy -> decide -> review -> finalize), but
    # nothing in this file enforces an ordering -- confirm with the caller.
    analyze = "analyze"
    retrieve_policy = "retrieve_policy"
    decide = "decide"
    review = "review"
    finalize = "finalize"
class Content(BaseModel):
    """A piece of content: free text plus optional image information.

    Every field has a default, so ``Content()`` is valid and yields empty
    text with both image fields unset.
    """

    text: str = ""
    # Both image fields are None unless the caller supplies them.
    image_url: Optional[str] = None
    image_description: Optional[str] = None
class PolicyChunk(BaseModel):
    """A fragment of policy text tagged with an identifier and a score.

    All fields are optional at construction time and fall back to empty
    strings and a score of ``0.0``.
    """

    policy_id: str = ""
    text: str = ""
    # NOTE(review): presumably a retrieval relevance score; the scale and
    # direction are not defined in this file -- confirm with the producer.
    score: float = 0.0
class Action(BaseModel):
    """An action described by its type and a free-text reason.

    ``action_type`` has no default and must be supplied; ``reason`` falls
    back to an empty string.
    """

    # Required: one of the ActionType members.
    action_type: ActionType
    reason: str = ""
class Observation(BaseModel):
    """Snapshot carrying content, policy chunks and step bookkeeping.

    Every field is defaulted, so ``Observation()`` is valid: no content,
    no policy chunks, step type ``analyze``, zero counters, an empty
    message and ``done`` set to ``False``.
    """

    content: Optional[Content] = None
    # default_factory gives each instance its own list, not a shared one.
    policy: List[PolicyChunk] = Field(default_factory=list)

    # Step bookkeeping.
    step_type: StepType = StepType.analyze
    step_count: int = 0

    message: str = ""
    # NOTE(review): whether this is a per-step or cumulative reward is not
    # visible in this file -- confirm with the code that fills it in.
    reward: float = 0.0
    done: bool = False
def _zeroed_reward_breakdown() -> Dict[str, float]:
    """Return a new reward-breakdown mapping with every component at 0.0."""
    return {
        "analysis_step": 0.0,
        "retrieval_step": 0.0,
        "correct_decision": 0.0,
        "reviewer_agreement": 0.0,
        "unsafe_penalty": 0.0,
    }


class State(BaseModel):
    """Episode-level bookkeeping record.

    Every field is defaulted, so ``State()`` is valid. Mutable defaults
    (the reward breakdown and both lists) are built through factories, so
    instances never share those containers.
    """

    episode_id: str = ""
    step_count: int = 0
    done: bool = False
    selected_case_id: Optional[str] = None

    # Starts with five named components, each at 0.0.
    reward_breakdown: Dict[str, float] = Field(
        default_factory=_zeroed_reward_breakdown
    )

    # NOTE(review): stored as a plain string rather than ActionType;
    # presumably holds an ActionType value -- confirm with the writer.
    final_action: Optional[str] = None
    reviewer_note: Optional[str] = None

    # The schema of the history entries is not defined in this file.
    action_history: List[Dict[str, Any]] = Field(default_factory=list)
    retrieved_policy_chunks: List[PolicyChunk] = Field(default_factory=list)