"""Pydantic schemas for a content-moderation environment.

Declares the action, observation, step/reset result, state, and reset
request models. This module only defines data shapes; it has no logic.
"""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class ModerationAction(BaseModel):
    """A moderation decision together with its justification."""

    # Free-form strings; the set of accepted decision values is not
    # constrained by this model.
    decision: str
    reason: str
    # Validated to lie within the closed interval [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
    # Literal default kept as-is; pydantic copies field defaults per
    # instance, so the list is not shared between models.
    labels: List[str] = []


class ContentObservation(BaseModel):
    """One content item, plus step counters, presented for moderation."""

    content_id: str
    content_type: str
    # Optional payload fields; each defaults to None when not supplied.
    text: Optional[str] = None
    image_description: Optional[str] = None
    # NOTE(review): no range constraint is declared for this score.
    detector_score: Optional[float] = None
    metadata: Dict[str, Any] = {}
    # NOTE(review): presumably the current step index and the episode
    # length -- confirm (including 0- vs 1-based) against the environment.
    step_num: int
    total_steps: int


class StepResult(BaseModel):
    """Result of a step: optional observation, reward, done flag, info."""

    # May be None; presumably when the episode has finished -- TODO confirm.
    observation: Optional[ContentObservation] = None
    reward: float
    done: bool
    info: Dict[str, Any] = {}


class ResetResult(BaseModel):
    """Result of a reset: carries a single required observation."""

    observation: ContentObservation


class EnvState(BaseModel):
    """Snapshot of environment state fields."""

    task: str
    step_num: int
    total_steps: int
    cumulative_reward: float
    done: bool
    # List of arbitrary dict records; their schema is not defined here.
    history: List[Dict[str, Any]] = []


class ResetRequest(BaseModel):
    """Request body for a reset; ``task`` defaults to ``"text_spam"``."""

    # Optional, so an explicit None is also accepted by validation.
    task: Optional[str] = "text_spam"