"""Pydantic models for the Feature Flag Cleanup environment."""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, ConfigDict


# NOTE: pydantic copies a model's class docstring into the generated JSON
# schema ("description"), so the class docstrings below are kept verbatim and
# any extra explanation lives in `#` comments instead.


# Agent -> environment message.
# `extra="forbid"` rejects unknown keys; `validate_assignment=True` re-runs
# validation whenever an attribute is set after construction.
class FlagAction(BaseModel):
    """Action the agent takes on a feature flag."""

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # Required. The anchored pattern restricts the value to the five verbs
    # listed in the description.
    action: str = Field(
        ...,
        pattern="^(remove|keep|deprecate|escalate|investigate)$",
        description="One of: remove, keep, deprecate, escalate, investigate",
    )
    # Free-text justification; defaults to the empty string.
    reasoning: str = Field("", description="Optional reasoning for the action")
    # Arbitrary extra payload; a fresh dict is created per instance.
    metadata: Dict[str, Any] = Field(default_factory=dict)


# Environment -> agent message describing a single flag.
# Same strictness settings as FlagAction (unknown keys rejected, assignments
# re-validated).
class FlagObservation(BaseModel):
    """Observation presented to the agent for each feature flag."""

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- Required core facts about the flag --------------------------------
    flag_name: str = Field(..., description="Name of the feature flag")
    description: str = Field(..., description="What the flag controls")
    # Fraction in [0.0, 1.0], not a 0-100 percentage.
    rollout_percentage: float = Field(
        ...,
        description="Current rollout 0.0-1.0",
        ge=0.0,
        le=1.0,
    )
    age_days: int = Field(
        ...,
        description="Days since flag was created",
        ge=0,
    )
    last_modified_days: int = Field(
        ...,
        description="Days since last modification",
        ge=0,
    )
    owner: str = Field(..., description="Team or person who owns the flag")
    owner_active: bool = Field(
        ...,
        description="Whether the owner is still active",
    )
    num_code_references: int = Field(
        ...,
        description="Number of code references",
        ge=0,
    )
    # Nothing in this module checks that `has_dependencies` agrees with
    # `dependent_flags`; the producer is responsible for keeping them in sync.
    has_dependencies: bool = Field(
        ...,
        description="Whether other flags depend on this one",
    )
    dependent_flags: List[str] = Field(
        default_factory=list,
        description="Flags depending on this one",
    )

    # --- Optional risk / usage signals -------------------------------------
    is_kill_switch: bool = Field(
        False,
        description="Emergency kill switch flag",
    )
    has_active_incident: bool = Field(
        False,
        description="Active incident involving this flag",
    )
    usage_last_30d: int = Field(
        0,
        description="Flag evaluations in last 30 days",
        ge=0,
    )
    in_active_experiment: bool = Field(
        False,
        description="Part of a running A/B test",
    )
    services: List[str] = Field(
        default_factory=list,
        description="Services referencing this flag",
    )

    # --- Required episode bookkeeping --------------------------------------
    task_id: str = Field(..., description="Current task identifier")
    flags_remaining: int = Field(
        ...,
        description="Flags left to process",
        ge=0,
    )

    # Rich context fields (Weakness #2 fix: richer observations)
    code_snippet: str = Field(
        "",
        description="Code snippet showing how the flag is used",
    )
    last_commit_message: str = Field(
        "",
        description="Last git commit that modified this flag",
    )
    pr_context: str = Field(
        "",
        description="Context from the PR that introduced this flag",
    )
    related_incidents: List[str] = Field(
        default_factory=list,
        description="Past incident IDs involving this flag",
    )
    investigation_notes: str = Field(
        "",
        description="Notes revealed by investigate action",
    )

    # Cascading context (Weakness #1 fix: decisions affect future flags)
    previously_removed: List[str] = Field(
        default_factory=list,
        description="Flags already removed this episode",
    )
    cascade_warning: str = Field(
        "",
        description="Warning if a dependency was removed earlier",
    )

    # --- Step result fields -------------------------------------------------
    done: bool = False
    # None means no reward value is attached to this observation.
    reward: Optional[float] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)


# Environment-side bookkeeping. Unlike the two models above this one uses
# `extra="allow"`, so unknown keys are accepted and kept on the instance.
class FlagState(BaseModel):
    """Internal state of the environment."""

    model_config = ConfigDict(validate_assignment=True, extra="allow")

    episode_id: Optional[str] = None
    step_count: int = Field(0, ge=0)
    task_id: str = "easy"
    current_flag_index: int = Field(0, ge=0)
    total_flags: int = Field(0, ge=0)
    # No bound is declared, so negative totals are accepted.
    cumulative_reward: float = 0.0
    flags_processed: List[str] = Field(default_factory=list)
    flags_removed: List[str] = Field(default_factory=list)
    investigation_count: int = Field(0, ge=0)