Spaces:
Sleeping
Sleeping
Improve environment depth: investigate action, cascading deps, rich observations, harder hard task
5f6895d | """Pydantic models for the Feature Flag Cleanup environment.""" | |
| from typing import Any, Dict, List, Optional | |
| from pydantic import BaseModel, Field, ConfigDict | |
class FlagAction(BaseModel):
    """Action the agent takes on a feature flag."""

    # Strict input model: unknown keys are refused (extra="forbid") and
    # attribute writes after construction are validated again.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # Required verb. The anchored regex limits it to the five actions
    # listed in the description.
    action: str = Field(
        ...,
        pattern="^(remove|keep|deprecate|escalate|investigate)$",
        description="One of: remove, keep, deprecate, escalate, investigate",
    )

    # Free-text rationale; defaults to the empty string when omitted.
    reasoning: str = Field(
        description="Optional reasoning for the action",
        default="",
    )

    # Open-ended side payload; each instance gets its own fresh dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
class FlagObservation(BaseModel):
    """Observation presented to the agent for each feature flag."""

    # Strict model: unknown keys are refused (extra="forbid") and attribute
    # writes after construction are validated again.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- Identity and lifecycle of the flag -------------------------------
    flag_name: str = Field(..., description="Name of the feature flag")
    description: str = Field(..., description="What the flag controls")
    # Rollout is a fraction in [0.0, 1.0], not a 0-100 percentage.
    rollout_percentage: float = Field(
        ...,
        description="Current rollout 0.0-1.0",
        le=1.0,
        ge=0.0,
    )
    age_days: int = Field(
        ...,
        description="Days since flag was created",
        ge=0,
    )
    last_modified_days: int = Field(
        ...,
        description="Days since last modification",
        ge=0,
    )

    # --- Ownership --------------------------------------------------------
    owner: str = Field(..., description="Team or person who owns the flag")
    owner_active: bool = Field(
        ...,
        description="Whether the owner is still active",
    )

    # --- Code footprint and dependencies ----------------------------------
    num_code_references: int = Field(
        ...,
        description="Number of code references",
        ge=0,
    )
    has_dependencies: bool = Field(
        ...,
        description="Whether other flags depend on this one",
    )
    dependent_flags: List[str] = Field(
        description="Flags depending on this one",
        default_factory=list,
    )

    # --- Risk and usage signals (all optional, defaulting to "quiet") -----
    is_kill_switch: bool = Field(
        description="Emergency kill switch flag",
        default=False,
    )
    has_active_incident: bool = Field(
        description="Active incident involving this flag",
        default=False,
    )
    usage_last_30d: int = Field(
        description="Flag evaluations in last 30 days",
        default=0,
        ge=0,
    )
    in_active_experiment: bool = Field(
        description="Part of a running A/B test",
        default=False,
    )
    services: List[str] = Field(
        description="Services referencing this flag",
        default_factory=list,
    )

    # --- Episode bookkeeping ----------------------------------------------
    task_id: str = Field(..., description="Current task identifier")
    flags_remaining: int = Field(
        ...,
        description="Flags left to process",
        ge=0,
    )

    # --- Rich context (richer observations) -------------------------------
    code_snippet: str = Field(
        description="Code snippet showing how the flag is used",
        default="",
    )
    last_commit_message: str = Field(
        description="Last git commit that modified this flag",
        default="",
    )
    pr_context: str = Field(
        description="Context from the PR that introduced this flag",
        default="",
    )
    related_incidents: List[str] = Field(
        description="Past incident IDs involving this flag",
        default_factory=list,
    )
    investigation_notes: str = Field(
        description="Notes revealed by investigate action",
        default="",
    )

    # --- Cascading context (earlier decisions affect later flags) ---------
    previously_removed: List[str] = Field(
        description="Flags already removed this episode",
        default_factory=list,
    )
    cascade_warning: str = Field(
        description="Warning if a dependency was removed earlier",
        default="",
    )

    # --- Step result envelope ---------------------------------------------
    # Plain defaults here are equivalent to Field(default=...) with no
    # constraints or description.
    done: bool = False
    reward: Optional[float] = None
    # Open-ended side payload; each instance gets its own fresh dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
class FlagState(BaseModel):
    """Internal state of the environment."""

    # Unlike the action/observation models, extra keys are accepted here
    # (extra="allow"); attribute writes are still validated.
    model_config = ConfigDict(validate_assignment=True, extra="allow")

    # Episode identity; None until an id is assigned.
    episode_id: Optional[str] = None
    # Non-negative counter, starting at zero.
    step_count: int = Field(ge=0, default=0)
    # Task identifier; defaults to "easy".
    task_id: str = "easy"

    # Position within the flag list and its total size, both non-negative.
    current_flag_index: int = Field(ge=0, default=0)
    total_flags: int = Field(ge=0, default=0)

    # Running reward total; unconstrained, so it may go negative.
    cumulative_reward: float = 0.0

    # Per-episode history lists; each instance gets its own fresh list.
    flags_processed: List[str] = Field(default_factory=list)
    flags_removed: List[str] = Field(default_factory=list)

    # Non-negative count (by name, of investigate actions — confirm in the
    # environment code, which is not visible here).
    investigation_count: int = Field(ge=0, default=0)