Spaces:
Running
Running
| """ | |
| Data models for the Skill Invocation Environment. | |
| This environment trains LLMs to decide WHEN to invoke procedural knowledge (skills) | |
| during task-solving. The agent receives a task + skill catalog, must decide which | |
| skills to load for full procedural knowledge, then submits a solution. | |
| Context cost model: each loaded skill costs context budget. The reward penalizes | |
| bloat (unnecessary skills loaded at submit time) and rewards precision. | |
| """ | |
| from typing import Optional | |
| from pydantic import Field | |
| from openenv.core.env_server.types import Action, Observation, State | |
class SkillDescription(Action):
    """A short description of a skill visible in the catalog."""

    # NOTE(review): this catalog entry derives from Action even though it only
    # carries descriptive fields -- confirm the base class is intentional.
    #
    # Every field below is declared with `...` as its default, i.e. it is
    # required when the model is constructed.

    # Identifier for the skill.
    id: str = Field(
        ...,
        description="Unique skill identifier",
    )

    # Display name for the skill.
    name: str = Field(
        ...,
        description="Human-readable skill name",
    )

    # Short summary of the skill.
    description: str = Field(
        ...,
        description="1-2 sentence summary of what the skill covers",
    )
class SkillInvocationAction(Action):
    """Agent's action — list, load, unload, or submit."""

    # NOTE(review): the docstring above also names "list", while the
    # action_type description below documents only load/unload/submit --
    # confirm which set of values the environment actually accepts.

    # Operation to perform. Required (declared with `...`). Typed as a plain
    # str, so this model itself does not restrict the value.
    action_type: str = Field(
        ...,
        description=(
            '"load" to load a skill, "unload" to unload a skill, '
            'or "submit" to submit answer'
        ),
    )

    # Target skill. Optional at the model level: the description calls it
    # required for load/unload, but no validator in this class enforces that.
    skill_id: Optional[str] = Field(
        default=None,
        description="Skill ID (required for load/unload)",
    )

    # Submitted solution text. `max_length` makes the model enforce the same
    # 100000-character cap that `json_schema_extra` already advertises;
    # `json_schema_extra` on its own only decorates the generated JSON schema
    # and performs no validation. The schema override is kept unchanged.
    answer: Optional[str] = Field(
        default=None,
        description="Solution text (required for submit)",
        max_length=100000,
        json_schema_extra={"type": "string", "maxLength": 100000},
    )
class SkillInvocationObservation(Observation):
    """What the agent sees at each step."""

    # Every field has a default, so an observation can be built with no
    # arguments. Field order is kept as declared originally.

    # --- Task and catalog -------------------------------------------------
    task_description: str = Field(
        description="The task to solve",
        default="",
    )
    # Entries are untyped dicts; this model does not validate their keys.
    skill_catalog: list[dict] = Field(
        description="Available skills with id, name, and description",
        default_factory=list,
    )
    difficulty: str = Field(
        description="Task difficulty level",
        default="easy",
    )

    # --- Skill context management -----------------------------------------
    loaded_skills: list[str] = Field(
        description="IDs of currently loaded skills",
        default_factory=list,
    )
    # Bare `dict`: key and value types are not validated by this model.
    loaded_skill_contents: dict = Field(
        description="Mapping of skill_id -> full_content for loaded skills",
        default_factory=dict,
    )
    # Stored as an independent value; nothing in this class derives it from
    # `loaded_skills`, so whoever builds the observation must keep it in sync.
    context_budget_used: int = Field(
        description="Number of skills currently loaded",
        default=0,
    )
    context_budget_total: int = Field(
        description="Max skills that can be loaded simultaneously",
        default=5,
    )

    # --- Backward compat --------------------------------------------------
    skill_content: Optional[str] = Field(
        description="Full content of last loaded skill",
        default=None,
    )
    remaining_invocations: int = Field(
        description="Remaining context budget (backward compat)",
        default=5,
    )
    verification_result: Optional[str] = Field(
        description="Result of answer verification",
        default=None,
    )
    skills_invoked: list[str] = Field(
        description="IDs of all skills ever loaded this episode",
        default_factory=list,
    )
    messages: list[str] = Field(
        description="Running log of actions/observations",
        default_factory=list,
    )
class SkillInvocationState(State):
    """Internal episode state tracked server-side."""

    # All fields visible here carry defaults. Field order is kept as declared
    # originally.

    task_id: str = Field(
        description="Current task ID",
        default="",
    )
    loaded_skills: list[str] = Field(
        description="Currently loaded skills (in context)",
        default_factory=list,
    )
    skills_ever_loaded: list[str] = Field(
        description="All skills ever loaded this episode",
        default_factory=list,
    )
    difficulty: str = Field(
        description="Task difficulty",
        default="easy",
    )
    done: bool = Field(
        description="Whether episode is finished",
        default=False,
    )
    context_budget_total: int = Field(
        description="Max simultaneous skills",
        default=5,
    )

    # --- Backward compat --------------------------------------------------
    # Declared as a separate list field, not a pydantic alias: the visible
    # fields do nothing to mirror it to `skills_ever_loaded`.
    skills_invoked: list[str] = Field(
        description="Alias for skills_ever_loaded",
        default_factory=list,
    )
    remaining_invocations: int = Field(
        description="Remaining context budget",
        default=5,
    )