# clarify-rl / models.py
# Anurag Agarwal
# ClarifyRL: initial HF Space deploy
# 2414d31
"""
Data models for the ClarifyRL Environment.
This environment uses MCP (Model Context Protocol) for tool-based interactions.
The agent discovers tools via ListToolsAction and invokes them via CallToolAction.
"""
from typing import Optional
from pydantic import Field
from openenv.core.env_server.mcp_types import (
CallToolAction,
CallToolObservation,
ListToolsAction,
ListToolsObservation,
)
from openenv.core.env_server.types import Action, Observation, State
# ClarifyRL-specific names for the generic MCP tool-call types. These are plain
# aliases bound to the very same class objects, not subclasses.
ClarifyAction = CallToolAction
ClarifyObservation = CallToolObservation
class ClarifyState(State):
    """Per-episode state for the ClarifyRL environment.

    Extends the base ``State`` with the fields describing a single episode:
    the active task, the clarifying questions and answers exchanged so far,
    the number of questions still available, and the episode outcome (plan
    submission flag, completion flag and rubric score).
    """

    # --- Task identity -----------------------------------------------------
    task_id: str = Field("", description="Current task identifier (easy/medium/hard)")
    task_title: str = Field("", description="Human-readable title for the current task")

    # --- Clarification dialogue --------------------------------------------
    questions_asked: list[str] = Field(
        description="Clarifying questions the agent has asked so far",
        default_factory=list,
    )
    # Starts at 6 for a freshly constructed state.
    questions_remaining: int = Field(
        6,
        description="Number of questions the agent can still ask",
    )
    answers_received: list[str] = Field(
        description="Answers received from the simulated user",
        default_factory=list,
    )
    fields_revealed: list[str] = Field(
        description="Profile fields that have been revealed through questions",
        default_factory=list,
    )

    # --- Episode outcome ---------------------------------------------------
    plan_submitted: bool = Field(False, description="Whether the agent has submitted a plan")
    episode_done: bool = Field(False, description="Whether the episode has ended")
    # Both scoring fields default to None until a value is assigned.
    final_score: Optional[float] = Field(
        None,
        description="Final rubric score once the episode completes",
    )
    score_breakdown: Optional[dict] = Field(
        None,
        description="Per-component rubric breakdown",
    )
# Public API of this module: the ClarifyRL names defined here, followed by the
# imported MCP and core environment types that are listed for re-export.
__all__ = [
    # Defined in this module
    "ClarifyAction",
    "ClarifyObservation",
    "ClarifyState",
    # Imported from openenv.core.env_server.mcp_types
    "CallToolAction",
    "CallToolObservation",
    "ListToolsAction",
    "ListToolsObservation",
    # Imported from openenv.core.env_server.types
    "Action",
    "Observation",
    "State",
]