# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. """ Data models for the REPL Environment. The REPL environment provides a Python REPL for training language models on code execution tasks, based on the Recursive Language Models (RLM) paradigm. Supports two finalization patterns: 1. RLM-style: print('FINAL(answer)') or print('FINAL_VAR(var_name)') 2. Prime Intellect style: answer = {"content": "...", "ready": True} """ from typing import Any, Dict, List, Optional from pydantic import BaseModel, Field # Support both in-repo and standalone imports try: from openenv.core.env_server.types import Action, Observation, State except ImportError: from openenv.core.env_server.types import Action, Observation, State class REPLAction(Action): """Action containing Python code to execute in the REPL. Supports multiple finalization patterns: 1. RLM-style: print('FINAL(answer)') or print('FINAL_VAR(var_name)') in code 2. Prime Intellect style: answer = {"content": "...", "ready": True} in namespace 3. Explicit: Set is_final=True with final_answer """ code: str = Field(default="", description="Python code to execute") is_final: bool = Field( default=False, description="Whether this action signals the final answer" ) final_answer: Optional[str] = Field( default=None, description="Final answer if is_final=True" ) class CodeBlockResult(BaseModel): """Result of executing a single code block.""" stdout: str = Field(default="", description="Standard output from execution") stderr: str = Field(default="", description="Standard error from execution") locals_snapshot: Dict[str, str] = Field( default_factory=dict, description="String representations of new/modified variables", ) execution_time: float = Field( default=0.0, ge=0, description="Execution time in seconds" ) success: bool = Field(default=True, description="Whether execution succeeded") exception: Optional[str] = Field( default=None, description="Exception message if execution failed" ) class REPLObservation(Observation): """Observation returned after code execution in the REPL.""" result: CodeBlockResult = Field( default_factory=CodeBlockResult, description="Result of code execution" ) context_preview: Optional[str] = Field( default=None, description="Preview of the context (first N chars) if context is loaded", ) context_length: int = Field( default=0, ge=0, description="Total length of context in characters" ) available_variables: List[str] = Field( default_factory=list, description="List of variable names available in the namespace", ) iteration: int = Field(default=0, ge=0, description="Current iteration number") max_iterations: int = Field( default=30, ge=1, description="Maximum allowed iterations" ) class REPLState(State): """Extended state for REPL environment.""" context: Optional[str] = Field( default=None, description="The context/problem to work with" ) task_prompt: Optional[str] = Field( default=None, description="The task description to solve" ) iteration: int = Field(default=0, ge=0, description="Current iteration number") max_iterations: int = Field( default=30, ge=1, description="Max iterations before termination" ) namespace_keys: List[str] = Field( default_factory=list, description="Variables currently in namespace" ) final_answer: Optional[str] = Field( default=None, description="Final answer if episode is complete" ) total_execution_time: float = Field( default=0.0, ge=0, description="Total code execution time in seconds" )