Spaces:
Running
Running
File size: 4,031 Bytes
07fffa0 4721d14 07fffa0 4721d14 07fffa0 4721d14 07fffa0 4721d14 07fffa0 4721d14 07fffa0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for the REPL Environment.
The REPL environment provides a Python REPL for training language models
on code execution tasks, based on the Recursive Language Models (RLM) paradigm.
Supports two finalization patterns:
1. RLM-style: print('FINAL(answer)') or print('FINAL_VAR(var_name)')
2. Prime Intellect style: answer = {"content": "...", "ready": True}
"""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
# Base classes for the models defined below.
# NOTE(review): this used to be a try/except ImportError whose fallback
# re-imported the exact same module path, so the fallback could never succeed
# where the first attempt had failed. If a distinct standalone import path is
# required, reintroduce the fallback with that different path.
from openenv.core.env_server.types import Action, Observation, State
class REPLAction(Action):
    """Action containing Python code to execute in the REPL.
    Supports multiple finalization patterns:
    1. RLM-style: print('FINAL(answer)') or print('FINAL_VAR(var_name)') in code
    2. Prime Intellect style: answer = {"content": "...", "ready": True} in namespace
    3. Explicit: Set is_final=True with final_answer
    """

    # Source text to run; an empty string when the caller supplies nothing.
    code: str = Field("", description="Python code to execute")

    # Explicit finalization flag (pattern 3 in the docstring); off by default.
    is_final: bool = Field(
        False,
        description="Whether this action signals the final answer",
    )

    # Answer payload that accompanies is_final=True; None when not provided.
    final_answer: Optional[str] = Field(
        None,
        description="Final answer if is_final=True",
    )
class CodeBlockResult(BaseModel):
    """Result of executing a single code block."""

    # Captured output streams; each defaults to an empty string.
    stdout: str = Field("", description="Standard output from execution")
    stderr: str = Field("", description="Standard error from execution")

    # String-to-string mapping, presumably keyed by variable name (verify
    # against the producer); a fresh empty dict is created per instance.
    locals_snapshot: Dict[str, str] = Field(
        default_factory=dict,
        description="String representations of new/modified variables",
    )

    # Validated as non-negative (ge=0).
    execution_time: float = Field(
        0.0,
        ge=0,
        description="Execution time in seconds",
    )

    # Outcome flag and the accompanying message; the defaults describe a
    # successful run with no exception recorded.
    success: bool = Field(True, description="Whether execution succeeded")
    exception: Optional[str] = Field(
        None,
        description="Exception message if execution failed",
    )
class REPLObservation(Observation):
    """Observation returned after code execution in the REPL."""

    # Execution outcome; a default-constructed CodeBlockResult when omitted.
    result: CodeBlockResult = Field(
        default_factory=CodeBlockResult,
        description="Result of code execution",
    )

    # Optional excerpt of the context, paired with its length (validated as
    # non-negative).
    context_preview: Optional[str] = Field(
        None,
        description="Preview of the context (first N chars) if context is loaded",
    )
    context_length: int = Field(
        0,
        ge=0,
        description="Total length of context in characters",
    )

    # Names visible in the namespace; a fresh empty list per instance.
    available_variables: List[str] = Field(
        default_factory=list,
        description="List of variable names available in the namespace",
    )

    # Iteration counter (>= 0) and its cap (>= 1, default 30).
    iteration: int = Field(0, ge=0, description="Current iteration number")
    max_iterations: int = Field(
        30,
        ge=1,
        description="Maximum allowed iterations",
    )
class REPLState(State):
    """Extended state for REPL environment."""

    # Optional problem inputs; both default to None.
    context: Optional[str] = Field(
        None,
        description="The context/problem to work with",
    )
    task_prompt: Optional[str] = Field(
        None,
        description="The task description to solve",
    )

    # Iteration counter (>= 0) and its cap (>= 1, default 30).
    iteration: int = Field(0, ge=0, description="Current iteration number")
    max_iterations: int = Field(
        30,
        ge=1,
        description="Max iterations before termination",
    )

    # Variable names tracked for the namespace; a fresh empty list per instance.
    namespace_keys: List[str] = Field(
        default_factory=list,
        description="Variables currently in namespace",
    )

    # Remains None until a final answer is recorded.
    final_answer: Optional[str] = Field(
        None,
        description="Final answer if episode is complete",
    )

    # Validated as non-negative (ge=0).
    total_execution_time: float = Field(
        0.0,
        ge=0,
        description="Total code execution time in seconds",
    )
|