File size: 4,031 Bytes
07fffa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4721d14
 
07fffa0
 
 
 
 
 
 
 
 
4721d14
 
 
07fffa0
 
 
 
 
 
 
 
4721d14
 
 
07fffa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4721d14
 
 
07fffa0
 
 
 
 
 
 
 
 
 
 
 
 
 
4721d14
 
 
07fffa0
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the REPL Environment.

The REPL environment provides a Python REPL for training language models
on code execution tasks, based on the Recursive Language Models (RLM) paradigm.

Supports two finalization patterns:
1. RLM-style: print('FINAL(answer)') or print('FINAL_VAR(var_name)')
2. Prime Intellect style: answer = {"content": "...", "ready": True}
"""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field

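# Support both in-repo and standalone imports.
# NOTE(review): both branches below currently import from the same module
# path, so the ImportError fallback cannot succeed where the first attempt
# failed — confirm the intended standalone import path.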
try:
    from openenv.core.env_server.types import Action, Observation, State
except ImportError:
    from openenv.core.env_server.types import Action, Observation, State


class REPLAction(Action):
    """Action containing Python code to execute in the REPL.

    Supports multiple finalization patterns:
    1. RLM-style: print('FINAL(answer)') or print('FINAL_VAR(var_name)') in code
    2. Prime Intellect style: answer = {"content": "...", "ready": True} in namespace
    3. Explicit: Set is_final=True with final_answer
    """

    # Source text to run; an empty string when nothing is supplied.
    code: str = Field("", description="Python code to execute")

    # Explicit finalization flag (pattern 3 in the docstring); off by default.
    is_final: bool = Field(
        False, description="Whether this action signals the final answer"
    )

    # Answer accompanying an explicit finalization; None when not provided.
    final_answer: Optional[str] = Field(
        None, description="Final answer if is_final=True"
    )


class CodeBlockResult(BaseModel):
    """Result of executing a single code block."""

    # Captured output streams; both default to the empty string.
    stdout: str = Field("", description="Standard output from execution")
    stderr: str = Field("", description="Standard error from execution")

    # Variable name -> string representation; a fresh dict per instance.
    locals_snapshot: Dict[str, str] = Field(
        default_factory=dict,
        description="String representations of new/modified variables",
    )

    # Duration in seconds, constrained to be >= 0.
    execution_time: float = Field(
        0.0, ge=0, description="Execution time in seconds"
    )

    # Outcome flag; defaults to True.
    success: bool = Field(True, description="Whether execution succeeded")

    # Failure message; None by default.
    exception: Optional[str] = Field(
        None, description="Exception message if execution failed"
    )


class REPLObservation(Observation):
    """Observation returned after code execution in the REPL."""

    # Execution outcome; a default-constructed CodeBlockResult when omitted.
    result: CodeBlockResult = Field(
        default_factory=CodeBlockResult,
        description="Result of code execution",
    )

    # Leading slice of the context, or None by default.
    context_preview: Optional[str] = Field(
        None,
        description="Preview of the context (first N chars) if context is loaded",
    )

    # Full context size in characters, constrained to be >= 0.
    context_length: int = Field(
        0, ge=0, description="Total length of context in characters"
    )

    # Names visible in the namespace; a fresh list per instance.
    available_variables: List[str] = Field(
        default_factory=list,
        description="List of variable names available in the namespace",
    )

    # Iteration counter (>= 0) and its cap (>= 1, default 30).
    iteration: int = Field(0, ge=0, description="Current iteration number")
    max_iterations: int = Field(
        30, ge=1, description="Maximum allowed iterations"
    )


class REPLState(State):
    """Extended state for REPL environment."""

    # Problem context and task description; both default to None.
    context: Optional[str] = Field(
        None, description="The context/problem to work with"
    )
    task_prompt: Optional[str] = Field(
        None, description="The task description to solve"
    )

    # Iteration counter (>= 0) and its cap (>= 1, default 30).
    iteration: int = Field(0, ge=0, description="Current iteration number")
    max_iterations: int = Field(
        30, ge=1, description="Max iterations before termination"
    )

    # Names currently held in the namespace; a fresh list per instance.
    namespace_keys: List[str] = Field(
        default_factory=list,
        description="Variables currently in namespace",
    )

    # Final answer; None by default.
    final_answer: Optional[str] = Field(
        None, description="Final answer if episode is complete"
    )

    # Execution time total in seconds, constrained to be >= 0.
    total_execution_time: float = Field(
        0.0, ge=0, description="Total code execution time in seconds"
    )