sergiopaniego (HF Staff) committed 4721d14 (verified)
Parent(s): 07fffa0

Upload folder using huggingface_hub

Files changed (6):
  1. client.py (+7 -2)
  2. models.py (+14 -5)
  3. prompts.py (+7 -4)
  4. server/app.py (+4 -1)
  5. server/python_executor.py (+43 -20)
  6. server/repl_environment.py (+38 -16)
client.py CHANGED
@@ -189,6 +189,7 @@ class REPLEnv:
             hf_token: Optional HuggingFace token for llm_query/llm_query_batched.
                 When provided, the server uses this token for sub-LLM calls
                 instead of its own configured token.
+                Security: Token is NOT stored in state or logged.
             llm_model: Optional model name for LLM functions (default: Qwen3-Coder-480B).
 
         Returns:
@@ -264,7 +265,9 @@ class REPLEnv:
         Returns:
             StepResult with done=True.
         """
-        return self.step(REPLAction(code="", is_final=True, final_answer=answer))
+        return self.step(
+            REPLAction(code="", is_final=True, final_answer=answer)
+        )
 
     def get_variable(self, name: str) -> StepResult[REPLObservation]:
         """
@@ -312,7 +315,9 @@ class REPLEnv:
         self._remote_client.close()
         self._remote_client = None
 
-    def _wrap_observation(self, obs: REPLObservation) -> StepResult[REPLObservation]:
+    def _wrap_observation(
+        self, obs: REPLObservation
+    ) -> StepResult[REPLObservation]:
         """Wrap a local REPLObservation in a StepResult."""
         return StepResult(
             observation=obs,
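
For context, a minimal, hypothetical sketch of driving REPLEnv from the client side. The constructor arguments and the exact reset()/step() call shapes are assumptions inferred from the docstrings in this diff (REPLEnv is not fully shown here), so treat it as a sketch, not the repo's confirmed API:

# Hypothetical sketch of driving REPLEnv; constructor args are assumed.
from client import REPLEnv
from models import REPLAction

env = REPLEnv()  # connection details omitted / assumed defaults

# Per the docstring above: a client-supplied hf_token is forwarded for
# sub-LLM calls (llm_query/llm_query_batched) and is not stored or logged.
env.reset(hf_token="hf_...", llm_model="Qwen3-Coder-480B")

# Execute a code step, then signal the final answer. The final-answer
# helper changed in this diff reduces to exactly this step() call:
result = env.step(REPLAction(code="print(len(context))"))
result = env.step(REPLAction(code="", is_final=True, final_answer="42"))
assert result.done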
models.py CHANGED
@@ -37,7 +37,8 @@ class REPLAction(Action):
 
     code: str = Field(default="", description="Python code to execute")
     is_final: bool = Field(
-        default=False, description="Whether this action signals the final answer"
+        default=False,
+        description="Whether this action signals the final answer",
     )
     final_answer: Optional[str] = Field(
         default=None, description="Final answer if is_final=True"
@@ -47,7 +48,9 @@
 class CodeBlockResult(BaseModel):
     """Result of executing a single code block."""
 
-    stdout: str = Field(default="", description="Standard output from execution")
+    stdout: str = Field(
+        default="", description="Standard output from execution"
+    )
     stderr: str = Field(default="", description="Standard error from execution")
     locals_snapshot: Dict[str, str] = Field(
         default_factory=dict,
@@ -56,7 +59,9 @@ class CodeBlockResult(BaseModel):
     execution_time: float = Field(
         default=0.0, ge=0, description="Execution time in seconds"
     )
-    success: bool = Field(default=True, description="Whether execution succeeded")
+    success: bool = Field(
+        default=True, description="Whether execution succeeded"
+    )
     exception: Optional[str] = Field(
         default=None, description="Exception message if execution failed"
     )
@@ -79,7 +84,9 @@ class REPLObservation(Observation):
         default_factory=list,
         description="List of variable names available in the namespace",
     )
-    iteration: int = Field(default=0, ge=0, description="Current iteration number")
+    iteration: int = Field(
+        default=0, ge=0, description="Current iteration number"
+    )
     max_iterations: int = Field(
         default=30, ge=1, description="Maximum allowed iterations"
     )
@@ -94,7 +101,9 @@ class REPLState(State):
     task_prompt: Optional[str] = Field(
         default=None, description="The task description to solve"
     )
-    iteration: int = Field(default=0, ge=0, description="Current iteration number")
+    iteration: int = Field(
+        default=0, ge=0, description="Current iteration number"
+    )
     max_iterations: int = Field(
         default=30, ge=1, description="Max iterations before termination"
    )
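
The `ge=0` / `ge=1` bounds on these fields are enforced by pydantic at construction time. A minimal sketch of that behavior, using a stand-in model (the real REPLObservation/REPLState have more required fields):

# Illustrative only: mirrors the Field constraints reformatted above.
from pydantic import BaseModel, Field, ValidationError

class Demo(BaseModel):
    iteration: int = Field(default=0, ge=0)
    max_iterations: int = Field(default=30, ge=1)

Demo(iteration=3)          # ok
try:
    Demo(iteration=-1)     # rejected: violates ge=0
except ValidationError as e:
    print(e)               # reports the failed constraint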
prompts.py CHANGED
@@ -278,10 +278,13 @@ def build_user_prompt(
             else USER_PROMPT
         )
     else:
-        prompt = "The history before is your previous interactions with the REPL environment. " + (
-            USER_PROMPT_WITH_ROOT.format(root_prompt=root_prompt)
-            if root_prompt
-            else USER_PROMPT
+        prompt = (
+            "The history before is your previous interactions with the REPL environment. "
+            + (
+                USER_PROMPT_WITH_ROOT.format(root_prompt=root_prompt)
+                if root_prompt
+                else USER_PROMPT
+            )
         )
 
     # Inform model about multiple contexts if present
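
The reformatted expression selects between two prompt templates and prefixes a history note. A self-contained sketch of the same pattern, with stand-in constants and a hypothetical wrapper name (the real USER_PROMPT* strings live in prompts.py):

USER_PROMPT = "Solve the task."
USER_PROMPT_WITH_ROOT = "Solve the task for: {root_prompt}"

def continuation_prompt(root_prompt=None):
    # Same shape as the diff: history prefix + template chosen by root_prompt.
    return (
        "The history before is your previous interactions with the REPL environment. "
        + (
            USER_PROMPT_WITH_ROOT.format(root_prompt=root_prompt)
            if root_prompt
            else USER_PROMPT
        )
    )

print(continuation_prompt())          # falls back to USER_PROMPT
print(continuation_prompt("find X"))  # injects the root prompt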
server/app.py CHANGED
@@ -34,6 +34,7 @@ Environment Variables:
     HF_TOKEN: Fallback HuggingFace API token (client token takes priority)
     LLM_MODEL: Model to use for llm_query/llm_query_batched (default: Qwen/Qwen3-Coder-480B-A35B-Instruct)
 """
+
 import os
 
 # Support both in-repo and standalone imports
@@ -60,7 +61,9 @@ if HF_TOKEN:
     print(f"[REPL Server] Default model: {LLM_MODEL}")
 else:
     print("[REPL Server] No server HF_TOKEN configured")
-    print("[REPL Server] LLM functions will be enabled if client passes hf_token in reset()")
+    print(
+        "[REPL Server] LLM functions will be enabled if client passes hf_token in reset()"
+    )
 
 # Simple factory - LLM functions are created dynamically in reset() based on token
 env_factory = REPLEnvironment
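
The startup banner above encodes the token-fallback rule documented in the diff: the server reads HF_TOKEN and LLM_MODEL from the environment, and a client token passed to reset() takes priority. A small sketch of that rule (the resolve_token helper is illustrative, not part of the repo):

import os
from typing import Optional

HF_TOKEN = os.environ.get("HF_TOKEN")
LLM_MODEL = os.environ.get(
    "LLM_MODEL", "Qwen/Qwen3-Coder-480B-A35B-Instruct"
)

def resolve_token(client_token: Optional[str]) -> Optional[str]:
    # Priority per the diff comments: client hf_token > server HF_TOKEN.
    # None means the LLM helper functions stay disabled for the session.
    return client_token or HF_TOKEN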
server/python_executor.py CHANGED
@@ -42,18 +42,19 @@ class PythonExecutor:
     def __init__(
         self,
         max_output_length: int = 8192,
-        timeout: float = 30.0,
         allowed_imports: Optional[List[str]] = None,
     ):
         """Initialize the executor.
 
         Args:
             max_output_length: Maximum characters for stdout/stderr (default 8192)
-            timeout: Execution timeout in seconds (passed to LocalPythonExecutor)
             allowed_imports: List of allowed module names for import
+
+        Note:
+            smolagents.LocalPythonExecutor does NOT support wall-clock timeouts.
+            Instead, it limits operations (10M ops) and while iterations (1M).
         """
         self.max_output_length = max_output_length
-        self.timeout = timeout
 
         # Default allowed imports for RLM tasks
         default_imports = [
@@ -107,7 +108,9 @@
         """Register helper functions with the executor."""
         helpers = {
             "format_exc": traceback.format_exc,
-            "safe_json_dumps": lambda obj: json.dumps(obj, default=lambda o: repr(o)),
+            "safe_json_dumps": lambda obj: json.dumps(
+                obj, default=lambda o: repr(o)
+            ),
         }
         # Register helpers as callable tools
         for name, func in helpers.items():
@@ -120,7 +123,10 @@
             # Type ignore: smolagents accepts callables despite Tool type hint
             self._executor.send_tools(self._callable_tools)  # type: ignore[arg-type]
         except Exception:
-            logger.debug("send_tools failed; continuing without extra tools", exc_info=True)
+            logger.debug(
+                "send_tools failed; continuing without extra tools",
+                exc_info=True,
+            )
 
     def set_context(self, context: str, variable_name: str = "context") -> None:
         """Load context into namespace as a variable.
@@ -139,11 +145,13 @@
             value: Variable value
         """
         # Access the executor's internal state to set variables
-        if hasattr(self._executor, 'state'):
+        if hasattr(self._executor, "state"):
             self._executor.state[name] = value
         else:
             # Fallback: store in injected vars for later retrieval
-            self._executor._injected_vars = getattr(self._executor, '_injected_vars', {})
+            self._executor._injected_vars = getattr(
+                self._executor, "_injected_vars", {}
+            )
             self._executor._injected_vars[name] = value
 
         self._user_variables.add(name)
@@ -158,11 +166,11 @@
             The variable value or None if not found
         """
         # Try to get from executor's state
-        if hasattr(self._executor, 'state'):
+        if hasattr(self._executor, "state"):
             return self._executor.state.get(name)
 
         # Fallback to injected vars
-        if hasattr(self._executor, '_injected_vars'):
+        if hasattr(self._executor, "_injected_vars"):
             return self._executor._injected_vars.get(name)
 
         return None
@@ -176,9 +184,9 @@
         variables = set()
 
         # Get from executor's state
-        if hasattr(self._executor, 'state'):
+        if hasattr(self._executor, "state"):
             for key in self._executor.state:
-                if not key.startswith('_'):
+                if not key.startswith("_"):
                     variables.add(key)
 
         # Include tracked user variables
@@ -203,7 +211,7 @@
 
         # Track state before execution
         pre_state_keys = set()
-        if hasattr(self._executor, 'state'):
+        if hasattr(self._executor, "state"):
             pre_state_keys = set(self._executor.state.keys())
 
         stdout_parts: list[str] = []
@@ -249,29 +257,38 @@
                         success = False
                         exception_msg = str(ex)
                 except Exception:
-                    logger.debug("Failed to read exec_result.exception", exc_info=True)
+                    logger.debug(
+                        "Failed to read exec_result.exception", exc_info=True
+                    )
 
                 # Determine success from exit_code if available
                 try:
                     if hasattr(exec_result, "exit_code"):
-                        if exec_result.exit_code is not None and exec_result.exit_code != 0:
+                        if (
+                            exec_result.exit_code is not None
+                            and exec_result.exit_code != 0
+                        ):
                             success = False
                     elif hasattr(exec_result, "success"):
                         success = bool(exec_result.success)
                 except Exception:
-                    logger.debug("Failed to determine exec_result exit code", exc_info=True)
+                    logger.debug(
+                        "Failed to determine exec_result exit code", exc_info=True
+                    )
 
         except Exception as e:
             success = False
-            exception_msg = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
+            exception_msg = (
+                f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
+            )
             stderr_parts.append(exception_msg)
 
         execution_time = time.time() - start_time
 
         # Capture new/modified variables
-        if hasattr(self._executor, 'state'):
+        if hasattr(self._executor, "state"):
             for key in self._executor.state:
-                if key not in pre_state_keys and not key.startswith('_'):
+                if key not in pre_state_keys and not key.startswith("_"):
                     try:
                         val = self._executor.state[key]
                         val_repr = repr(val)
@@ -288,10 +305,16 @@
 
         # Truncate output to max_output_length
         if len(stdout) > self.max_output_length:
-            stdout = stdout[:self.max_output_length] + f"\n... (truncated, total {len(stdout)} chars)"
+            stdout = (
+                stdout[: self.max_output_length]
+                + f"\n... (truncated, total {len(stdout)} chars)"
+            )
 
        if len(stderr) > self.max_output_length:
-            stderr = stderr[:self.max_output_length] + f"\n... (truncated, total {len(stderr)} chars)"
+            stderr = (
+                stderr[: self.max_output_length]
+                + f"\n... (truncated, total {len(stderr)} chars)"
+            )
 
         return {
             "stdout": stdout,
server/repl_environment.py CHANGED
@@ -102,8 +102,12 @@ class REPLEnvironment(Environment):
             llm_batch_fn: Optional function for llm_query_batched() support
         """
         self.initial_context = context or os.environ.get("REPL_CONTEXT", "")
-        self.initial_task_prompt = task_prompt or os.environ.get("REPL_TASK_PROMPT", "")
-        self.max_iterations = int(os.environ.get("REPL_MAX_ITERATIONS", max_iterations))
+        self.initial_task_prompt = task_prompt or os.environ.get(
+            "REPL_TASK_PROMPT", ""
+        )
+        self.max_iterations = int(
+            os.environ.get("REPL_MAX_ITERATIONS", max_iterations)
+        )
         self.max_output_length = max_output_length
         self.context_preview_length = context_preview_length
 
@@ -130,8 +134,11 @@
 
         This allows clients to use their own HF token instead of the server's.
 
+        Security: The token is used only to initialize the InferenceClient
+        and is NOT stored in state, logged, or persisted anywhere.
+
         Args:
-            hf_token: HuggingFace API token
+            hf_token: HuggingFace API token (not logged or persisted)
             llm_model: Model to use (default: Qwen/Qwen3-Coder-480B-A35B-Instruct)
         """
         from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -204,6 +211,7 @@ class REPLEnvironment(Environment):
             task_prompt: Task description (overrides initial_task_prompt)
             hf_token: Optional HuggingFace token for llm_query/llm_query_batched.
                 If provided, creates LLM functions using this token.
+                Security: Token is NOT stored in state or logged.
             llm_model: Optional model name for LLM functions (default: from env or Qwen3-Coder)
             **kwargs: Additional reset parameters
 
@@ -212,7 +220,7 @@
         """
         effective_context = context or self.initial_context
         effective_task_prompt = task_prompt or self.initial_task_prompt
-
+
         # Create LLM functions if not already provided at init
         # Priority: client hf_token > server HF_TOKEN env var
         if not self.llm_query_fn:
@@ -234,7 +242,9 @@
         )
 
         # Initialize executor
-        self._executor = PythonExecutor(max_output_length=self.max_output_length)
+        self._executor = PythonExecutor(
+            max_output_length=self.max_output_length
+        )
 
         # Initialize answer dict (Prime Intellect style)
         self._executor.set_variable("answer", {"content": "", "ready": False})
@@ -248,8 +258,12 @@
         if self.llm_query_fn:
             self._executor.inject_function("llm_query", self.llm_query_fn)
         if self.llm_batch_fn:
-            self._executor.inject_function("llm_query_batched", self.llm_batch_fn)  # Official name
-            self._executor.inject_function("llm_batch", self.llm_batch_fn)  # Alias
+            self._executor.inject_function(
+                "llm_query_batched", self.llm_batch_fn
+            )  # Official name
+            self._executor.inject_function(
+                "llm_batch", self.llm_batch_fn
+            )  # Alias
 
         # Inject FINAL helper function so both FINAL(x) and print(f'FINAL({x})') work
         # Returns the FINAL pattern as a string so it appears in stdout for detection
@@ -271,7 +285,9 @@
             value = executor.get_variable(var_name_clean)
             if value is not None:
                 return f"FINAL({value})"
-            return f"FINAL_VAR({var_name_clean})"  # Fallback for regex detection
+            return (
+                f"FINAL_VAR({var_name_clean})"  # Fallback for regex detection
+            )
 
         self._executor.inject_function("FINAL_VAR", final_var_helper)
 
@@ -282,14 +298,12 @@
         message_parts = ["REPL environment initialized."]
         if effective_context:
             message_parts.append(
-                f"Context loaded ({len(effective_context)} chars). "
-                "Use 'context' variable to access it."
+                f"Context loaded ({len(effective_context)} chars). Use 'context' variable to access it."
             )
         if effective_task_prompt:
             message_parts.append(f"Task: {effective_task_prompt}")
         message_parts.append(
-            "Use answer['content'] to store your answer, "
-            "and set answer['ready'] = True when done."
+            "Use answer['content'] to store your answer, and set answer['ready'] = True when done."
         )
 
         return REPLObservation(
@@ -335,7 +349,9 @@
             REPLObservation with execution results
         """
         if self._state is None or self._executor is None:
-            raise RuntimeError("Environment not initialized. Call reset() first.")
+            raise RuntimeError(
+                "Environment not initialized. Call reset() first."
+            )
 
         self._state.step_count += 1
         self._state.iteration += 1
@@ -393,7 +409,9 @@
                 if self._state.context
                 else None
             ),
-            context_length=len(self._state.context) if self._state.context else 0,
+            context_length=len(self._state.context)
+            if self._state.context
+            else 0,
            available_variables=self._state.namespace_keys,
             iteration=self._state.iteration,
             max_iterations=self.max_iterations,
@@ -472,7 +490,9 @@
             done=True,
             reward=reward,
             metadata={
-                "final_answer": self._state.final_answer if self._state else None,
+                "final_answer": self._state.final_answer
+                if self._state
+                else None,
                 "total_execution_time": (
                     self._state.total_execution_time if self._state else 0
                 ),
@@ -491,7 +511,9 @@
             RuntimeError: If environment not initialized
         """
         if self._state is None:
-            raise RuntimeError("Environment not initialized. Call reset() first.")
+            raise RuntimeError(
+                "Environment not initialized. Call reset() first."
+            )
         return self._state
 
     def close(self) -> None:
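
The reset() message above describes the answer protocol the environment expects from model-generated code: fill answer['content'], flip answer['ready'], and optionally emit the FINAL pattern on stdout. A sketch of what such code looks like from inside the REPL; 'answer' and FINAL are injected by the environment at reset() and are stubbed here for illustration:

# Stubs standing in for the names the environment injects at reset().
answer = {"content": "", "ready": False}   # injected answer dict

def FINAL(x):
    # Per the diff: returns the FINAL pattern as a string so that
    # printing it puts "FINAL(...)" on stdout for regex detection.
    return f"FINAL({x})"

# What a model-generated step is expected to do when it is done:
answer["content"] = "42"
answer["ready"] = True
print(FINAL(answer["content"]))            # -> FINAL(42)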