Upload folder using huggingface_hub
- client.py +7 -2
- models.py +14 -5
- prompts.py +7 -4
- server/app.py +4 -1
- server/python_executor.py +43 -20
- server/repl_environment.py +38 -16
client.py
CHANGED
@@ -189,6 +189,7 @@ class REPLEnv:
             hf_token: Optional HuggingFace token for llm_query/llm_query_batched.
                 When provided, the server uses this token for sub-LLM calls
                 instead of its own configured token.
+                Security: Token is NOT stored in state or logged.
             llm_model: Optional model name for LLM functions (default: Qwen3-Coder-480B).

         Returns:
@@ -264,7 +265,9 @@ class REPLEnv:
         Returns:
             StepResult with done=True.
         """
+        return self.step(
+            REPLAction(code="", is_final=True, final_answer=answer)
+        )

     def get_variable(self, name: str) -> StepResult[REPLObservation]:
         """
@@ -312,7 +315,9 @@ class REPLEnv:
         self._remote_client.close()
         self._remote_client = None

+    def _wrap_observation(
+        self, obs: REPLObservation
+    ) -> StepResult[REPLObservation]:
         """Wrap a local REPLObservation in a StepResult."""
         return StepResult(
             observation=obs,
models.py
CHANGED
@@ -37,7 +37,8 @@ class REPLAction(Action):

     code: str = Field(default="", description="Python code to execute")
     is_final: bool = Field(
+        default=False,
+        description="Whether this action signals the final answer",
     )
     final_answer: Optional[str] = Field(
         default=None, description="Final answer if is_final=True"
@@ -47,7 +48,9 @@ class REPLAction(Action):
 class CodeBlockResult(BaseModel):
     """Result of executing a single code block."""

+    stdout: str = Field(
+        default="", description="Standard output from execution"
+    )
     stderr: str = Field(default="", description="Standard error from execution")
     locals_snapshot: Dict[str, str] = Field(
         default_factory=dict,
@@ -56,7 +59,9 @@ class CodeBlockResult(BaseModel):
     execution_time: float = Field(
         default=0.0, ge=0, description="Execution time in seconds"
     )
+    success: bool = Field(
+        default=True, description="Whether execution succeeded"
+    )
     exception: Optional[str] = Field(
         default=None, description="Exception message if execution failed"
     )
@@ -79,7 +84,9 @@ class REPLObservation(Observation):
         default_factory=list,
         description="List of variable names available in the namespace",
     )
+    iteration: int = Field(
+        default=0, ge=0, description="Current iteration number"
+    )
     max_iterations: int = Field(
         default=30, ge=1, description="Maximum allowed iterations"
     )
@@ -94,7 +101,9 @@ class REPLState(State):
     task_prompt: Optional[str] = Field(
         default=None, description="The task description to solve"
     )
+    iteration: int = Field(
+        default=0, ge=0, description="Current iteration number"
+    )
     max_iterations: int = Field(
         default=30, ge=1, description="Max iterations before termination"
     )
prompts.py
CHANGED
@@ -278,10 +278,13 @@ def build_user_prompt(
             else USER_PROMPT
         )
     else:
+        prompt = (
+            "The history before is your previous interactions with the REPL environment. "
+            + (
+                USER_PROMPT_WITH_ROOT.format(root_prompt=root_prompt)
+                if root_prompt
+                else USER_PROMPT
+            )
+        )

     # Inform model about multiple contexts if present
server/app.py
CHANGED
@@ -34,6 +34,7 @@ Environment Variables:
     HF_TOKEN: Fallback HuggingFace API token (client token takes priority)
     LLM_MODEL: Model to use for llm_query/llm_query_batched (default: Qwen/Qwen3-Coder-480B-A35B-Instruct)
 """
+
 import os

 # Support both in-repo and standalone imports
@@ -60,7 +61,9 @@ if HF_TOKEN:
     print(f"[REPL Server] Default model: {LLM_MODEL}")
 else:
     print("[REPL Server] No server HF_TOKEN configured")
+    print(
+        "[REPL Server] LLM functions will be enabled if client passes hf_token in reset()"
+    )

 # Simple factory - LLM functions are created dynamically in reset() based on token
 env_factory = REPLEnvironment
server/python_executor.py
CHANGED
@@ -42,18 +42,19 @@ class PythonExecutor:
     def __init__(
         self,
         max_output_length: int = 8192,
-        timeout: float = 30.0,
         allowed_imports: Optional[List[str]] = None,
     ):
         """Initialize the executor.

         Args:
             max_output_length: Maximum characters for stdout/stderr (default 8192)
-            timeout: Execution timeout in seconds (passed to LocalPythonExecutor)
             allowed_imports: List of allowed module names for import
+
+        Note:
+            smolagents.LocalPythonExecutor does NOT support wall-clock timeouts.
+            Instead, it limits operations (10M ops) and while iterations (1M).
         """
         self.max_output_length = max_output_length
-        self.timeout = timeout

         # Default allowed imports for RLM tasks
         default_imports = [
@@ -107,7 +108,9 @@ class PythonExecutor:
         """Register helper functions with the executor."""
         helpers = {
             "format_exc": traceback.format_exc,
+            "safe_json_dumps": lambda obj: json.dumps(
+                obj, default=lambda o: repr(o)
+            ),
         }
         # Register helpers as callable tools
         for name, func in helpers.items():
@@ -120,7 +123,10 @@ class PythonExecutor:
             # Type ignore: smolagents accepts callables despite Tool type hint
             self._executor.send_tools(self._callable_tools)  # type: ignore[arg-type]
         except Exception:
+            logger.debug(
+                "send_tools failed; continuing without extra tools",
+                exc_info=True,
+            )

     def set_context(self, context: str, variable_name: str = "context") -> None:
         """Load context into namespace as a variable.
@@ -139,11 +145,13 @@ class PythonExecutor:
             value: Variable value
         """
         # Access the executor's internal state to set variables
+        if hasattr(self._executor, "state"):
             self._executor.state[name] = value
         else:
             # Fallback: store in injected vars for later retrieval
+            self._executor._injected_vars = getattr(
+                self._executor, "_injected_vars", {}
+            )
             self._executor._injected_vars[name] = value

         self._user_variables.add(name)
@@ -158,11 +166,11 @@ class PythonExecutor:
             The variable value or None if not found
         """
         # Try to get from executor's state
+        if hasattr(self._executor, "state"):
             return self._executor.state.get(name)

         # Fallback to injected vars
+        if hasattr(self._executor, "_injected_vars"):
             return self._executor._injected_vars.get(name)

         return None
@@ -176,9 +184,9 @@ class PythonExecutor:
         variables = set()

         # Get from executor's state
+        if hasattr(self._executor, "state"):
             for key in self._executor.state:
+                if not key.startswith("_"):
                     variables.add(key)

         # Include tracked user variables
@@ -203,7 +211,7 @@ class PythonExecutor:

         # Track state before execution
         pre_state_keys = set()
+        if hasattr(self._executor, "state"):
             pre_state_keys = set(self._executor.state.keys())

         stdout_parts: list[str] = []
@@ -249,29 +257,38 @@ class PythonExecutor:
                 success = False
                 exception_msg = str(ex)
             except Exception:
+                logger.debug(
+                    "Failed to read exec_result.exception", exc_info=True
+                )

             # Determine success from exit_code if available
             try:
                 if hasattr(exec_result, "exit_code"):
+                    if (
+                        exec_result.exit_code is not None
+                        and exec_result.exit_code != 0
+                    ):
                         success = False
                 elif hasattr(exec_result, "success"):
                     success = bool(exec_result.success)
             except Exception:
+                logger.debug(
+                    "Failed to determine exec_result exit code", exc_info=True
+                )

         except Exception as e:
             success = False
+            exception_msg = (
+                f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
+            )
             stderr_parts.append(exception_msg)

         execution_time = time.time() - start_time

         # Capture new/modified variables
+        if hasattr(self._executor, "state"):
             for key in self._executor.state:
+                if key not in pre_state_keys and not key.startswith("_"):
                     try:
                         val = self._executor.state[key]
                         val_repr = repr(val)
@@ -288,10 +305,16 @@ class PythonExecutor:

         # Truncate output to max_output_length
         if len(stdout) > self.max_output_length:
+            stdout = (
+                stdout[: self.max_output_length]
+                + f"\n... (truncated, total {len(stdout)} chars)"
+            )

         if len(stderr) > self.max_output_length:
+            stderr = (
+                stderr[: self.max_output_length]
+                + f"\n... (truncated, total {len(stderr)} chars)"
+            )

         return {
             "stdout": stdout,
server/repl_environment.py
CHANGED
@@ -102,8 +102,12 @@ class REPLEnvironment(Environment):
             llm_batch_fn: Optional function for llm_query_batched() support
         """
         self.initial_context = context or os.environ.get("REPL_CONTEXT", "")
+        self.initial_task_prompt = task_prompt or os.environ.get(
+            "REPL_TASK_PROMPT", ""
+        )
+        self.max_iterations = int(
+            os.environ.get("REPL_MAX_ITERATIONS", max_iterations)
+        )
         self.max_output_length = max_output_length
         self.context_preview_length = context_preview_length

@@ -130,8 +134,11 @@ class REPLEnvironment(Environment):

         This allows clients to use their own HF token instead of the server's.

+        Security: The token is used only to initialize the InferenceClient
+        and is NOT stored in state, logged, or persisted anywhere.
+
         Args:
-            hf_token: HuggingFace API token
+            hf_token: HuggingFace API token (not logged or persisted)
             llm_model: Model to use (default: Qwen/Qwen3-Coder-480B-A35B-Instruct)
         """
         from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -204,6 +211,7 @@ class REPLEnvironment(Environment):
             task_prompt: Task description (overrides initial_task_prompt)
             hf_token: Optional HuggingFace token for llm_query/llm_query_batched.
                 If provided, creates LLM functions using this token.
+                Security: Token is NOT stored in state or logged.
             llm_model: Optional model name for LLM functions (default: from env or Qwen3-Coder)
             **kwargs: Additional reset parameters
@@ -212,7 +220,7 @@ class REPLEnvironment(Environment):
         """
         effective_context = context or self.initial_context
         effective_task_prompt = task_prompt or self.initial_task_prompt
+
         # Create LLM functions if not already provided at init
         # Priority: client hf_token > server HF_TOKEN env var
         if not self.llm_query_fn:
@@ -234,7 +242,9 @@ class REPLEnvironment(Environment):
         )

         # Initialize executor
+        self._executor = PythonExecutor(
+            max_output_length=self.max_output_length
+        )

         # Initialize answer dict (Prime Intellect style)
         self._executor.set_variable("answer", {"content": "", "ready": False})
@@ -248,8 +258,12 @@ class REPLEnvironment(Environment):
         if self.llm_query_fn:
             self._executor.inject_function("llm_query", self.llm_query_fn)
         if self.llm_batch_fn:
+            self._executor.inject_function(
+                "llm_query_batched", self.llm_batch_fn
+            )  # Official name
+            self._executor.inject_function(
+                "llm_batch", self.llm_batch_fn
+            )  # Alias

         # Inject FINAL helper function so both FINAL(x) and print(f'FINAL({x})') work
         # Returns the FINAL pattern as a string so it appears in stdout for detection
@@ -271,7 +285,9 @@ class REPLEnvironment(Environment):
             value = executor.get_variable(var_name_clean)
             if value is not None:
                 return f"FINAL({value})"
+            return (
+                f"FINAL_VAR({var_name_clean})"  # Fallback for regex detection
+            )

         self._executor.inject_function("FINAL_VAR", final_var_helper)

@@ -282,14 +298,12 @@ class REPLEnvironment(Environment):
         message_parts = ["REPL environment initialized."]
         if effective_context:
             message_parts.append(
-                f"Context loaded ({len(effective_context)} chars). "
-                "Use 'context' variable to access it."
+                f"Context loaded ({len(effective_context)} chars). Use 'context' variable to access it."
             )
         if effective_task_prompt:
             message_parts.append(f"Task: {effective_task_prompt}")
         message_parts.append(
-            "Use answer['content'] to store your answer, "
-            "and set answer['ready'] = True when done."
+            "Use answer['content'] to store your answer, and set answer['ready'] = True when done."
         )

         return REPLObservation(
@@ -335,7 +349,9 @@ class REPLEnvironment(Environment):
             REPLObservation with execution results
         """
         if self._state is None or self._executor is None:
+            raise RuntimeError(
+                "Environment not initialized. Call reset() first."
+            )

         self._state.step_count += 1
         self._state.iteration += 1
@@ -393,7 +409,9 @@ class REPLEnvironment(Environment):
                 if self._state.context
                 else None
             ),
+            context_length=len(self._state.context)
+            if self._state.context
+            else 0,
             available_variables=self._state.namespace_keys,
             iteration=self._state.iteration,
             max_iterations=self.max_iterations,
@@ -472,7 +490,9 @@ class REPLEnvironment(Environment):
             done=True,
             reward=reward,
             metadata={
+                "final_answer": self._state.final_answer
+                if self._state
+                else None,
                 "total_execution_time": (
                     self._state.total_execution_time if self._state else 0
                 ),
@@ -491,7 +511,9 @@ class REPLEnvironment(Environment):
             RuntimeError: If environment not initialized
         """
         if self._state is None:
+            raise RuntimeError(
+                "Environment not initialized. Call reset() first."
+            )
         return self._state

     def close(self) -> None: