Upload folder using huggingface_hub
- client.py +7 -2
- models.py +14 -5
- prompts.py +7 -4
- server/app.py +4 -1
- server/python_executor.py +43 -20
- server/repl_environment.py +38 -16
client.py
CHANGED
@@ -189,6 +189,7 @@ class REPLEnv:
             hf_token: Optional HuggingFace token for llm_query/llm_query_batched.
                 When provided, the server uses this token for sub-LLM calls
                 instead of its own configured token.
+                Security: Token is NOT stored in state or logged.
             llm_model: Optional model name for LLM functions (default: Qwen3-Coder-480B).

         Returns:
@@ -264,7 +265,9 @@ class REPLEnv:
         Returns:
             StepResult with done=True.
         """
+        return self.step(
+            REPLAction(code="", is_final=True, final_answer=answer)
+        )

     def get_variable(self, name: str) -> StepResult[REPLObservation]:
         """
@@ -312,7 +315,9 @@ class REPLEnv:
         self._remote_client.close()
         self._remote_client = None

+    def _wrap_observation(
+        self, obs: REPLObservation
+    ) -> StepResult[REPLObservation]:
         """Wrap a local REPLObservation in a StepResult."""
         return StepResult(
             observation=obs,
models.py
CHANGED
@@ -37,7 +37,8 @@ class REPLAction(Action):

     code: str = Field(default="", description="Python code to execute")
     is_final: bool = Field(
+        default=False,
+        description="Whether this action signals the final answer",
     )
     final_answer: Optional[str] = Field(
         default=None, description="Final answer if is_final=True"
@@ -47,7 +48,9 @@ class REPLAction(Action):
 class CodeBlockResult(BaseModel):
     """Result of executing a single code block."""

+    stdout: str = Field(
+        default="", description="Standard output from execution"
+    )
     stderr: str = Field(default="", description="Standard error from execution")
     locals_snapshot: Dict[str, str] = Field(
         default_factory=dict,
@@ -56,7 +59,9 @@ class CodeBlockResult(BaseModel):
     execution_time: float = Field(
         default=0.0, ge=0, description="Execution time in seconds"
     )
+    success: bool = Field(
+        default=True, description="Whether execution succeeded"
+    )
     exception: Optional[str] = Field(
         default=None, description="Exception message if execution failed"
     )
@@ -79,7 +84,9 @@ class REPLObservation(Observation):
         default_factory=list,
         description="List of variable names available in the namespace",
     )
+    iteration: int = Field(
+        default=0, ge=0, description="Current iteration number"
+    )
     max_iterations: int = Field(
         default=30, ge=1, description="Maximum allowed iterations"
     )
@@ -94,7 +101,9 @@ class REPLState(State):
     task_prompt: Optional[str] = Field(
         default=None, description="The task description to solve"
     )
+    iteration: int = Field(
+        default=0, ge=0, description="Current iteration number"
+    )
     max_iterations: int = Field(
         default=30, ge=1, description="Max iterations before termination"
     )
prompts.py
CHANGED
@@ -278,10 +278,13 @@ def build_user_prompt(
             else USER_PROMPT
         )
     else:
+        prompt = (
+            "The history before is your previous interactions with the REPL environment. "
+            + (
+                USER_PROMPT_WITH_ROOT.format(root_prompt=root_prompt)
+                if root_prompt
+                else USER_PROMPT
+            )
+        )

     # Inform model about multiple contexts if present
server/app.py
CHANGED
@@ -34,6 +34,7 @@ Environment Variables:
     HF_TOKEN: Fallback HuggingFace API token (client token takes priority)
     LLM_MODEL: Model to use for llm_query/llm_query_batched (default: Qwen/Qwen3-Coder-480B-A35B-Instruct)
 """
+
 import os

 # Support both in-repo and standalone imports
@@ -60,7 +61,9 @@ if HF_TOKEN:
     print(f"[REPL Server] Default model: {LLM_MODEL}")
 else:
     print("[REPL Server] No server HF_TOKEN configured")
+    print(
+        "[REPL Server] LLM functions will be enabled if client passes hf_token in reset()"
+    )

 # Simple factory - LLM functions are created dynamically in reset() based on token
 env_factory = REPLEnvironment
server/python_executor.py
CHANGED
@@ -42,18 +42,19 @@ class PythonExecutor:
     def __init__(
         self,
         max_output_length: int = 8192,
-        timeout: float = 30.0,
         allowed_imports: Optional[List[str]] = None,
     ):
         """Initialize the executor.

         Args:
             max_output_length: Maximum characters for stdout/stderr (default 8192)
-            timeout: Execution timeout in seconds (passed to LocalPythonExecutor)
             allowed_imports: List of allowed module names for import
+
+        Note:
+            smolagents.LocalPythonExecutor does NOT support wall-clock timeouts.
+            Instead, it limits operations (10M ops) and while iterations (1M).
         """
         self.max_output_length = max_output_length
-        self.timeout = timeout

         # Default allowed imports for RLM tasks
         default_imports = [
@@ -107,7 +108,9 @@ class PythonExecutor:
         """Register helper functions with the executor."""
         helpers = {
             "format_exc": traceback.format_exc,
+            "safe_json_dumps": lambda obj: json.dumps(
+                obj, default=lambda o: repr(o)
+            ),
         }
         # Register helpers as callable tools
         for name, func in helpers.items():
@@ -120,7 +123,10 @@ class PythonExecutor:
             # Type ignore: smolagents accepts callables despite Tool type hint
             self._executor.send_tools(self._callable_tools)  # type: ignore[arg-type]
         except Exception:
+            logger.debug(
+                "send_tools failed; continuing without extra tools",
+                exc_info=True,
+            )

     def set_context(self, context: str, variable_name: str = "context") -> None:
         """Load context into namespace as a variable.
@@ -139,11 +145,13 @@ class PythonExecutor:
             value: Variable value
         """
         # Access the executor's internal state to set variables
+        if hasattr(self._executor, "state"):
             self._executor.state[name] = value
         else:
             # Fallback: store in injected vars for later retrieval
+            self._executor._injected_vars = getattr(
+                self._executor, "_injected_vars", {}
+            )
             self._executor._injected_vars[name] = value

         self._user_variables.add(name)
@@ -158,11 +166,11 @@ class PythonExecutor:
             The variable value or None if not found
         """
         # Try to get from executor's state
+        if hasattr(self._executor, "state"):
             return self._executor.state.get(name)

         # Fallback to injected vars
+        if hasattr(self._executor, "_injected_vars"):
             return self._executor._injected_vars.get(name)

         return None
@@ -176,9 +184,9 @@ class PythonExecutor:
         variables = set()

         # Get from executor's state
+        if hasattr(self._executor, "state"):
             for key in self._executor.state:
+                if not key.startswith("_"):
                     variables.add(key)

         # Include tracked user variables
@@ -203,7 +211,7 @@ class PythonExecutor:

         # Track state before execution
         pre_state_keys = set()
+        if hasattr(self._executor, "state"):
             pre_state_keys = set(self._executor.state.keys())

         stdout_parts: list[str] = []
@@ -249,29 +257,38 @@ class PythonExecutor:
                 success = False
                 exception_msg = str(ex)
             except Exception:
+                logger.debug(
+                    "Failed to read exec_result.exception", exc_info=True
+                )

             # Determine success from exit_code if available
             try:
                 if hasattr(exec_result, "exit_code"):
+                    if (
+                        exec_result.exit_code is not None
+                        and exec_result.exit_code != 0
+                    ):
                         success = False
                 elif hasattr(exec_result, "success"):
                     success = bool(exec_result.success)
             except Exception:
+                logger.debug(
+                    "Failed to determine exec_result exit code", exc_info=True
+                )

         except Exception as e:
             success = False
+            exception_msg = (
+                f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
+            )
             stderr_parts.append(exception_msg)

         execution_time = time.time() - start_time

         # Capture new/modified variables
+        if hasattr(self._executor, "state"):
             for key in self._executor.state:
+                if key not in pre_state_keys and not key.startswith("_"):
                     try:
                         val = self._executor.state[key]
                         val_repr = repr(val)
@@ -288,10 +305,16 @@ class PythonExecutor:

         # Truncate output to max_output_length
         if len(stdout) > self.max_output_length:
+            stdout = (
+                stdout[: self.max_output_length]
+                + f"\n... (truncated, total {len(stdout)} chars)"
+            )

         if len(stderr) > self.max_output_length:
+            stderr = (
+                stderr[: self.max_output_length]
+                + f"\n... (truncated, total {len(stderr)} chars)"
+            )

         return {
             "stdout": stdout,
server/repl_environment.py
CHANGED
@@ -102,8 +102,12 @@ class REPLEnvironment(Environment):
             llm_batch_fn: Optional function for llm_query_batched() support
         """
         self.initial_context = context or os.environ.get("REPL_CONTEXT", "")
+        self.initial_task_prompt = task_prompt or os.environ.get(
+            "REPL_TASK_PROMPT", ""
+        )
+        self.max_iterations = int(
+            os.environ.get("REPL_MAX_ITERATIONS", max_iterations)
+        )
         self.max_output_length = max_output_length
         self.context_preview_length = context_preview_length

@@ -130,8 +134,11 @@ class REPLEnvironment(Environment):

         This allows clients to use their own HF token instead of the server's.

+        Security: The token is used only to initialize the InferenceClient
+        and is NOT stored in state, logged, or persisted anywhere.
+
         Args:
-            hf_token: HuggingFace API token
+            hf_token: HuggingFace API token (not logged or persisted)
             llm_model: Model to use (default: Qwen/Qwen3-Coder-480B-A35B-Instruct)
         """
         from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -204,6 +211,7 @@ class REPLEnvironment(Environment):
             task_prompt: Task description (overrides initial_task_prompt)
             hf_token: Optional HuggingFace token for llm_query/llm_query_batched.
                 If provided, creates LLM functions using this token.
+                Security: Token is NOT stored in state or logged.
             llm_model: Optional model name for LLM functions (default: from env or Qwen3-Coder)
             **kwargs: Additional reset parameters
@@ -212,7 +220,7 @@ class REPLEnvironment(Environment):
         """
         effective_context = context or self.initial_context
         effective_task_prompt = task_prompt or self.initial_task_prompt
+
         # Create LLM functions if not already provided at init
         # Priority: client hf_token > server HF_TOKEN env var
         if not self.llm_query_fn:
@@ -234,7 +242,9 @@ class REPLEnvironment(Environment):
         )

         # Initialize executor
+        self._executor = PythonExecutor(
+            max_output_length=self.max_output_length
+        )

         # Initialize answer dict (Prime Intellect style)
         self._executor.set_variable("answer", {"content": "", "ready": False})
@@ -248,8 +258,12 @@ class REPLEnvironment(Environment):
         if self.llm_query_fn:
             self._executor.inject_function("llm_query", self.llm_query_fn)
         if self.llm_batch_fn:
+            self._executor.inject_function(
+                "llm_query_batched", self.llm_batch_fn
+            )  # Official name
+            self._executor.inject_function(
+                "llm_batch", self.llm_batch_fn
+            )  # Alias

         # Inject FINAL helper function so both FINAL(x) and print(f'FINAL({x})') work
         # Returns the FINAL pattern as a string so it appears in stdout for detection
@@ -271,7 +285,9 @@ class REPLEnvironment(Environment):
             value = executor.get_variable(var_name_clean)
             if value is not None:
                 return f"FINAL({value})"
+            return (
+                f"FINAL_VAR({var_name_clean})"  # Fallback for regex detection
+            )

         self._executor.inject_function("FINAL_VAR", final_var_helper)

@@ -282,14 +298,12 @@ class REPLEnvironment(Environment):
         message_parts = ["REPL environment initialized."]
         if effective_context:
             message_parts.append(
-                f"Context loaded ({len(effective_context)} chars). "
-                "Use 'context' variable to access it."
+                f"Context loaded ({len(effective_context)} chars). Use 'context' variable to access it."
             )
         if effective_task_prompt:
             message_parts.append(f"Task: {effective_task_prompt}")
         message_parts.append(
-            "Use answer['content'] to store your answer, "
-            "and set answer['ready'] = True when done."
+            "Use answer['content'] to store your answer, and set answer['ready'] = True when done."
         )

         return REPLObservation(
@@ -335,7 +349,9 @@ class REPLEnvironment(Environment):
             REPLObservation with execution results
         """
         if self._state is None or self._executor is None:
+            raise RuntimeError(
+                "Environment not initialized. Call reset() first."
+            )

         self._state.step_count += 1
         self._state.iteration += 1
@@ -393,7 +409,9 @@ class REPLEnvironment(Environment):
                 if self._state.context
                 else None
             ),
+            context_length=len(self._state.context)
+            if self._state.context
+            else 0,
             available_variables=self._state.namespace_keys,
             iteration=self._state.iteration,
             max_iterations=self.max_iterations,
@@ -472,7 +490,9 @@ class REPLEnvironment(Environment):
             done=True,
             reward=reward,
             metadata={
+                "final_answer": self._state.final_answer
+                if self._state
+                else None,
                 "total_execution_time": (
                     self._state.total_execution_time if self._state else 0
                 ),
@@ -491,7 +511,9 @@ class REPLEnvironment(Environment):
             RuntimeError: If environment not initialized
         """
         if self._state is None:
+            raise RuntimeError(
+                "Environment not initialized. Call reset() first."
+            )
         return self._state

     def close(self) -> None: