# Spaces: frontier-ai / gMAS (Running). File size: 7,520 Bytes
"""
Code Interpreter tool — Python code execution.

Allows agents to execute Python code in an isolated environment.
Supports timeouts and output limits.
"""

import io
import traceback
from contextlib import redirect_stderr, redirect_stdout
from typing import Any

from .base import BaseTool, ToolResult


class CodeInterpreterTool(BaseTool):
    """
    Tool for executing Python code.

    Executes Python code in the current process and returns the captured
    output. Supports:
    - Output size limit (``max_output_size``)
    - Restricted builtins (``safe_mode``) plus a fixed set of preloaded modules
    - Echoing the value of a bare expression, REPL-style

    Limitations:
    - ``timeout`` is stored on the instance but is not enforced by this
      class; long-running code blocks the caller.
    - The restricted-builtins sandbox is best-effort and NOT a security
      boundary: code runs in-process with ordinary attribute access to the
      preloaded module objects, which are shared with the host program.
      Run untrusted code in a separate, OS-level isolated process instead.

    Example:
        tool = CodeInterpreterTool(timeout=10, max_output_size=4096)
        result = tool.execute(code="print(2 + 2)")

        if result.success:
            print(result.output)  # "4"
        else:
            print(f"Error: {result.error}")

    """

    def __init__(
        self,
        timeout: int = 30,
        max_output_size: int = 8192,
        *,
        safe_mode: bool = True,
    ):
        """
        Create CodeInterpreterTool.

        Args:
            timeout: Intended maximum execution time in seconds. Stored only;
                this class does not currently enforce it.
            max_output_size: Maximum output size in characters (measured with
                ``len()`` on the captured text) before truncation.
            safe_mode: If True, restricts available builtins for safety.

        """
        self._timeout = timeout
        self._max_output_size = max_output_size
        self._safe_mode = safe_mode

        # Safe builtins for sandbox
        self._safe_builtins = {
            # Types
            "bool": bool,
            "int": int,
            "float": float,
            "str": str,
            "list": list,
            "dict": dict,
            "tuple": tuple,
            "set": set,
            "frozenset": frozenset,
            "bytes": bytes,
            "bytearray": bytearray,
            # Functions
            "abs": abs,
            "all": all,
            "any": any,
            "bin": bin,
            "chr": chr,
            "divmod": divmod,
            "enumerate": enumerate,
            "filter": filter,
            "format": format,
            "hash": hash,
            "hex": hex,
            "len": len,
            "map": map,
            "max": max,
            "min": min,
            "oct": oct,
            "ord": ord,
            "pow": pow,
            "print": print,
            "range": range,
            "repr": repr,
            "reversed": reversed,
            "round": round,
            "slice": slice,
            "sorted": sorted,
            "sum": sum,
            "zip": zip,
            # Exceptions
            "Exception": Exception,
            "ValueError": ValueError,
            "TypeError": TypeError,
            "KeyError": KeyError,
            "IndexError": IndexError,
            "ZeroDivisionError": ZeroDivisionError,
            # Other
            "True": True,
            "False": False,
            "None": None,
            "isinstance": isinstance,
            "issubclass": issubclass,
            "type": type,
            "callable": callable,
            "hasattr": hasattr,
            "getattr": getattr,
            "setattr": setattr,
            "iter": iter,
            "next": next,
            # Input is blocked: always yields an empty string. The prompt is
            # optional, as with the real builtin, so ``input()`` works too.
            "input": lambda _prompt="": "",
        }

    @property
    def name(self) -> str:
        """Identifier under which the tool is exposed."""
        return "code_interpreter"

    @property
    def description(self) -> str:
        """Human/LLM-readable summary of what the tool does."""
        return (
            "Execute Python code and return the output. "
            "Use for calculations, data processing, and algorithmic tasks. "
            "The code runs in a sandboxed environment with limited access."
        )

    @property
    def parameters_schema(self) -> dict[str, Any]:
        """JSON-schema style description of the accepted arguments."""
        return {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "Python code to execute. Can be multi-line.",
                },
            },
            "required": ["code"],
        }

    def _get_safe_globals(self) -> dict[str, Any]:
        """
        Build a fresh globals dict for one execution.

        Returns:
            A new dict containing ``__builtins__`` (a copy of the restricted
            table in safe mode, the real builtins otherwise) and the
            preloaded modules.

        """
        import collections
        import datetime
        import functools
        import itertools
        import json
        import math
        import random
        import re
        import statistics

        # Hand out a copy of the restricted table so executed code cannot
        # add to or rebind the builtins seen by later executions.
        builtins_table = dict(self._safe_builtins) if self._safe_mode else __builtins__

        return {
            "__builtins__": builtins_table,
            # Preloaded modules (shared module objects, not copies)
            "math": math,
            "statistics": statistics,
            "json": json,
            "re": re,
            "datetime": datetime,
            "collections": collections,
            "itertools": itertools,
            "functools": functools,
            "random": random,
        }

    def _truncate(self, text: str) -> str:
        """Cut ``text`` to ``max_output_size`` characters, marking the cut."""
        if len(text) > self._max_output_size:
            return text[: self._max_output_size] + "\n... (output truncated)"
        return text

    def execute(self, code: str = "", **_kwargs: Any) -> ToolResult:
        """
        Execute Python code.

        The code is first compiled as a single expression; if that succeeds
        it is evaluated and a non-None value is echoed to the captured
        stdout as ``repr(value)``. Otherwise the code is executed as
        statements. stdout and stderr are captured for the duration.

        Args:
            code: Python code to execute.

        Returns:
            ToolResult with ``success=True`` and the captured (possibly
            truncated) output, or ``success=False`` with an error message of
            the form ``"ExcType: message"`` and whatever stdout was produced
            before the failure.

        """
        if not code:
            return ToolResult(
                tool_name=self.name,
                success=False,
                error="No code provided",
            )

        # Capture stdout and stderr
        stdout_capture = io.StringIO()
        stderr_capture = io.StringIO()

        try:
            # A single dict serves as both globals and locals so that
            # functions defined by the code can see each other when called.
            exec_globals = self._get_safe_globals()

            with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
                # Only the *compile* step decides expression vs. statements.
                # Keeping eval() out of this try block means a SyntaxError
                # raised while the expression runs is reported as an error
                # instead of triggering a second execution via exec().
                try:
                    compiled = compile(code, "<code>", "eval")
                except SyntaxError:
                    compiled = None

                # SECURITY: eval/exec run caller-supplied code in-process.
                # The restricted builtins reduce accidents but do not
                # contain hostile code.
                if compiled is None:
                    exec(code, exec_globals)
                else:
                    result = eval(compiled, exec_globals)
                    if result is not None:
                        # Echo the expression value like an interactive
                        # session; goes to the captured stdout.
                        print(repr(result))

        except Exception as e:  # noqa: BLE001 - boundary around arbitrary user code
            # Any ordinary exception from user code (ImportError in safe
            # mode, OverflowError, AssertionError, ...) becomes a failed
            # result. KeyboardInterrupt/SystemExit still propagate.
            error_output = stderr_capture.getvalue()

            error_msg = f"{type(e).__name__}: {e}"
            if error_output:
                error_msg = f"{error_output}\n{error_msg}"

            return ToolResult(
                tool_name=self.name,
                success=False,
                error=error_msg,
                output=self._truncate(stdout_capture.getvalue()),
            )

        # Collect output
        output = stdout_capture.getvalue()
        stderr_output = stderr_capture.getvalue()
        if stderr_output:
            output += f"\n[stderr]\n{stderr_output}"

        # Limit output size, then trim surrounding whitespace; whitespace-only
        # output is reported the same way as no output at all.
        output = self._truncate(output).strip()

        return ToolResult(
            tool_name=self.name,
            success=True,
            output=output or "(no output)",
        )