HF_Agents_Final_Project

Runtime error

App Files Files Community

Yago Bolivar committed on May 12, 2025

Commit

0d2816b

1 Parent(s): b7e30dd

feat: implement CodeExecutionTool for safe code execution and output extraction

Browse files

test: add unit tests for CodeExecutionTool's safety analysis and functionality

Files changed (3) hide show

src/python_tool.py +216 -0
tests/__init__.py +0 -0
tests/test_python_tool.py +44 -0

src/python_tool.py ADDED Viewed

	@@ -0,0 +1,216 @@

import ast
import contextlib
import io
import math
import re
import signal
import threading
import traceback
from typing import Dict, Any, Optional, Union, List
class CodeExecutionTool:
    """Execute Python code with basic safeguards and extract numeric outputs.

    Safeguards applied on every run:

    * a static AST scan rejecting banned imports, direct ``exec``/``eval``
      calls and access to dunder attributes;
    * a restricted globals dict whose ``__builtins__`` is empty;
    * a wall-clock timeout, enforced with ``SIGALRM`` where that is possible;
    * truncation of the captured output to ``max_output_size`` characters.

    NOTE(security): this is a best-effort filter, not a real sandbox.
    In-process ``exec`` of untrusted code can still exhaust memory or find
    escape routes the static scan does not know about. Run truly hostile
    code in a separate, resource-limited process or container.
    """

    # Numbers embedded in text: optional sign, integer/decimal part, optional
    # exponent. The look-behind stops a match from starting in the middle of
    # another number, so "10-20" yields 10 and 20 rather than 10 and -20.
    _NUMBER_RE = re.compile(
        r'(?<![\d.])[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
    )
    _TRUNCATION_MARKER = "... [output truncated]"

    def __init__(self, timeout: int = 5, max_output_size: int = 10000):
        """Configure the tool.

        Args:
            timeout: Maximum execution time in seconds (fractions allowed).
                A value of 0 or less disables the time limit.
            max_output_size: Maximum number of characters of captured output
                returned to the caller.
        """
        self.timeout = timeout  # Maximum execution time in seconds
        self.max_output_size = max_output_size
        # Restricted imports - add more as needed
        self.banned_modules = [
            'os', 'subprocess', 'sys', 'builtins', 'importlib', 'eval',
            'pickle', 'requests', 'socket', 'shutil'
        ]

    def _analyze_code_safety(self, code: str) -> Dict[str, Any]:
        """Perform static analysis to check for potentially harmful code.

        Args:
            code: Python source to inspect.

        Returns:
            ``{"safe": True}`` when nothing suspicious was found, otherwise
            ``{"safe": False, "reason": <explanation>}``.
        """
        try:
            parsed = ast.parse(code)
        except (SyntaxError, ValueError):
            # ValueError is raised for NUL bytes on older Python versions.
            return {"safe": False, "reason": "Invalid Python syntax"}

        imports: List[str] = []
        calls_exec_or_eval = False
        dunder_attr: Optional[str] = None
        for node in ast.walk(parsed):
            if isinstance(node, ast.Import):
                imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    imports.append(node.module)
                else:
                    # "from . import x": module is None, so the imported
                    # names are the only thing there is to check.
                    imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.Call):
                func = node.func
                if isinstance(func, ast.Name) and func.id in ('exec', 'eval'):
                    calls_exec_or_eval = True
            elif isinstance(node, ast.Attribute):
                attr = node.attr
                if (dunder_attr is None and attr.startswith('__')
                        and attr.endswith('__')):
                    dunder_attr = attr

        # Compare whole dotted components (ignoring leading underscores, so
        # accelerator modules such as "_pickle" are still caught) instead of
        # substrings: "cosmos" must not be rejected for containing "os".
        banned = set(self.banned_modules)
        dangerous_imports = [
            name for name in imports
            if any(part.lstrip('_') in banned for part in name.split('.'))
        ]
        if dangerous_imports:
            return {
                "safe": False,
                "reason": f"Potentially harmful imports detected: {dangerous_imports}"
            }
        if calls_exec_or_eval:
            return {
                "safe": False,
                "reason": "Contains exec() or eval() calls"
            }
        if dunder_attr is not None:
            # Chains like ().__class__.__base__.__subclasses__() are the usual
            # way out of an empty-builtins environment.
            return {
                "safe": False,
                "reason": f"Access to special attribute '{dunder_attr}' is not allowed"
            }
        return {"safe": True}

    def _timeout_handler(self, signum, frame):
        """Handler for timeout signal."""
        raise TimeoutError("Code execution timed out")

    def _extract_numeric_value(self, output: str) -> Optional[Union[int, float]]:
        """Extract the final numeric value from output.

        Lines are scanned from last to first. A line that is a number on its
        own wins; otherwise the last number embedded in the line is used.

        Args:
            output: Captured program output.

        Returns:
            The value as a float, or ``None`` when no line contains a number.
        """
        lines = [line.strip() for line in output.strip().split('\n') if line.strip()]
        for line in reversed(lines):
            # Try direct conversion first
            try:
                return float(line)
            except ValueError:
                pass
            # Otherwise take the last number embedded in the text, so that
            # "Step 3: total = 18" yields 18 rather than 3.
            matches = self._NUMBER_RE.findall(line)
            if matches:
                return float(matches[-1])
        return None

    def execute_file(self, filepath: str) -> Dict[str, Any]:
        """Execute Python code from file and capture the output.

        Args:
            filepath: Path of the UTF-8 encoded source file to run.

        Returns:
            The result dict of :meth:`execute_code`, or a failure dict with
            an ``"error"`` message when the file cannot be read.
        """
        # Only the read is guarded: failures inside execute_code must not be
        # reported as "Error reading file".
        try:
            with open(filepath, 'r', encoding='utf-8') as file:
                code = file.read()
        except FileNotFoundError:
            return {"success": False, "error": f"File not found: {filepath}"}
        except Exception as e:
            return {
                "success": False,
                "error": f"Error reading file: {str(e)}"
            }
        return self.execute_code(code)

    def _build_safe_globals(self) -> Dict[str, Any]:
        """Return a fresh globals dict holding only whitelisted callables."""
        safe_globals = {
            'abs': abs,
            'all': all,
            'any': any,
            'bin': bin,
            'bool': bool,
            'chr': chr,
            'complex': complex,
            'dict': dict,
            'divmod': divmod,
            'enumerate': enumerate,
            'filter': filter,
            'float': float,
            'format': format,
            'frozenset': frozenset,
            'hash': hash,
            'hex': hex,
            'int': int,
            'isinstance': isinstance,
            'issubclass': issubclass,
            'len': len,
            'list': list,
            'map': map,
            'max': max,
            'min': min,
            'oct': oct,
            'ord': ord,
            'pow': pow,
            'print': print,
            'range': range,
            'reversed': reversed,
            'round': round,
            'set': set,
            'sorted': sorted,
            'str': str,
            'sum': sum,
            'tuple': tuple,
            'zip': zip,
            '__builtins__': {},  # Empty builtins for extra security
        }
        # Expose the public math functions/constants as bare names, but never
        # let them shadow a builtin: math.pow would otherwise replace pow()
        # and break the three-argument and integer-result forms.
        for name in dir(math):
            if not name.startswith('_'):
                safe_globals.setdefault(name, getattr(math, name))
        return safe_globals

    def _can_use_alarm(self) -> bool:
        """Tell whether a SIGALRM-based time limit can be installed here."""
        # Signal handlers can only be set from the main thread, and SIGALRM
        # does not exist on Windows.
        return (
            bool(self.timeout)
            and self.timeout > 0
            and hasattr(signal, "SIGALRM")
            and threading.current_thread() is threading.main_thread()
        )

    def _truncate_output(self, output: str) -> str:
        """Clip *output* to ``max_output_size`` characters, marking the cut."""
        if len(output) > self.max_output_size:
            return output[:self.max_output_size] + self._TRUNCATION_MARKER
        return output

    def execute_code(self, code: str) -> Dict[str, Any]:
        """Execute Python code string and capture the output.

        The time limit relies on ``SIGALRM``. When that is unavailable (a
        non-main thread, or a platform without the signal) the code still
        runs, but without a time limit, instead of failing with an uncaught
        ``ValueError``/``AttributeError``.

        Args:
            code: Python source to run.

        Returns:
            On success: ``{"success": True, "raw_output", "numeric_value",
            "has_numeric_result"}``. On failure: ``{"success": False,
            "error"}`` plus ``"traceback"`` and ``"raw_output"`` when the
            code itself raised.
        """
        # Check code safety first
        safety_check = self._analyze_code_safety(code)
        if not safety_check["safe"]:
            return {
                "success": False,
                "error": f"Security check failed: {safety_check['reason']}"
            }

        safe_globals = self._build_safe_globals()
        output_buffer = io.StringIO()

        use_alarm = self._can_use_alarm()
        old_handler = None
        if use_alarm:
            old_handler = signal.getsignal(signal.SIGALRM)
            signal.signal(signal.SIGALRM, self._timeout_handler)
            # setitimer (unlike alarm) also accepts fractional seconds.
            signal.setitimer(signal.ITIMER_REAL, self.timeout)
        try:
            # Execute code with stdout/stderr capture
            with contextlib.redirect_stdout(output_buffer), \
                    contextlib.redirect_stderr(output_buffer):
                exec(code, safe_globals)
            output = output_buffer.getvalue()
            # Extract from the full output: truncation could otherwise drop
            # or cut in half the final number.
            numeric_result = self._extract_numeric_value(output)
            return {
                "success": True,
                "raw_output": self._truncate_output(output),
                "numeric_value": numeric_result,
                "has_numeric_result": numeric_result is not None
            }
        except TimeoutError:
            return {
                "success": False,
                "error": f"Code execution timed out after {self.timeout} seconds"
            }
        except Exception as e:
            error_info = traceback.format_exc()
            return {
                "success": False,
                "error": str(e),
                "traceback": error_info,
                "raw_output": self._truncate_output(output_buffer.getvalue())
            }
        finally:
            if use_alarm:
                # Cancel the timer and restore the previous handler.
                signal.setitimer(signal.ITIMER_REAL, 0)
                # getsignal() returns None for handlers not installed from
                # Python; signal.signal() does not accept None.
                signal.signal(
                    signal.SIGALRM,
                    signal.SIG_DFL if old_handler is None else old_handler,
                )
# Demo: run a small snippet through the tool and print the result dict
if __name__ == "__main__":
    demo_tool = CodeExecutionTool()
    demo_source = """
# Example code that calculates a value
total = 0
for i in range(10):
    total += i * 2
print(f"The result is {total}")
    """
    print(demo_tool.execute_code(demo_source))

tests/__init__.py ADDED Viewed

File without changes

tests/test_python_tool.py ADDED Viewed

	@@ -0,0 +1,44 @@

import unittest
import sys
import os
from pathlib import Path

# Put the repository root (the parent of this tests directory) on sys.path so
# that the ``src`` package can be imported.
_TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(_TESTS_DIR))

from src.python_tool import CodeExecutionTool
class TestCodeExecutionTool(unittest.TestCase):
    """Unit tests for the static safety analysis of CodeExecutionTool."""

    def setUp(self):
        # A fresh tool per test keeps the cases independent of each other.
        self.code_tool = CodeExecutionTool()

    def test_analyze_code_safety_imports(self):
        """A banned import is rejected and named in the reason."""
        verdict = self.code_tool._analyze_code_safety("import os")
        self.assertFalse(verdict["safe"])
        self.assertIn("os", verdict["reason"])

    def test_analyze_code_safety_exec_eval(self):
        """A direct exec() call is rejected with a matching reason."""
        verdict = self.code_tool._analyze_code_safety("exec('print(1)')")
        self.assertFalse(verdict["safe"])
        self.assertIn("exec()", verdict["reason"])

    def test_analyze_code_safety_valid_code(self):
        """Harmless code passes the safety analysis."""
        verdict = self.code_tool._analyze_code_safety("print(1 + 1)")
        self.assertTrue(verdict["safe"])

    def test_common_question_reverse_word(self):
        """Check the reversed-sentence question from common_questions.json."""
        question = ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI"
        expected_answer = "Right"
        decoded = question[::-1]
        self.assertEqual(decoded, "If you understand this sentence, write the opposite of the word \"left\" as the answer.")
        self.assertEqual(expected_answer, "Right")


if __name__ == "__main__":
    unittest.main()