"""
Code Interpreter Tool

Allows the AI agent to write and execute custom Python code for tasks that
don't have predefined tools. This is what makes it a TRUE AI Agent, not just
a function-calling bot.
"""

import os
import sys
import subprocess
import tempfile
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple

try:
    # This module never uses polars itself (only the generated script does),
    # so a missing install should surface as a structured execution error
    # from the child process rather than break importing the tool.
    import polars as pl
except ImportError:
    pl = None


# Directories scanned for files produced by the executed code.
_OUTPUT_DIRS = ('./outputs/code', './outputs/data', './outputs/plots')

# Substring denylist applied to the submitted code.
# NOTE(review): this is a heuristic, NOT a sandbox. Plain substring matching
# is easy to bypass and also rejects legitimate calls such as `re.compile(`.
# Real isolation has to come from the process/container level.
_DANGEROUS_PATTERNS = {
    'subprocess': 'Use specialized tools instead of shell commands',
    '__import__': 'Dynamic imports not allowed for security',
    'eval(': 'eval() is dangerous - rewrite without it',
    'exec(': 'exec() is dangerous - rewrite without it',
    'compile(': 'compile() not needed - write code directly',
    'os.system': 'Shell commands not allowed - use Python libraries',
    'os.popen': 'Shell commands not allowed - use Python libraries'
}

# (marker searched in stderr, hint returned to the agent), in reporting order.
_ERROR_HINTS = (
    ("PermissionError",
     "💡 File permission issue - check if file is open in another program"),
    ("FileNotFoundError",
     "💡 File not found - check if path is correct (use relative paths like './outputs/data/file.csv')"),
    ("KeyError",
     "💡 Column not found - check column names in the CSV"),
    ("ModuleNotFoundError",
     "💡 Missing library - may need to install additional packages"),
    ("ValueError",
     "💡 Data type mismatch - check data types and conversions"),
)

# Prepended to every script. Errors are reported through sys.excepthook
# instead of wrapping the user's code in an indented try-block, so the code
# is executed exactly as written (re-indenting it would alter multi-line
# string literals and break empty / comment-only snippets).
_SCRIPT_PRELUDE = """
# Auto-imported libraries for convenience
import pandas as pd
import polars as pl
import numpy as np
import matplotlib
matplotlib.use('Agg')  # Non-interactive backend
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import json
import sys
import traceback

# Ensure output directory exists
import os
os.makedirs('./outputs/code', exist_ok=True)
os.makedirs('./outputs/data', exist_ok=True)


def _report_uncaught(exc_type, exc, tb):
    # Only decorate ordinary errors; leave KeyboardInterrupt etc. to Python.
    if not issubclass(exc_type, Exception):
        sys.__excepthook__(exc_type, exc, tb)
        return
    print(f"❌ Error in code execution: {str(exc)}", file=sys.stderr)
    traceback.print_exception(exc_type, exc, tb)


sys.excepthook = _report_uncaught

# User's code starts here
"""


def _build_script(code: str) -> str:
    """Return the full script text: helper prelude followed by the unmodified user code."""
    return _SCRIPT_PRELUDE + code + "\n"


def _prepare_working_directory(working_directory: str) -> Optional[Dict[str, Any]]:
    """Create the directory and verify it is writable; return an error dict on failure, else None."""
    try:
        os.makedirs(working_directory, exist_ok=True)
        # Ensure directory is writable by actually writing a probe file.
        test_file = os.path.join(working_directory, '.write_test')
        with open(test_file, 'w') as f:
            f.write('test')
        os.remove(test_file)
    except PermissionError:
        return {
            "success": False,
            "error": f"No write permission for directory: {working_directory}",
            "error_type": "PermissionError",
            "suggestion": "Check folder permissions or use a different directory"
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Failed to create working directory: {str(e)}",
            "error_type": type(e).__name__
        }
    return None


def _snapshot_output_files() -> Dict[Path, int]:
    """Map each file currently under the output directories to its mtime in nanoseconds."""
    snapshot: Dict[Path, int] = {}
    for output_dir in _OUTPUT_DIRS:
        if not os.path.exists(output_dir):
            continue
        for file_path in Path(output_dir).resolve().glob('**/*'):
            try:
                if file_path.is_file():
                    snapshot[file_path.resolve()] = file_path.stat().st_mtime_ns
            except OSError:
                # File vanished or is unreadable; treat it as not present.
                continue
    return snapshot


def _collect_generated_files(before: Dict[Path, int], temp_file: Optional[str]) -> List[str]:
    """List up to 10 non-empty files that are new or changed since `before`, newest first."""
    cwd = Path.cwd().resolve()
    temp_path = Path(temp_file).resolve() if temp_file else None
    found: List[Tuple[int, str]] = []

    for output_dir in _OUTPUT_DIRS:
        if not os.path.exists(output_dir):
            continue
        abs_output_dir = Path(output_dir).resolve()
        for file_path in abs_output_dir.glob('**/*'):
            try:
                if not file_path.is_file():
                    continue
                abs_file = file_path.resolve()
                info = abs_file.stat()
            except OSError:
                continue

            # Skip the script itself and empty files.
            if abs_file == temp_path or info.st_size == 0:
                continue
            # Unchanged since the snapshot -> not produced by this run.
            if before.get(abs_file) == info.st_mtime_ns:
                continue

            # Get relative path safely (handle Windows paths / symlinked dirs).
            try:
                rel_path = file_path.relative_to(cwd)
            except ValueError:
                # Fallback keeps any sub-directory below the output dir.
                rel_path = Path(output_dir) / file_path.relative_to(abs_output_dir)

            found.append((info.st_mtime_ns, str(rel_path).replace('\\', '/')))

    # Sort on the mtime captured above; never re-stat a derived relative path.
    found.sort(key=lambda item: item[0], reverse=True)
    return [path for _, path in found[:10]]  # Limit to 10 most recent files


def execute_python_code(
    code: str,
    working_directory: str = "./outputs/code",
    timeout: int = 60,
    allow_file_operations: bool = True,
    output_file: Optional[str] = None
) -> Dict[str, Any]:
    """
    Execute custom Python code written by the AI agent.

    This is the KEY tool that transforms the agent from a function-calling bot
    into a true AI agent capable of solving ANY data science problem.

    The code is syntax-checked, screened against a substring denylist, written
    to a temporary script (after a prelude of convenience imports) and run in a
    child interpreter whose working directory is the current process
    directory.

    Use cases:
    - Custom visualizations not covered by existing tools
    - Data transformations too specific for generic tools
    - Domain-specific calculations
    - Interactive dashboards
    - Custom export formats

    Args:
        code: Python code to execute
        working_directory: Directory where the temporary script file is
            written (default: ./outputs/code). The script itself runs with
            the caller's current working directory, so relative paths in
            `code` resolve against that.
        timeout: Maximum execution time in seconds
        allow_file_operations: When True, files created or modified under
            ./outputs/code, ./outputs/data and ./outputs/plots are reported
            in "generated_files". This flag does not restrict what the code
            can read or write.
        output_file: Accepted for interface compatibility; currently unused.

    Returns:
        Dict with execution results. Always contains "success". On success:
        "stdout", "stderr", "message", "generated_files",
        "working_directory", "execution_summary". On failure: "error",
        "error_type" and, depending on the failure, "line", "reason",
        "suggestion", "stderr", "stdout", "exit_code", "hints".

    Example:
        # Agent can write custom Plotly code for specific visualizations
        code = '''
        import plotly.express as px
        import pandas as pd

        df = pd.read_csv('./temp/sales_data.csv')
        fig = px.line(df, x='month', y='sales', color='bike_model',
                      title='Extended Sales by Month for Each Bike Model')

        # Add dropdown filter
        fig.update_layout(
            updatemenus=[{
                'buttons': [{'label': model, 'method': 'update',
                             'args': [{'visible': [model == m for m in df['bike_model'].unique()]}]}
                            for model in df['bike_model'].unique()],
                'direction': 'down',
                'showactive': True
            }]
        )

        fig.write_html('./outputs/code/bike_sales_interactive.html')
        print("Chart saved to: ./outputs/code/bike_sales_interactive.html")
        '''

        result = execute_python_code(code)
    """
    try:
        # ⚠️ CRITICAL: Basic syntax validation BEFORE execution
        try:
            compile(code, '', 'exec')
        except SyntaxError as e:
            return {
                "success": False,
                "error": f"Syntax error in generated code: {str(e)}",
                "error_type": "SyntaxError",
                "line": e.lineno,
                "suggestion": "Fix syntax errors in the code. Common issues: missing quotes, parentheses, indentation"
            }

        # Create working directory with proper permissions
        dir_error = _prepare_working_directory(working_directory)
        if dir_error is not None:
            return dir_error

        # Security: Validate code doesn't contain dangerous operations
        for pattern, reason in _DANGEROUS_PATTERNS.items():
            if pattern in code:
                return {
                    "success": False,
                    "error": f"Code contains restricted operation: {pattern}",
                    "error_type": "SecurityError",
                    "reason": reason,
                    "suggestion": "Rewrite code using safe Python operations"
                }

        temp_file = None
        try:
            # Create temporary Python file with better error handling
            try:
                with tempfile.NamedTemporaryFile(
                    mode='w', suffix='.py', delete=False,
                    dir=working_directory, encoding='utf-8'
                ) as f:
                    temp_file = f.name
                    f.write(_build_script(code))
            except Exception as e:
                return {
                    "success": False,
                    "error": f"Failed to write temporary file: {str(e)}",
                    "error_type": type(e).__name__,
                    "suggestion": "Check file write permissions"
                }

            # Snapshot existing files BEFORE execution to detect new/changed ones
            files_before = _snapshot_output_files() if allow_file_operations else {}

            # Use absolute paths so the child does not depend on how the
            # (possibly relative) working_directory was spelled.
            abs_temp_file = os.path.abspath(temp_file)
            abs_cwd = os.path.abspath(os.getcwd())

            # The prelude and user code may print non-ASCII text; force UTF-8
            # on both ends of the pipe so output survives any locale.
            child_env = {**os.environ, "PYTHONIOENCODING": "utf-8"}

            result = subprocess.run(
                [sys.executable, abs_temp_file],
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=timeout,
                cwd=abs_cwd,
                env=child_env
            )

            stdout = result.stdout.strip()
            stderr = result.stderr.strip()
            returncode = result.returncode

            # Check for errors with detailed diagnostics
            if returncode != 0:
                error_hints = [hint for marker, hint in _ERROR_HINTS if marker in stderr]
                return {
                    "success": False,
                    "error": "Code execution failed",
                    "stderr": stderr,
                    "stdout": stdout if stdout else None,
                    "error_type": "ExecutionError",
                    "exit_code": returncode,
                    "hints": error_hints if error_hints else ["Check the error message above for details"]
                }

            # Success! Find NEWLY generated or modified files
            generated_files: List[str] = []
            if allow_file_operations:
                generated_files = _collect_generated_files(files_before, temp_file)

            return {
                "success": True,
                "stdout": stdout if stdout else "✅ Code executed successfully (no output)",
                "stderr": stderr if stderr else None,
                "message": "✅ Code executed successfully",
                "generated_files": generated_files,
                "working_directory": working_directory,
                "execution_summary": {
                    "lines_of_code": len(code.split('\n')),
                    "files_generated": len(generated_files)
                }
            }

        finally:
            # Clean up temp file on every exit path (best effort).
            if temp_file and os.path.exists(temp_file):
                try:
                    os.unlink(temp_file)
                except OSError:
                    pass  # Ignore cleanup errors

    except subprocess.TimeoutExpired:
        return {
            "success": False,
            "error": f"Code execution timed out after {timeout} seconds",
            "error_type": "TimeoutError",
            "suggestion": "Code is taking too long. Optimize it or increase timeout. Avoid large loops or heavy computations."
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Unexpected error: {str(e)}",
            "error_type": type(e).__name__,
            "suggestion": "This is an unexpected error. Try simplifying the code."
        }


def execute_code_from_file(
    file_path: str,
    working_directory: str = "./outputs/code",
    timeout: int = 60
) -> Dict[str, Any]:
    """
    Execute Python code from a file.

    Useful when code is too long to pass as a string, or when the agent
    wants to run an existing script. The file is read as UTF-8 and its
    contents are passed to execute_python_code().

    Args:
        file_path: Path to Python file to execute
        working_directory: Forwarded to execute_python_code()
        timeout: Maximum execution time in seconds

    Returns:
        Dict with execution results, or an error dict if the file cannot
        be read.
    """
    try:
        # Read code from file
        with open(file_path, 'r', encoding='utf-8') as f:
            code = f.read()
    except FileNotFoundError:
        return {
            "success": False,
            "error": f"File not found: {file_path}",
            "error_type": "FileNotFoundError"
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Failed to read file: {str(e)}",
            "error_type": type(e).__name__
        }

    return execute_python_code(
        code=code,
        working_directory=working_directory,
        timeout=timeout
    )


def generate_custom_visualization(
    data_file: str,
    visualization_description: str,
    output_path: str = "./outputs/code/custom_plot.html",
    timeout: int = 60
) -> Dict[str, Any]:
    """
    HIGH-LEVEL helper: Generate custom visualization from natural language description.

    Intended as a convenience wrapper that could use an LLM to generate the
    plotting code from the description. It is currently a placeholder: none
    of the arguments are used and a "not implemented" error dict is always
    returned.

    Args:
        data_file: Path to dataset
        visualization_description: Natural language description of desired plot
        output_path: Where to save the visualization
        timeout: Execution timeout

    Returns:
        Dict with "success": False and "error_type": "NotImplementedError".

    Example:
        result = generate_custom_visualization(
            data_file="./temp/sales.csv",
            visualization_description="Line plot of sales by month for each bike model, with dropdown filter",
            output_path="./outputs/code/sales_plot.html"
        )
    """
    # This is a placeholder - in a full implementation, this would use an LLM
    # to generate the Plotly code from the description
    return {
        "success": False,
        "error": "Not yet implemented - use execute_python_code with explicit code instead",
        "error_type": "NotImplementedError",
        "suggestion": "Write the Plotly code explicitly and use execute_python_code()"
    }