File size: 15,458 Bytes
226ac39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
"""
Code Interpreter Tool
Allows the AI agent to write and execute custom Python code for tasks that don't have predefined tools.
This is what makes it a TRUE AI Agent, not just a function-calling bot.
"""

import os
import sys
import subprocess
import tempfile
from pathlib import Path
from typing import Dict, Any, Optional
import polars as pl


# Directories (relative to the current working directory) that are scanned for
# files produced by the executed code.
_OUTPUT_DIRS = ('./outputs/code', './outputs/data', './outputs/plots')

# Substrings that cause the code to be rejected before execution, mapped to the
# explanation returned to the caller. Checked in this order; first match wins.
# NOTE(security): this is a plain substring screen, not a sandbox. It is easy to
# bypass (e.g. importlib, getattr tricks) and it also rejects harmless code that
# merely mentions one of these strings. Do not rely on it to contain untrusted
# code - the script still runs with the full privileges of this process.
_DANGEROUS_PATTERNS = {
    'subprocess': 'Use specialized tools instead of shell commands',
    '__import__': 'Dynamic imports not allowed for security',
    'eval(': 'eval() is dangerous - rewrite without it',
    'exec(': 'exec() is dangerous - rewrite without it',
    'compile(': 'compile() not needed - write code directly',
    'os.system': 'Shell commands not allowed - use Python libraries',
    'os.popen': 'Shell commands not allowed - use Python libraries'
}

# (marker searched for in stderr, hint returned to the caller)
_ERROR_HINTS = (
    ("PermissionError", "💡 File permission issue - check if file is open in another program"),
    ("FileNotFoundError", "💡 File not found - check if path is correct (use relative paths like './outputs/data/file.csv')"),
    ("KeyError", "💡 Column not found - check column names in the CSV"),
    ("ModuleNotFoundError", "💡 Missing library - may need to install additional packages"),
    ("ValueError", "💡 Data type mismatch - check data types and conversions"),
)

# Text placed before the (indented) user code in the generated script.
_SCRIPT_HEADER = """
# Auto-imported libraries for convenience
import pandas as pd
import polars as pl
import numpy as np
import matplotlib
matplotlib.use('Agg')  # Non-interactive backend
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import json
import sys
import traceback

# Ensure output directory exists
import os
os.makedirs('./outputs/code', exist_ok=True)
os.makedirs('./outputs/data', exist_ok=True)

try:
    # User's code starts here
    pass  # keeps the try block valid when the user code is empty or comment-only
"""

# Text placed after the user code in the generated script.
_SCRIPT_FOOTER = """

except Exception as e:
    print(f"❌ Error in code execution: {str(e)}", file=sys.stderr)
    traceback.print_exc()
    sys.exit(1)
"""


def _wrap_user_code(code: str) -> str:
    """Return the full script: convenience imports plus `code` inside a try block."""
    # NOTE: every line is indented by four spaces so it sits inside the `try:`;
    # this also shifts continuation lines of multi-line string literals.
    indented = "\n".join("    " + line for line in code.split("\n"))
    return _SCRIPT_HEADER + indented + _SCRIPT_FOOTER


def _snapshot_output_files() -> Dict[Path, int]:
    """Map every file currently under the output directories to its mtime (ns)."""
    snapshot = {}
    for output_dir in _OUTPUT_DIRS:
        if not os.path.exists(output_dir):
            continue
        for file_path in Path(output_dir).resolve().glob('**/*'):
            try:
                if file_path.is_file():
                    snapshot[file_path.resolve()] = file_path.stat().st_mtime_ns
            except OSError:
                continue  # file vanished or is unreadable; ignore it
    return snapshot


def _collect_generated_files(before: Dict[Path, int], temp_file: Optional[str]) -> list:
    """List up to 10 non-empty output files created or modified since `before`, newest first."""
    cwd = Path.cwd()
    script_path = Path(temp_file).resolve() if temp_file else None
    found = []  # (mtime in ns, path relative to cwd with forward slashes)
    for output_dir in _OUTPUT_DIRS:
        if not os.path.exists(output_dir):
            continue
        for file_path in Path(output_dir).resolve().glob('**/*'):
            try:
                if not file_path.is_file():
                    continue
                abs_file = file_path.resolve()
                info = file_path.stat()
            except OSError:
                continue  # file vanished between listing and stat

            # Skip the temporary script itself and empty files.
            if abs_file == script_path or info.st_size == 0:
                continue
            # Unchanged since the snapshot -> not produced by this run.
            if before.get(abs_file) == info.st_mtime_ns:
                continue

            try:
                rel_path = file_path.relative_to(cwd)
            except ValueError:
                # Not under cwd: fall back to "<output dir>/<file name>".
                rel_path = Path(output_dir) / file_path.name
            found.append((info.st_mtime_ns, str(rel_path).replace('\\', '/')))

    # Sort on the mtime captured above rather than re-stat'ing the relative
    # path, which may not exist when the fallback name was used.
    found.sort(key=lambda entry: entry[0], reverse=True)
    return [path for _, path in found[:10]]


def execute_python_code(
    code: str,
    working_directory: str = "./outputs/code",
    timeout: int = 60,
    allow_file_operations: bool = True,
    output_file: Optional[str] = None
) -> Dict[str, Any]:
    """
    Execute custom Python code written by the AI agent.

    The code is syntax-checked, screened for a small set of restricted
    substrings, wrapped in a script that pre-imports common data-science
    libraries, and run in a separate Python process (``sys.executable``).
    The child process runs with the caller's current working directory;
    ``working_directory`` only holds the temporary script file.

    Use cases:
    - Custom visualizations not covered by existing tools
    - Data transformations too specific for generic tools
    - Domain-specific calculations
    - Interactive dashboards
    - Custom export formats

    Args:
        code: Python code to execute
        working_directory: Directory in which the temporary script is created
            (created if missing; must be writable). Default: ./outputs/code
        timeout: Maximum execution time in seconds
        allow_file_operations: When True, ./outputs/code, ./outputs/data and
            ./outputs/plots are scanned before and after the run to report
            generated files. It does not restrict what the code itself may do.
        output_file: Accepted for interface compatibility; currently unused.

    Returns:
        Dict that always contains "success". On success it also contains
        "stdout", "stderr", "message", "generated_files" (up to 10 paths,
        newest first), "working_directory" and "execution_summary". On
        failure it contains "error" and "error_type" (SyntaxError,
        PermissionError, SecurityError, ExecutionError, TimeoutError or the
        name of an unexpected exception) plus context-specific keys such as
        "line", "reason", "stderr", "exit_code", "hints" or "suggestion".
        The function does not raise for ordinary failures.

    Example:
        # Agent can write custom Plotly code for specific visualizations
        code = '''
        import plotly.express as px
        import pandas as pd

        df = pd.read_csv('./temp/sales_data.csv')
        fig = px.line(df, x='month', y='sales', color='bike_model',
                     title='Extended Sales by Month for Each Bike Model')

        fig.write_html('./outputs/code/bike_sales_interactive.html')
        print("Chart saved to: ./outputs/code/bike_sales_interactive.html")
        '''

        result = execute_python_code(code)
    """
    temp_file = None
    try:
        # Basic syntax validation BEFORE anything touches the file system.
        try:
            compile(code, '<string>', 'exec')
        except SyntaxError as e:
            return {
                "success": False,
                "error": f"Syntax error in generated code: {str(e)}",
                "error_type": "SyntaxError",
                "line": e.lineno,
                "suggestion": "Fix syntax errors in the code. Common issues: missing quotes, parentheses, indentation"
            }

        # Create the working directory and prove it is writable.
        try:
            os.makedirs(working_directory, exist_ok=True)
            test_file = os.path.join(working_directory, '.write_test')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
        except PermissionError:
            return {
                "success": False,
                "error": f"No write permission for directory: {working_directory}",
                "error_type": "PermissionError",
                "suggestion": "Check folder permissions or use a different directory"
            }
        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to create working directory: {str(e)}",
                "error_type": type(e).__name__
            }

        # Reject code containing any restricted substring (see the note on
        # _DANGEROUS_PATTERNS about the limits of this check).
        for pattern, reason in _DANGEROUS_PATTERNS.items():
            if pattern in code:
                return {
                    "success": False,
                    "error": f"Code contains restricted operation: {pattern}",
                    "error_type": "SecurityError",
                    "reason": reason,
                    "suggestion": "Rewrite code using safe Python operations"
                }

        # Write the wrapped script; delete=False because the child process
        # must be able to open it after this handle is closed.
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False,
                                             dir=working_directory, encoding='utf-8') as f:
                temp_file = f.name
                f.write(_wrap_user_code(code))
        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to write temporary file: {str(e)}",
                "error_type": type(e).__name__,
                "suggestion": "Check file write permissions"
            }

        # Record existing output files so new/modified ones can be detected.
        files_before = _snapshot_output_files() if allow_file_operations else {}

        # The wrapper prints a non-ASCII marker on failure, so force UTF-8 on
        # both ends of the pipe instead of relying on the locale encoding.
        child_env = dict(os.environ, PYTHONIOENCODING='utf-8')
        result = subprocess.run(
            [sys.executable, os.path.abspath(temp_file)],
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            timeout=timeout,
            cwd=os.path.abspath(Path.cwd()),  # absolute path to avoid permission issues
            env=child_env
        )

        stdout = result.stdout.strip()
        stderr = result.stderr.strip()
        returncode = result.returncode

        if returncode != 0:
            # Attach a hint for each well-known exception name seen in stderr.
            error_hints = [hint for marker, hint in _ERROR_HINTS if marker in stderr]
            return {
                "success": False,
                "error": "Code execution failed",
                "stderr": stderr,
                "stdout": stdout if stdout else None,
                "error_type": "ExecutionError",
                "exit_code": returncode,
                "hints": error_hints if error_hints else ["Check the error message above for details"]
            }

        generated_files = (
            _collect_generated_files(files_before, temp_file)
            if allow_file_operations else []
        )

        return {
            "success": True,
            "stdout": stdout if stdout else "✅ Code executed successfully (no output)",
            "stderr": stderr if stderr else None,
            "message": "✅ Code executed successfully",
            "generated_files": generated_files,
            "working_directory": working_directory,
            "execution_summary": {
                "lines_of_code": len(code.split('\n')),
                "files_generated": len(generated_files)
            }
        }

    except subprocess.TimeoutExpired:
        return {
            "success": False,
            "error": f"Code execution timed out after {timeout} seconds",
            "error_type": "TimeoutError",
            "suggestion": "Code is taking too long. Optimize it or increase timeout. Avoid large loops or heavy computations."
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Unexpected error: {str(e)}",
            "error_type": type(e).__name__,
            "suggestion": "This is an unexpected error. Try simplifying the code."
        }
    finally:
        # Always remove the temporary script, whichever path returned above.
        if temp_file and os.path.exists(temp_file):
            try:
                os.unlink(temp_file)
            except OSError:
                pass  # best-effort cleanup; a leftover script is harmless


def execute_code_from_file(
    file_path: str,
    working_directory: str = "./outputs/code",
    timeout: int = 60
) -> Dict[str, Any]:
    """
    Run the Python source stored in a file through execute_python_code().

    Handy when the code is too long to pass around as a string, or when the
    agent wants to run a script that already exists on disk.

    Args:
        file_path: Path to the Python file to execute (read as UTF-8)
        working_directory: Forwarded unchanged to execute_python_code()
        timeout: Maximum execution time in seconds, forwarded unchanged

    Returns:
        The result dict of execute_python_code(), or a failure dict with
        "success", "error" and "error_type" when the file is missing or
        cannot be read.
    """
    try:
        # Load the whole script into memory, then delegate execution.
        with open(file_path, 'r', encoding='utf-8') as script:
            source = script.read()

        outcome = execute_python_code(
            code=source,
            working_directory=working_directory,
            timeout=timeout
        )
    except FileNotFoundError:
        outcome = {
            "success": False,
            "error": f"File not found: {file_path}",
            "error_type": "FileNotFoundError"
        }
    except Exception as exc:
        # Any other failure (directory, permissions, decoding, ...).
        outcome = {
            "success": False,
            "error": f"Failed to read file: {str(exc)}",
            "error_type": type(exc).__name__
        }
    return outcome


def generate_custom_visualization(
    data_file: str,
    visualization_description: str,
    output_path: str = "./outputs/code/custom_plot.html",
    timeout: int = 60
) -> Dict[str, Any]:
    """
    Placeholder for generating a visualization from a natural-language description.

    The intended design is for an LLM to turn the description into plotting
    code; that is not implemented. Every call ignores its arguments and
    returns the same failure dict directing the caller to
    execute_python_code().

    Args:
        data_file: Path to dataset (currently unused)
        visualization_description: Natural language description of the
            desired plot (currently unused)
        output_path: Where the visualization would be saved (currently unused)
        timeout: Execution timeout (currently unused)

    Returns:
        Dict with "success" set to False, an "error" message, "error_type"
        "NotImplementedError" and a "suggestion".

    Example:
        result = generate_custom_visualization(
            data_file="./temp/sales.csv",
            visualization_description="Line plot of sales by month for each bike model, with dropdown filter",
            output_path="./outputs/code/sales_plot.html"
        )
    """
    # Not implemented yet: a full version would ask an LLM for Plotly code.
    not_implemented = dict(
        success=False,
        error="Not yet implemented - use execute_python_code with explicit code instead",
        error_type="NotImplementedError",
        suggestion="Write the Plotly code explicitly and use execute_python_code()",
    )
    return not_implemented