Spaces:
Running
Running
Pulastya B
feat: Initial commit - Data Science Agent with React frontend and FastAPI backend
226ac39
| """ | |
| Code Interpreter Tool | |
| Allows the AI agent to write and execute custom Python code for tasks that don't have predefined tools. | |
| This is what makes it a TRUE AI Agent, not just a function-calling bot. | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import tempfile | |
| from pathlib import Path | |
| from typing import Dict, Any, Optional | |
| import polars as pl | |
| def execute_python_code( | |
| code: str, | |
| working_directory: str = "./outputs/code", | |
| timeout: int = 60, | |
| allow_file_operations: bool = True, | |
| output_file: Optional[str] = None | |
| ) -> Dict[str, Any]: | |
| """ | |
| Execute custom Python code written by the AI agent. | |
| This is the KEY tool that transforms the agent from a function-calling bot | |
| into a true AI agent capable of solving ANY data science problem. | |
| Use cases: | |
| - Custom visualizations not covered by existing tools | |
| - Data transformations too specific for generic tools | |
| - Domain-specific calculations | |
| - Interactive dashboards | |
| - Custom export formats | |
| Args: | |
| code: Python code to execute | |
| working_directory: Where to run the code (default: ./outputs/code) | |
| timeout: Maximum execution time in seconds | |
| allow_file_operations: Whether code can read/write files | |
| output_file: Optional file path to save output (e.g., HTML plot) | |
| Returns: | |
| Dict with execution results, stdout, stderr, and any generated files | |
| Example: | |
| # Agent can write custom Plotly code for specific visualizations | |
| code = ''' | |
| import plotly.express as px | |
| import pandas as pd | |
| df = pd.read_csv('./temp/sales_data.csv') | |
| fig = px.line(df, x='month', y='sales', color='bike_model', | |
| title='Extended Sales by Month for Each Bike Model') | |
| # Add dropdown filter | |
| fig.update_layout( | |
| updatemenus=[{ | |
| 'buttons': [{'label': model, 'method': 'update', | |
| 'args': [{'visible': [model == m for m in df['bike_model'].unique()]}]} | |
| for model in df['bike_model'].unique()], | |
| 'direction': 'down', | |
| 'showactive': True | |
| }] | |
| ) | |
| fig.write_html('./outputs/code/bike_sales_interactive.html') | |
| print("Chart saved to: ./outputs/code/bike_sales_interactive.html") | |
| ''' | |
| result = execute_python_code(code) | |
| """ | |
| try: | |
| # ⚠️ CRITICAL: Basic syntax validation BEFORE execution | |
| try: | |
| compile(code, '<string>', 'exec') | |
| except SyntaxError as e: | |
| return { | |
| "success": False, | |
| "error": f"Syntax error in generated code: {str(e)}", | |
| "error_type": "SyntaxError", | |
| "line": e.lineno, | |
| "suggestion": "Fix syntax errors in the code. Common issues: missing quotes, parentheses, indentation" | |
| } | |
| # Create working directory with proper permissions | |
| try: | |
| os.makedirs(working_directory, exist_ok=True) | |
| # Ensure directory is writable | |
| test_file = os.path.join(working_directory, '.write_test') | |
| with open(test_file, 'w') as f: | |
| f.write('test') | |
| os.remove(test_file) | |
| except PermissionError: | |
| return { | |
| "success": False, | |
| "error": f"No write permission for directory: {working_directory}", | |
| "error_type": "PermissionError", | |
| "suggestion": f"Check folder permissions or use a different directory" | |
| } | |
| except Exception as e: | |
| return { | |
| "success": False, | |
| "error": f"Failed to create working directory: {str(e)}", | |
| "error_type": type(e).__name__ | |
| } | |
| # Security: Validate code doesn't contain dangerous operations | |
| dangerous_patterns = { | |
| 'subprocess': 'Use specialized tools instead of shell commands', | |
| '__import__': 'Dynamic imports not allowed for security', | |
| 'eval(': 'eval() is dangerous - rewrite without it', | |
| 'exec(': 'exec() is dangerous - rewrite without it', | |
| 'compile(': 'compile() not needed - write code directly', | |
| 'os.system': 'Shell commands not allowed - use Python libraries', | |
| 'os.popen': 'Shell commands not allowed - use Python libraries' | |
| } | |
| for pattern, reason in dangerous_patterns.items(): | |
| if pattern in code: | |
| return { | |
| "success": False, | |
| "error": f"Code contains restricted operation: {pattern}", | |
| "error_type": "SecurityError", | |
| "reason": reason, | |
| "suggestion": "Rewrite code using safe Python operations" | |
| } | |
| # Create temporary Python file with better error handling | |
| temp_file = None | |
| try: | |
| with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, | |
| dir=working_directory, encoding='utf-8') as f: | |
| temp_file = f.name | |
| # Add helper imports at the top + error handling wrapper | |
| enhanced_code = """ | |
| # Auto-imported libraries for convenience | |
| import pandas as pd | |
| import polars as pl | |
| import numpy as np | |
| import matplotlib | |
| matplotlib.use('Agg') # Non-interactive backend | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from pathlib import Path | |
| import json | |
| import sys | |
| import traceback | |
| # Ensure output directory exists | |
| import os | |
| os.makedirs('./outputs/code', exist_ok=True) | |
| os.makedirs('./outputs/data', exist_ok=True) | |
| try: | |
| # User's code starts here | |
| """ + "\n".join(" " + line for line in code.split("\n")) + """ | |
| except Exception as e: | |
| print(f"❌ Error in code execution: {str(e)}", file=sys.stderr) | |
| traceback.print_exc() | |
| sys.exit(1) | |
| """ | |
| f.write(enhanced_code) | |
| except Exception as e: | |
| return { | |
| "success": False, | |
| "error": f"Failed to write temporary file: {str(e)}", | |
| "error_type": type(e).__name__, | |
| "suggestion": "Check file write permissions" | |
| } | |
| # Track existing files BEFORE execution to detect new files | |
| existing_files = set() | |
| if allow_file_operations: | |
| for output_dir in ['./outputs/code', './outputs/data', './outputs/plots']: | |
| if os.path.exists(output_dir): | |
| for file_path in Path(output_dir).resolve().glob('**/*'): | |
| if file_path.is_file(): | |
| existing_files.add(file_path.resolve()) | |
| try: | |
| # Execute the code with better error capture | |
| # Use absolute path and normalize it for Windows | |
| abs_temp_file = os.path.abspath(temp_file) | |
| abs_cwd = os.path.abspath(Path.cwd()) | |
| result = subprocess.run( | |
| [sys.executable, abs_temp_file], | |
| capture_output=True, | |
| text=True, | |
| timeout=timeout, | |
| cwd=abs_cwd # Use absolute path to avoid permission issues | |
| ) | |
| stdout = result.stdout.strip() | |
| stderr = result.stderr.strip() | |
| returncode = result.returncode | |
| # Check for errors with detailed diagnostics | |
| if returncode != 0: | |
| # Parse error message for common issues | |
| error_hints = [] | |
| if "PermissionError" in stderr: | |
| error_hints.append("💡 File permission issue - check if file is open in another program") | |
| if "FileNotFoundError" in stderr: | |
| error_hints.append("💡 File not found - check if path is correct (use relative paths like './outputs/data/file.csv')") | |
| if "KeyError" in stderr: | |
| error_hints.append("💡 Column not found - check column names in the CSV") | |
| if "ModuleNotFoundError" in stderr: | |
| error_hints.append("💡 Missing library - may need to install additional packages") | |
| if "ValueError" in stderr: | |
| error_hints.append("💡 Data type mismatch - check data types and conversions") | |
| return { | |
| "success": False, | |
| "error": f"Code execution failed", | |
| "stderr": stderr, | |
| "stdout": stdout if stdout else None, | |
| "error_type": "ExecutionError", | |
| "exit_code": returncode, | |
| "hints": error_hints if error_hints else ["Check the error message above for details"] | |
| } | |
| # Success! Find NEWLY generated files (not existing before execution) | |
| generated_files = [] | |
| if allow_file_operations: | |
| cwd = Path.cwd() | |
| for output_dir in ['./outputs/code', './outputs/data', './outputs/plots']: | |
| if os.path.exists(output_dir): | |
| abs_output_dir = Path(output_dir).resolve() | |
| for file_path in abs_output_dir.glob('**/*'): | |
| if file_path.is_file(): | |
| abs_file = file_path.resolve() | |
| # Only include if it's NEW (didn't exist before) or MODIFIED | |
| is_new = abs_file not in existing_files | |
| # Check if file was modified in last 5 seconds (just created/updated) | |
| import time | |
| file_age = time.time() - file_path.stat().st_mtime | |
| is_recent = file_age < 5 | |
| if (is_new or is_recent): | |
| # Get relative path safely (handle Windows paths) | |
| try: | |
| rel_path = file_path.relative_to(cwd) | |
| except ValueError: | |
| # Fallback: just use the file name with output dir | |
| rel_path = Path(output_dir) / file_path.name | |
| # Only include if not temp file and has content | |
| abs_temp = Path(temp_file).resolve() if temp_file else None | |
| if file_path != abs_temp and file_path.stat().st_size > 0: | |
| generated_files.append(str(rel_path).replace('\\', '/')) | |
| # Sort by modification time (newest first) | |
| if generated_files: | |
| generated_files = sorted( | |
| generated_files, | |
| key=lambda x: Path(x).stat().st_mtime, | |
| reverse=True | |
| )[:10] # Limit to 10 most recent files | |
| return { | |
| "success": True, | |
| "stdout": stdout if stdout else "✅ Code executed successfully (no output)", | |
| "stderr": stderr if stderr else None, | |
| "message": "✅ Code executed successfully", | |
| "generated_files": generated_files, | |
| "working_directory": working_directory, | |
| "execution_summary": { | |
| "lines_of_code": len(code.split('\n')), | |
| "files_generated": len(generated_files) | |
| } | |
| } | |
| finally: | |
| # Clean up temp file | |
| if temp_file and os.path.exists(temp_file): | |
| try: | |
| os.unlink(temp_file) | |
| except Exception: | |
| pass # Ignore cleanup errors | |
| except subprocess.TimeoutExpired: | |
| return { | |
| "success": False, | |
| "error": f"Code execution timed out after {timeout} seconds", | |
| "error_type": "TimeoutError", | |
| "suggestion": "Code is taking too long. Optimize it or increase timeout. Avoid large loops or heavy computations." | |
| } | |
| except Exception as e: | |
| return { | |
| "success": False, | |
| "error": f"Unexpected error: {str(e)}", | |
| "error_type": type(e).__name__, | |
| "suggestion": "This is an unexpected error. Try simplifying the code." | |
| } | |
| def execute_code_from_file( | |
| file_path: str, | |
| working_directory: str = "./outputs/code", | |
| timeout: int = 60 | |
| ) -> Dict[str, Any]: | |
| """ | |
| Execute Python code from a file. | |
| Useful when code is too long to pass as a string, or when the agent | |
| wants to run an existing script. | |
| Args: | |
| file_path: Path to Python file to execute | |
| working_directory: Where to run the code | |
| timeout: Maximum execution time in seconds | |
| Returns: | |
| Dict with execution results | |
| """ | |
| try: | |
| # Read code from file | |
| with open(file_path, 'r', encoding='utf-8') as f: | |
| code = f.read() | |
| return execute_python_code( | |
| code=code, | |
| working_directory=working_directory, | |
| timeout=timeout | |
| ) | |
| except FileNotFoundError: | |
| return { | |
| "success": False, | |
| "error": f"File not found: {file_path}", | |
| "error_type": "FileNotFoundError" | |
| } | |
| except Exception as e: | |
| return { | |
| "success": False, | |
| "error": f"Failed to read file: {str(e)}", | |
| "error_type": type(e).__name__ | |
| } | |
| def generate_custom_visualization( | |
| data_file: str, | |
| visualization_description: str, | |
| output_path: str = "./outputs/code/custom_plot.html", | |
| timeout: int = 60 | |
| ) -> Dict[str, Any]: | |
| """ | |
| HIGH-LEVEL helper: Generate custom visualization from natural language description. | |
| The agent describes what it wants, and this function attempts to generate the code. | |
| This is a convenience wrapper that could use an LLM to generate the plotting code. | |
| Args: | |
| data_file: Path to dataset | |
| visualization_description: Natural language description of desired plot | |
| output_path: Where to save the visualization | |
| timeout: Execution timeout | |
| Returns: | |
| Dict with execution results | |
| Example: | |
| result = generate_custom_visualization( | |
| data_file="./temp/sales.csv", | |
| visualization_description="Line plot of sales by month for each bike model, with dropdown filter", | |
| output_path="./outputs/code/sales_plot.html" | |
| ) | |
| """ | |
| # This is a placeholder - in a full implementation, this would use an LLM | |
| # to generate the Plotly code from the description | |
| return { | |
| "success": False, | |
| "error": "Not yet implemented - use execute_python_code with explicit code instead", | |
| "error_type": "NotImplementedError", | |
| "suggestion": "Write the Plotly code explicitly and use execute_python_code()" | |
| } | |