Add new tools and functionalities for audio transcription, code execution, document handling, image processing, and mathematical operations
d303e2f
| from langchain_core.tools import tool | |
| import os | |
| import io | |
| import sys | |
| import uuid | |
| import base64 | |
| import traceback | |
| import contextlib | |
| import tempfile | |
| import subprocess | |
| import sqlite3 | |
| from typing import Dict, List, Any, Optional, Union | |
| import numpy as np | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from PIL import Image | |
class CodeInterpreter:
    """Execute code snippets in several languages and report the outcome as a dict.

    Supported languages: Python (run in-process with ``exec``), Bash (run
    through the system shell), SQL (run against a SQLite database file),
    C (compiled with ``gcc``) and Java (compiled with ``javac``).

    Every execution returns a dict with the keys ``execution_id``,
    ``status`` (``"success"`` or ``"error"``), ``stdout``, ``stderr``,
    ``result``, ``plots`` and ``dataframes``.

    SECURITY NOTE: nothing in this class isolates the executed code.
    ``allowed_modules`` is stored but never consulted, Python code runs via
    ``exec`` inside the host process with full builtins and no timeout, and
    Bash code runs with ``shell=True``. Only feed it trusted input, or wrap
    it in real OS-level isolation.
    """

    def __init__(self, allowed_modules=None, max_execution_time=30, working_directory=None):
        """Initialize the code interpreter.

        Args:
            allowed_modules: List of module names. Stored on the instance
                only; no method of this class enforces it. A falsy value
                selects the built-in default list.
            max_execution_time: Timeout in seconds passed to every
                ``subprocess.run`` call (Bash, C, Java). Not applied to
                in-process Python execution or to SQL.
            working_directory: Directory under which per-execution plot
                folders are created. Defaults to the current working
                directory; created if it does not exist.
        """
        self.allowed_modules = allowed_modules or [
            "numpy", "pandas", "matplotlib", "scipy", "sklearn",
            "math", "random", "statistics", "datetime", "collections",
            "itertools", "functools", "operator", "re", "json",
            "sympy", "networkx", "nltk", "PIL", "pytesseract",
            "cmath", "uuid", "tempfile", "requests", "urllib", "os", "io",
            "sys", "base64", "traceback", "contextlib", "sqlite3",
        ]
        self.max_execution_time = max_execution_time
        self.working_directory = working_directory or os.getcwd()
        os.makedirs(self.working_directory, exist_ok=True)
        # Namespace shared by every Python execution on this instance, so
        # variables defined by one snippet are visible to the next.
        self.globals = {
            "__builtins__": __builtins__,
            "np": np,
            "pd": pd,
            "plt": plt,
            "Image": Image,
        }
        # A fixed path in the system temp dir: all instances share one database.
        self.temp_sqlite_db = os.path.join(tempfile.gettempdir(), "code_exec.db")

    @staticmethod
    def _new_result(execution_id: str, status: str = "error",
                    stdout: str = "", stderr: str = "") -> Dict[str, Any]:
        """Build a result dict with the common shape returned by every executor."""
        return {
            "execution_id": execution_id,
            "status": status,
            "stdout": stdout,
            "stderr": stderr,
            "result": None,
            "plots": [],
            "dataframes": [],
        }

    def execute_code(self, code: str, language: str = "python", file_path: Optional[str] = None) -> Dict[str, Any]:
        """Execute the provided code or code from a file in the selected programming language.

        Args:
            code: Source text to run. Ignored when ``file_path`` is given,
                even if the file turns out to be empty.
            language: One of "python", "bash", "sql", "c", "java"
                (case-insensitive).
            file_path: Optional path to a UTF-8 file whose content is run
                instead of ``code``.

        Returns:
            The result dict described on the class. Input problems, an
            unsupported language, and exceptions raised by an executor are
            all reported through ``status == "error"`` and ``stderr``.
        """
        language = language.lower()
        execution_id = str(uuid.uuid4())
        result = self._new_result(execution_id)
        code = code or ""  # tolerate None so .strip() below cannot raise
        current_code = code

        if file_path:
            if not os.path.exists(file_path):
                result["stderr"] = f"Error: File not found at {file_path}"
                return result
            if not os.path.isfile(file_path):
                result["stderr"] = f"Error: Path {file_path} is not a file."
                return result
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    current_code = f.read()
            except Exception as e:
                result["stderr"] = f"Error reading file {file_path}: {str(e)}"
                return result
            # The file content takes precedence even when it is empty; it is
            # only rejected when the code argument is empty as well.
            if not current_code.strip() and not code.strip():
                result["stderr"] = "Error: Both provided code string and file content are empty."
                return result
        elif not code.strip():
            result["stderr"] = "Error: No code provided either as a string or a file path."
            return result

        runners = {
            "python": self._execute_python,
            "bash": self._execute_bash,
            "sql": self._execute_sql,
            "c": self._execute_c,
            "java": self._execute_java,
        }
        runner = runners.get(language)
        if runner is None:
            result["stderr"] = f"Unsupported language: {language}"
            return result
        try:
            return runner(current_code, execution_id)
        except Exception as e:
            result["stderr"] = str(e)
        return result

    def _execute_python(self, code: str, execution_id: str) -> Dict[str, Any]:
        """Run Python source in-process and collect output, plots and DataFrames.

        stdout and stderr written by the snippet are captured and returned on
        both the success and the failure path. Open matplotlib figures are
        saved as ``plot_<i>.png`` under ``<working_directory>/<execution_id>``
        and returned base64-encoded. Every non-empty DataFrame found in the
        shared namespace is summarized. ``result`` is always ``None`` because
        ``exec`` does not yield a value.
        """
        output_buffer = io.StringIO()
        error_buffer = io.StringIO()
        result = self._new_result(execution_id)
        try:
            plt.switch_backend("Agg")  # headless backend: never opens a window
            with contextlib.redirect_stdout(output_buffer), contextlib.redirect_stderr(error_buffer):
                # SECURITY: exec() of caller-supplied code with full builtins.
                # No sandboxing and no timeout are applied here.
                exec(code, self.globals)

            figure_numbers = plt.get_fignums()
            if figure_numbers:
                # Created only when needed so plot-free runs leave no empty
                # directory behind.
                exec_dir = os.path.join(self.working_directory, execution_id)
                os.makedirs(exec_dir, exist_ok=True)
                for i, fig_num in enumerate(figure_numbers):
                    fig = plt.figure(fig_num)
                    img_path = os.path.join(exec_dir, f"plot_{i}.png")
                    fig.savefig(img_path)
                    with open(img_path, "rb") as img_file:
                        img_data = base64.b64encode(img_file.read()).decode("utf-8")
                    result["plots"].append({
                        "figure_number": fig_num,
                        "data": img_data,
                    })

            for var_name, var_value in self.globals.items():
                if isinstance(var_value, pd.DataFrame) and len(var_value) > 0:
                    result["dataframes"].append({
                        "name": var_name,
                        "head": var_value.head().to_dict(),
                        "shape": var_value.shape,
                        "dtypes": str(var_value.dtypes),
                    })

            result["status"] = "success"
            result["stderr"] = error_buffer.getvalue()
        except (Exception, SystemExit):
            # SystemExit is included so a snippet calling exit() is reported
            # as a failed execution instead of terminating the host process.
            result["status"] = "error"
            result["stderr"] = f"{error_buffer.getvalue()}\n{traceback.format_exc()}"
        finally:
            result["stdout"] = output_buffer.getvalue()
            # Figures live in global pyplot state; close them so they are not
            # reported again (and do not accumulate) on the next execution.
            plt.close("all")
        return result

    def _execute_bash(self, code: str, execution_id: str) -> Dict[str, Any]:
        """Run a shell script through the system shell with a timeout.

        Status is "success" only when the exit code is 0. A timeout yields an
        error result; other exceptions propagate to the caller.
        """
        try:
            # SECURITY: shell=True executes the text verbatim; trusted input only.
            completed = subprocess.run(
                code, shell=True, capture_output=True, text=True, timeout=self.max_execution_time
            )
        except subprocess.TimeoutExpired:
            return self._new_result(execution_id, stderr="Execution timed out.")
        return self._new_result(
            execution_id,
            status="success" if completed.returncode == 0 else "error",
            stdout=completed.stdout,
            stderr=completed.stderr,
        )

    def _execute_sql(self, code: str, execution_id: str) -> Dict[str, Any]:
        """Run one SQL statement against the SQLite file at ``self.temp_sqlite_db``.

        When the statement produces a result set (the cursor has a
        description), the rows are summarized under ``dataframes`` as
        ``query_result``. Changes are committed. Any failure, including
        failure to open the database, is reported via ``stderr``.
        """
        result = self._new_result(execution_id)
        try:
            # closing() guarantees the connection is released, and keeps the
            # cleanup from touching an unbound name if connect() itself fails.
            with contextlib.closing(sqlite3.connect(self.temp_sqlite_db)) as conn:
                cur = conn.cursor()
                cur.execute(code)
                # description is None for statements that return no rows, so
                # this also covers WITH/PRAGMA and comment-prefixed queries.
                if cur.description is not None:
                    columns = [description[0] for description in cur.description]
                    rows = cur.fetchall()
                    df = pd.DataFrame(rows, columns=columns)
                    result["dataframes"].append({
                        "name": "query_result",
                        "head": df.head().to_dict(),
                        "shape": df.shape,
                        "dtypes": str(df.dtypes),
                    })
                conn.commit()  # harmless no-op for pure reads
            result["status"] = "success"
            result["stdout"] = "Query executed successfully."
        except Exception as e:
            result["stderr"] = str(e)
        return result

    def _compile_and_run(self, code: str, execution_id: str, source_name: str,
                         compile_cmd, run_cmd) -> Dict[str, Any]:
        """Write ``code`` to a temp dir, compile it, run it, and clean up.

        Args:
            code: Source text to write to ``source_name``.
            execution_id: Identifier copied into the result dict.
            source_name: File name of the source inside the temp directory.
            compile_cmd: Callable ``(temp_dir, source_path) -> argv`` for the compiler.
            run_cmd: Callable ``(temp_dir) -> argv`` for the compiled program.

        Returns:
            A result dict. A non-zero compiler exit code returns the
            compiler's output with status "error". Any exception (missing
            toolchain, timeout, I/O error) becomes an error result.
        """
        try:
            # TemporaryDirectory removes sources and binaries once we are done.
            with tempfile.TemporaryDirectory() as temp_dir:
                source_path = os.path.join(temp_dir, source_name)
                with open(source_path, "w") as f:
                    f.write(code)
                compile_proc = subprocess.run(
                    compile_cmd(temp_dir, source_path),
                    capture_output=True, text=True, timeout=self.max_execution_time
                )
                if compile_proc.returncode != 0:
                    return self._new_result(
                        execution_id,
                        stdout=compile_proc.stdout,
                        stderr=compile_proc.stderr,
                    )
                run_proc = subprocess.run(
                    run_cmd(temp_dir),
                    capture_output=True, text=True, timeout=self.max_execution_time
                )
                return self._new_result(
                    execution_id,
                    status="success" if run_proc.returncode == 0 else "error",
                    stdout=run_proc.stdout,
                    stderr=run_proc.stderr,
                )
        except Exception as e:
            return self._new_result(execution_id, stderr=str(e))

    def _execute_c(self, code: str, execution_id: str) -> Dict[str, Any]:
        """Compile C source with ``gcc`` and run the resulting binary."""
        return self._compile_and_run(
            code,
            execution_id,
            "program.c",
            lambda temp_dir, source_path: ["gcc", source_path, "-o", os.path.join(temp_dir, "program")],
            lambda temp_dir: [os.path.join(temp_dir, "program")],
        )

    def _execute_java(self, code: str, execution_id: str) -> Dict[str, Any]:
        """Compile Java source with ``javac`` and run it.

        The source is written to ``Main.java`` and launched as class ``Main``.
        """
        return self._compile_and_run(
            code,
            execution_id,
            "Main.java",
            lambda temp_dir, source_path: ["javac", source_path],
            lambda temp_dir: ["java", "-cp", temp_dir, "Main"],
        )
# Module-level interpreter, constructed at import time; its constructor
# creates the working directory if it is missing.
# NOTE(review): execute_code_multilang below builds its own CodeInterpreter
# and does not read this instance — confirm whether it is used elsewhere.
interpreter_instance = CodeInterpreter()
def execute_code_multilang(code: str, language: str = "python", file_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Run code in Python, Bash, SQL, C, or Java through a freshly created CodeInterpreter.

    The code may be given inline or read from a file. When file_path is
    supplied, the file's content is what gets executed and the inline code
    string is not run.

    Args:
        code (str): Source text to execute. Not executed when file_path is provided.
        language (str, optional): Language of the code. Defaults to "python".
            Supported: "python", "bash", "sql", "c", "java".
        file_path (Optional[str], optional): Path to a file holding the code to execute.
            When provided, its content replaces the 'code' argument.

    Returns:
        Dict[str, Any]: Execution outcome with status, stdout, stderr,
            plots (Python) and dataframes (Python and SQL).
    """
    # A new interpreter per call: no Python namespace is carried over between calls.
    runner = CodeInterpreter()
    outcome = runner.execute_code(code=code, language=language, file_path=file_path)
    return outcome