Final_Assignment_Template

Sleeping

Final_Assignment_Template / tools /codetools.py

Add new tools and functionalities for audio transcription, code execution, document handling, image processing, and mathematical operations

d303e2f 9 months ago

raw

history blame contribute delete

13.4 kB

	from langchain_core.tools import tool
	import os
	import io
	import sys
	import uuid
	import base64
	import traceback
	import contextlib
	import tempfile
	import subprocess
	import sqlite3
	from typing import Dict, List, Any, Optional, Union
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from PIL import Image

	class CodeInterpreter:
	    """Execute code snippets in Python, Bash, SQL (SQLite), C or Java.

	    Every execution returns a dictionary with the keys ``execution_id``,
	    ``status`` ("success" or "error"), ``stdout``, ``stderr``, ``result``,
	    ``plots`` and ``dataframes``.

	    WARNING: this class is NOT a sandbox. Python code runs through ``exec``
	    inside the current process with the full set of builtins, Bash code is
	    handed to the system shell, and compiled C/Java programs run with the
	    privileges of the current user. Only execute code you trust.
	    """

	    def __init__(self, allowed_modules=None, max_execution_time=30, working_directory=None):
	        """Set up the interpreter state.

	        Args:
	            allowed_modules: Module names the executed code is expected to
	                use. The list is only stored on the instance; nothing in this
	                class enforces it. A falsy value selects the default list.
	            max_execution_time: Timeout in seconds for each subprocess call
	                (Bash, and the compile and run steps of C and Java). It is
	                not applied to in-process Python or SQL execution.
	            working_directory: Directory under which per-execution folders
	                for saved plots are created. Defaults to the current working
	                directory; it is created when missing.
	        """
	        self.allowed_modules = allowed_modules or [
	            "numpy", "pandas", "matplotlib", "scipy", "sklearn",
	            "math", "random", "statistics", "datetime", "collections",
	            "itertools", "functools", "operator", "re", "json",
	            "sympy", "networkx", "nltk", "PIL", "pytesseract",
	            "cmath", "uuid", "tempfile", "requests", "urllib", "os", "io",
	            "sys", "base64", "traceback", "contextlib", "sqlite3",
	        ]
	        self.max_execution_time = max_execution_time
	        self.working_directory = working_directory or os.getcwd()
	        # exist_ok avoids the race of a separate exists() check.
	        os.makedirs(self.working_directory, exist_ok=True)

	        # Namespace shared by every Python snippet run on this instance, so
	        # names defined by one execution stay visible to the next one.
	        self.globals = {
	            "__builtins__": __builtins__,
	            "np": np,
	            "pd": pd,
	            "plt": plt,
	            "Image": Image,
	        }
	        self.temp_sqlite_db = os.path.join(tempfile.gettempdir(), "code_exec.db")

	    @staticmethod
	    def _new_result(execution_id: str, status: str = "error", stdout: str = "", stderr: str = "") -> Dict[str, Any]:
	        """Build a result dictionary with the standard set of keys."""
	        return {
	            "execution_id": execution_id,
	            "status": status,
	            "stdout": stdout,
	            "stderr": stderr,
	            "result": None,
	            "plots": [],
	            "dataframes": [],
	        }

	    def execute_code(self, code: str, language: str = "python", file_path: Optional[str] = None) -> Dict[str, Any]:
	        """Execute the provided code, or the code stored in a file.

	        Args:
	            code: Source text to run. Ignored when ``file_path`` is given,
	                except that an empty file combined with empty ``code`` is
	                reported as an error.
	            language: One of "python", "bash", "sql", "c" or "java"
	                (case-insensitive).
	            file_path: Optional path to a UTF-8 text file whose content is
	                executed instead of ``code``.

	        Returns:
	            The result dictionary described on the class. Problems with the
	            input (missing file, no code, unsupported language) and
	            exceptions raised by a runner are reported through ``stderr``
	            with ``status`` set to "error".
	        """
	        language = language.lower()
	        execution_id = str(uuid.uuid4())
	        result = self._new_result(execution_id)

	        # Treat a missing code argument like an empty string so that the
	        # emptiness checks below cannot raise AttributeError.
	        code = code or ""
	        current_code = code

	        if file_path:
	            if not os.path.exists(file_path):
	                result["stderr"] = f"Error: File not found at {file_path}"
	                return result
	            if not os.path.isfile(file_path):
	                result["stderr"] = f"Error: Path {file_path} is not a file."
	                return result
	            try:
	                with open(file_path, "r", encoding="utf-8") as f:
	                    current_code = f.read()
	            except (OSError, UnicodeError) as e:
	                result["stderr"] = f"Error reading file {file_path}: {str(e)}"
	                return result
	            # The file content takes precedence even when it is empty; it is
	            # only an error when the code argument is empty as well.
	            if not current_code.strip() and not code.strip():
	                result["stderr"] = "Error: Both provided code string and file content are empty."
	                return result
	        elif not code.strip():
	            result["stderr"] = "Error: No code provided either as a string or a file path."
	            return result

	        runners = {
	            "python": self._execute_python,
	            "bash": self._execute_bash,
	            "sql": self._execute_sql,
	            "c": self._execute_c,
	            "java": self._execute_java,
	        }
	        runner = runners.get(language)
	        if runner is None:
	            result["stderr"] = f"Unsupported language: {language}"
	            return result

	        try:
	            return runner(current_code, execution_id)
	        except Exception as e:
	            # Boundary handler: any failure inside a runner becomes an error result.
	            result["stderr"] = str(e)
	            return result

	    def _collect_plots(self, execution_id: str) -> List[Dict[str, Any]]:
	        """Save every open matplotlib figure as PNG and return it base64-encoded."""
	        figure_numbers = plt.get_fignums()
	        if not figure_numbers:
	            return []

	        # The per-execution folder is only created when there is something to save.
	        exec_dir = os.path.join(self.working_directory, execution_id)
	        os.makedirs(exec_dir, exist_ok=True)

	        plots = []
	        for index, fig_num in enumerate(figure_numbers):
	            img_path = os.path.join(exec_dir, f"plot_{index}.png")
	            plt.figure(fig_num).savefig(img_path)
	            with open(img_path, "rb") as img_file:
	                encoded = base64.b64encode(img_file.read()).decode("utf-8")
	            plots.append({"figure_number": fig_num, "data": encoded})
	        return plots

	    def _collect_dataframes(self) -> List[Dict[str, Any]]:
	        """Summarize every non-empty DataFrame found in the shared namespace."""
	        return [
	            {
	                "name": name,
	                "head": value.head().to_dict(),
	                "shape": value.shape,
	                "dtypes": str(value.dtypes),
	            }
	            for name, value in list(self.globals.items())
	            if isinstance(value, pd.DataFrame) and len(value) > 0
	        ]

	    def _execute_python(self, code: str, execution_id: str) -> Dict[str, Any]:
	        """Run Python code with ``exec`` in the shared namespace.

	        stdout and stderr written by the code are captured and returned in
	        both the success and the error case. Open matplotlib figures are
	        returned as base64 PNGs and then closed, so they are not reported
	        again by later executions. ``result`` is always None because
	        ``exec`` does not produce a value. ``max_execution_time`` is not
	        enforced here.
	        """
	        output_buffer = io.StringIO()
	        error_buffer = io.StringIO()
	        result = self._new_result(execution_id)

	        try:
	            # Non-interactive backend: figures are rendered to files only.
	            plt.switch_backend("Agg")

	            with contextlib.redirect_stdout(output_buffer), contextlib.redirect_stderr(error_buffer):
	                exec(code, self.globals)

	            result["plots"] = self._collect_plots(execution_id)
	            result["dataframes"] = self._collect_dataframes()
	            result["stderr"] = error_buffer.getvalue()
	            result["status"] = "success"
	        except (Exception, SystemExit):
	            # SystemExit is caught so that exit()/sys.exit() inside the
	            # snippet cannot terminate the host process.
	            result["status"] = "error"
	            result["stderr"] = f"{error_buffer.getvalue()}\n{traceback.format_exc()}"
	        finally:
	            result["stdout"] = output_buffer.getvalue()
	            # Release figure memory and keep plots from leaking into later runs.
	            plt.close("all")

	        return result

	    def _execute_bash(self, code: str, execution_id: str) -> Dict[str, Any]:
	        """Run a shell snippet and capture its output.

	        NOTE: ``shell=True`` uses the platform default shell, which is not
	        necessarily bash. The status is "success" only for exit code 0.
	        """
	        try:
	            completed = subprocess.run(
	                code, shell=True, capture_output=True, text=True, timeout=self.max_execution_time
	            )
	        except subprocess.TimeoutExpired:
	            return self._new_result(execution_id, stderr="Execution timed out.")

	        return self._new_result(
	            execution_id,
	            status="success" if completed.returncode == 0 else "error",
	            stdout=completed.stdout,
	            stderr=completed.stderr,
	        )

	    def _execute_sql(self, code: str, execution_id: str) -> Dict[str, Any]:
	        """Run a single SQL statement against the SQLite file ``temp_sqlite_db``.

	        When the statement produces a result set (SELECT, WITH ... SELECT,
	        PRAGMA, ... RETURNING), a summary of it is added to ``dataframes``
	        under the name "query_result". Changes are committed.
	        """
	        result = self._new_result(execution_id)
	        conn = None  # stays None when connecting fails, see finally below
	        try:
	            conn = sqlite3.connect(self.temp_sqlite_db)
	            cur = conn.cursor()
	            cur.execute(code)
	            # cursor.description is None for statements without a result set,
	            # which is more reliable than inspecting the statement text.
	            if cur.description is not None:
	                columns = [description[0] for description in cur.description]
	                rows = cur.fetchall()
	                df = pd.DataFrame(rows, columns=columns)
	                result["dataframes"].append({
	                    "name": "query_result",
	                    "head": df.head().to_dict(),
	                    "shape": df.shape,
	                    "dtypes": str(df.dtypes),
	                })
	            conn.commit()

	            result["status"] = "success"
	            result["stdout"] = "Query executed successfully."
	        except Exception as e:
	            result["stderr"] = str(e)
	        finally:
	            if conn is not None:
	                conn.close()

	        return result

	    def _compile_and_run(self, code: str, execution_id: str, source_name: str, build_commands) -> Dict[str, Any]:
	        """Write ``code`` to a temporary directory, compile it and run it.

	        ``build_commands(temp_dir, source_path)`` returns the pair
	        ``(compile_command, run_command)`` as argument lists. The temporary
	        directory is removed when the run is over. A failing compile step
	        returns the compiler output with status "error".
	        """
	        try:
	            with tempfile.TemporaryDirectory() as temp_dir:
	                source_path = os.path.join(temp_dir, source_name)
	                with open(source_path, "w", encoding="utf-8") as f:
	                    f.write(code)

	                compile_cmd, run_cmd = build_commands(temp_dir, source_path)

	                compile_proc = subprocess.run(
	                    compile_cmd, capture_output=True, text=True, timeout=self.max_execution_time
	                )
	                if compile_proc.returncode != 0:
	                    return self._new_result(
	                        execution_id, stdout=compile_proc.stdout, stderr=compile_proc.stderr
	                    )

	                run_proc = subprocess.run(
	                    run_cmd, capture_output=True, text=True, timeout=self.max_execution_time
	                )
	                return self._new_result(
	                    execution_id,
	                    status="success" if run_proc.returncode == 0 else "error",
	                    stdout=run_proc.stdout,
	                    stderr=run_proc.stderr,
	                )
	        except Exception as e:
	            # Covers a missing compiler, timeouts and file system errors.
	            return self._new_result(execution_id, stderr=str(e))

	    def _execute_c(self, code: str, execution_id: str) -> Dict[str, Any]:
	        """Compile a C program with ``gcc`` and run the resulting binary."""
	        def build_commands(temp_dir, source_path):
	            binary_path = os.path.join(temp_dir, "program")
	            return ["gcc", source_path, "-o", binary_path], [binary_path]

	        return self._compile_and_run(code, execution_id, "program.c", build_commands)

	    def _execute_java(self, code: str, execution_id: str) -> Dict[str, Any]:
	        """Compile a Java program with ``javac`` and run its ``Main`` class.

	        The source is stored as ``Main.java``, so the code has to declare a
	        class named ``Main`` with a ``main`` method.
	        """
	        def build_commands(temp_dir, source_path):
	            # The source file is written as UTF-8, so tell javac explicitly.
	            return (
	                ["javac", "-encoding", "UTF-8", source_path],
	                ["java", "-cp", temp_dir, "Main"],
	            )

	        return self._compile_and_run(code, execution_id, "Main.java", build_commands)


	# Module-level interpreter, created once when this module is imported.
	# NOTE(review): execute_code_multilang in this file constructs its own
	# CodeInterpreter on every call and does not use this instance - confirm
	# whether any other module imports it before removing or reusing it.
	interpreter_instance = CodeInterpreter()

	@tool
	def execute_code_multilang(code: str = "", language: str = "python", file_path: Optional[str] = None) -> Dict[str, Any]:
	    """
	    Executes code in various languages (Python, Bash, SQL, C, Java).
	    Can execute code provided as a string or from a specified file path.
	    If file_path is provided, the content of the file will be executed.
	    If both code string and file_path are provided, the content of the file at file_path takes precedence.

	    WARNING: the code is NOT sandboxed. It runs on the host with the privileges of the
	    current process, so only trusted code should be executed.

	    Each call uses a new interpreter, so Python names defined in one call are not
	    available in the next call.

	    Args:
	        code (str, optional): The code string to execute. Ignored if file_path is provided and valid.
	            May be omitted when file_path is given.
	        language (str, optional): The programming language. Defaults to "python".
	            Supported: "python", "bash", "sql", "c", "java".
	        file_path (Optional[str], optional): Absolute path to a file containing the code to execute.
	            If provided, its content overrides the 'code' argument.

	    Returns:
	        Dict[str, Any]: A dictionary containing execution results, including status, stdout, stderr,
	            plots (for Python), and dataframes (for Python and SQL).
	    """
	    # A caller that only supplies file_path may pass no code (or None);
	    # hand the interpreter an empty string in that case.
	    return CodeInterpreter().execute_code(code=code or "", language=language, file_path=file_path)