import io
import os
import re
import sys
from typing import List, Callable, Any

import openai
import pandas as pd
import requests
from dotenv import load_dotenv
from google import genai
from google.genai import types
from langchain_community.document_loaders import (
    WebBaseLoader,
    ImageCaptionLoader,
    WikipediaLoader,
    ArxivLoader,
)
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.tools import tool
from langchain_text_splitters import CharacterTextSplitter

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

load_dotenv()

def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

def subtract(a: int, b: int) -> int:
    """Subtract b from a."""
    return a - b

def divide(a: int, b: int) -> float:
    """Divide a by b, raising ValueError when b is zero."""
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b

def modulus(a: int, b: int) -> int:
    """Return the remainder of a divided by b."""
    return a % b

def wiki_search(query: str) -> str:
    """Search Wikipedia for the query and return up to two results as tagged documents."""
    print("wiki_search called with:", query)
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )
    return formatted_search_docs
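
# Illustrative usage of wiki_search (assumes Wikipedia is reachable; the query
# string below is an arbitrary example, not part of this project):
#   print(wiki_search("Alan Turing"))
#   # -> one or two <Document source="..."> blocks separated by "---"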

def visit_webpage(url: str) -> str:
    """Fetch a web page and return the first 5000 characters of its raw text."""
    try:
        response = requests.get(url, timeout=5)
        return response.text[:5000]
    except Exception as e:
        return f"[ERROR fetching {url}]: {str(e)}"

def duckduck_websearch(query: str) -> str:
    """Run a DuckDuckGo search, load the top result pages, and return their cleaned text."""
    search_engine = DuckDuckGoSearchResults(output_format="list", num_results=2)
    results = search_engine.invoke({"query": query})
    page_urls = [result["link"] for result in results]
    loader = WebBaseLoader(web_paths=page_urls)
    docs = loader.load()
    combined_text = "\n\n".join(doc.page_content[:15000] for doc in docs)
    # Collapse runs of blank lines and long runs of spaces/tabs, then strip
    cleaned_text = re.sub(r'\n{3,}', '\n\n', combined_text)
    cleaned_text = re.sub(r'[ \t]{6,}', ' ', cleaned_text)
    return cleaned_text.strip()

def text_splitter(text: str) -> List[str]:
    """Split text into chunks of roughly 450 characters with a 10-character overlap."""
    splitter = CharacterTextSplitter(chunk_size=450, chunk_overlap=10)
    return splitter.split_text(text)
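
# Illustrative usage of text_splitter. CharacterTextSplitter splits on "\n\n"
# by default, so individual chunks can exceed chunk_size when a paragraph is
# longer than 450 characters; long_document_text below is a placeholder:
#   chunks = text_splitter(long_document_text)
#   print(len(chunks), len(chunks[0]))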

def read_file(task_id: str) -> str:
    """Download the file attached to a task and return its contents as text."""
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    r = requests.get(file_url, timeout=15, allow_redirects=True)
    with open('temp', "wb") as fp:
        fp.write(r.content)
    with open('temp') as f:
        return f.read()

def excel_read(task_id: str) -> str:
    """Download the Excel file attached to a task and summarise its shape, columns, and statistics."""
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        with open('temp.xlsx', "wb") as fp:
            fp.write(r.content)
        # Read the Excel file
        df = pd.read_excel('temp.xlsx')
        # Describe the loaded data
        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {', '.join(df.columns)}\n\n"
        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"

def csv_read(task_id: str) -> str:
    """Download the CSV file attached to a task and summarise its shape, columns, and statistics."""
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        with open('temp.csv', "wb") as fp:
            fp.write(r.content)
        # Read the CSV file
        df = pd.read_csv('temp.csv')
        # Describe the loaded data
        result = (
            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {', '.join(df.columns)}\n\n"
        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"

def image_caption(task_id: str) -> str:
    """Generate a caption for the image attached to a task."""
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    loader = ImageCaptionLoader(images=[file_url])
    metadata = loader.load()
    return metadata[0].page_content

def youtube_search(youtube_url: str, question: str) -> str:
    """Ask Gemini a question about the content of a YouTube video."""
    client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
    response = client.models.generate_content(
        model='models/gemini-2.5-flash',
        contents=types.Content(
            parts=[
                types.Part(
                    file_data=types.FileData(file_uri=youtube_url)
                ),
                types.Part(text=question)
            ]
        )
    )
    return response.text
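
# Illustrative usage of youtube_search (requires GOOGLE_API_KEY in the
# environment; the URL below is a placeholder, not from this project):
#   answer = youtube_search("https://www.youtube.com/watch?v=<video_id>",
#                           "What topic is discussed in this video?")
#   print(answer)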

def arvix_search(query: str) -> str:
    """Search arXiv for the query and return up to three results as tagged documents."""
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            # ArxivLoader metadata may not include a "source" key; fall back to the paper title
            f'<Document source="{doc.metadata.get("source", doc.metadata.get("Title", ""))}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ]
    )
    return formatted_search_docs

def whisper_transcribe_api(task_id: str) -> str:
    """Download the audio file attached to a task and transcribe it with the OpenAI Whisper API."""
    openai.api_key = os.getenv("OPENAI_API_KEY")
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    try:
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        temp_path = 'temp.mp3'
        with open(temp_path, "wb") as fp:
            fp.write(r.content)
        with open(temp_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1"
            )
        return transcript.text
    except Exception as e:
        return f"Error transcribing audio: {e}"

def run_python_file(task_id: str, file_name: str) -> str:
    """Download the Python file attached to a task, execute it, and return its result or stdout."""
    file_path = file_name
    buffer = io.StringIO()
    old_stdout = sys.stdout
    ns = {"__builtins__": __builtins__, "__name__": "__main__"}
    try:
        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        if r.status_code != 200:
            return f"❌ Failed to download file: {r.status_code}"
        with open(file_path, "wb") as f:
            f.write(r.content)
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            code = f.read()
        # Capture stdout while executing the downloaded code
        sys.stdout = buffer
        try:
            compiled = compile(code, file_path, "exec")
            exec(compiled, ns, ns)
        finally:
            sys.stdout = old_stdout
        if "result" in ns:
            return str(ns["result"])
        else:
            output = buffer.getvalue().strip()
            return output or "No output produced."
    except Exception as e:
        # Prefer returning a computed result or any partial stdout if available
        try:
            sys.stdout = old_stdout
        except Exception:
            pass
        if "result" in ns:
            return str(ns["result"])
        output = buffer.getvalue().strip()
        if output:
            return output
        return f"❌ Error executing Python file: {e}"
    finally:
        # Ensure the downloaded code file is removed after execution
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
        except Exception:
            pass
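
# Illustrative behaviour of run_python_file: if the downloaded script assigns a
# top-level variable named `result`, that value is returned; otherwise the
# captured stdout is returned. The task_id and file name below are placeholders:
#   print(run_python_file("<task_id>", "downloaded_script.py"))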

TOOLS: List[Callable[..., Any]] = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    duckduck_websearch,
    arvix_search,
    wiki_search,
    visit_webpage,
    youtube_search,
    text_splitter,
    read_file,
    excel_read,
    csv_read,
    image_caption,
    whisper_transcribe_api,
    run_python_file,
]