# Source: lcapriles — Commit 4 (4f47d72, verified)
import datetime
import time
import xml.etree.ElementTree as ET
from http.client import responses
import pandas as pd
import pytz
import requests
import os
import base64
import io
from PIL import Image
from openai import AzureOpenAI
from six import binary_type
from smolagents import tool, DuckDuckGoSearchTool
from tavily import TavilyClient
from langchain_community.document_loaders import WikipediaLoader
# === Tools ===
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.
    """
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
        # Fix: the opening tag previously self-closed ("…/>") while a
        # matching "</Document>" close tag was still emitted — malformed
        # markup. Open with a plain ">" instead.
        formatted_search_docs = "\n\n---\n\n".join(
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n'
            f"{doc.page_content}\n</Document>"
            for doc in search_docs
        )
        return formatted_search_docs
    except Exception as e:
        # Consistent with the other tools: report failures as a string
        # instead of raising into the agent loop.
        return f"[wiki_search error]: {str(e)}"
# tool for websearch capabilities
# must improve fall back for timeout errors
# Module-level Tavily client shared by web_search below.
# NOTE(review): os.environ["TAVILY_API_KEY"] raises KeyError at import time
# when the variable is unset — confirm this fail-fast behavior is intended.
client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return up to 3 results.

    Args:
        query: The search query.
    """
    try:
        hits = client.search(query=query, max_results=3).get("results", [])
        docs = [
            f"<Document source='{hit.get('url', '')}'>\n{hit.get('content', '').strip()}\n</Document>"
            for hit in hits
        ]
        if docs:
            return "\n\n---\n\n".join(docs)
        return "No relevant search results found."
    except Exception as e:
        return f"[web_search error]: {str(e)}"
# tool to obtain real current time zone
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        zone = pytz.timezone(timezone)
        stamp = datetime.datetime.now(zone).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {stamp}"
    except Exception as e:
        # Invalid zone names (pytz.UnknownTimeZoneError) land here.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
# tool to get the HTML content of a web page
@tool
def visit_webpage(url: str) -> str:
    """Fetches raw HTML content of a web page.

    Args:
        url: The url of the webpage.

    Returns:
        The response body as text, or an "[ERROR fetching …]" string on failure.
    """
    try:
        response = requests.get(url, timeout=5)
        # Fix: surface HTTP errors (4xx/5xx) as an error string instead of
        # silently returning the error page's HTML body.
        response.raise_for_status()
        return response.text
    except Exception as e:
        return f"[ERROR fetching {url}]: {str(e)}"
# tool for add operations
@tool
def calculator_add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    total = a + b
    return total
# tool for image understanding
@tool
def ocr(base64_image: str) -> str:
    """Analyzes the content of an image using gpt-4o.

    Args:
        base64_image: A base64-encoded string of the image.

    Returns: a string summary or description of what the image contains,
        or an "[ocr error]: …" string on failure.
    """
    try:
        # Renamed local: the original `client` shadowed the module-level
        # Tavily client of the same name.
        azure_client = AzureOpenAI(
            azure_endpoint=os.environ.get("AZendpoint"),
            api_version=os.environ.get("api_version"),
            api_key=os.environ.get("api_key"),
        )
        response = azure_client.chat.completions.create(
            model=os.environ["model"],
            messages=[
                {"role": "user", "content": [
                    {"type": "text", "text": "Describe the image"},
                    {"type": "image_url", "image_url": {
                        "url": "data:image/jpeg;base64," + base64_image
                    }},
                ]}
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        # Fix: this was the only tool with no error handling; match the
        # siblings and return the failure as a string instead of raising.
        return f"[ocr error]: {str(e)}"
# tool for data parsing
@tool
def parse_excel(base64_excel: str) -> str:
    """
    Parses a base64-encoded Excel file and returns the first few rows as text.

    Args:
        base64_excel: Base64-encoded Excel file (.xlsx or .xls)

    Returns: a preview of the Excel data (first 5 rows).
    """
    try:
        # Decode the payload and load it straight from memory.
        raw_bytes = base64.b64decode(base64_excel)
        frame = pd.read_excel(io.BytesIO(raw_bytes))
        preview = frame.head().to_string(index=False)
    except Exception as e:
        return f"[ERROR] Failed to parse Excel file: {str(e)}"
    return f"Excel preview: \n{preview}"
@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for a query and return a summary of up to 3 papers.

    Args:
        query: The search string used to find relevant papers on ArXiv.

    Returns:
        A formatted string summarizing up to 3 relevant papers, or an
        "[ArXiv tool error]: …" string on failure.
    """
    try:
        # api url and query parameters
        url = "http://export.arxiv.org/api/query"
        params = {
            "search_query": query,
            "start": 0,
            "max_results": 3,
            "sortBy": "relevance",
        }
        # making the api request
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        # parse the xml response into an element tree
        root = ET.fromstring(response.text)
        # Atom namespace is required for correct element lookup
        ns = {"atom": "http://www.w3.org/2005/Atom"}
        entries = root.findall("atom:entry", ns)
        if not entries:
            return "No results found on ArXiv"
        results = []
        for entry in entries:
            # Fix: `entry.find(...).text` raised AttributeError when an
            # entry omitted an element, yielding a confusing "'NoneType'
            # has no attribute 'text'" error; findtext with a default is safe.
            title = entry.findtext("atom:title", default="", namespaces=ns).strip()
            summary = entry.findtext("atom:summary", default="", namespaces=ns).strip()
            link = entry.findtext("atom:id", default="", namespaces=ns).strip()
            results.append(f"📄 **{title}**\n🔗 {link}\n\n{summary[:1000]}")
        return "\n\n---\n\n".join(results)
    except Exception as e:
        return f"[ArXiv tool error]: {str(e)}"