import base64
import datetime
import io
import os
import time
import xml.etree.ElementTree as ET
from http.client import responses

import pandas as pd
import pytz
import requests
from PIL import Image
from openai import AzureOpenAI
from six import binary_type
from smolagents import tool, DuckDuckGoSearchTool
from tavily import TavilyClient
from langchain_community.document_loaders import WikipediaLoader

# === Tools ===


@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Separate the articles visually so the calling agent can tell them apart.
    formatted_search_docs = "\n\n---\n\n".join(
        f"\n{doc.page_content}\n" for doc in search_docs
    )
    return formatted_search_docs


# Tavily client for web-search capability.
# Created lazily so that merely importing this module does not raise KeyError
# when TAVILY_API_KEY is unset; only web_search actually needs the key.
_tavily_client = None


def _get_tavily_client() -> TavilyClient:
    """Return a cached TavilyClient, creating it on first use.

    Raises:
        KeyError: If the TAVILY_API_KEY environment variable is not set.
    """
    global _tavily_client
    if _tavily_client is None:
        _tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
    return _tavily_client


@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return up to 3 results.

    Args:
        query: The search query.
    """
    try:
        results = _get_tavily_client().search(query=query, max_results=3)
        formatted = "\n\n---\n\n".join(
            f"\n{item.get('content', '').strip()}\n"
            for item in results.get("results", [])
        )
        return formatted or "No relevant search results found."
    except Exception as e:
        # Tools return error strings instead of raising so the agent loop
        # keeps running (e.g. on timeouts or a missing API key).
        return f"[web_search error]: {str(e)}"


@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"


@tool
def visit_webpage(url: str) -> str:
    """Fetches raw HTML content of a web page.

    Args:
        url: The url of the webpage.
    """
    try:
        # NOTE: the body is returned for any HTTP status (no raise_for_status),
        # and is returned in full — callers must handle large pages themselves.
        response = requests.get(url, timeout=5)
        return response.text
    except Exception as e:
        return f"[ERROR fetching {url}]: {str(e)}"


@tool
def calculator_add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    return a + b


@tool
def ocr(base64_image: str) -> str:
    """Analyzes the content of an image using gpt-4o.

    Args:
        base64_image: A base64-encoded string of the image.

    Returns:
        a string summary or description of what the image contains.
    """
    # Azure OpenAI credentials come from the environment; .get() returns None
    # (rather than raising) for the endpoint/version/key if unset.
    client = AzureOpenAI(
        azure_endpoint=os.environ.get("AZendpoint"),
        api_version=os.environ.get("api_version"),
        api_key=os.environ.get("api_key"),
    )
    response = client.chat.completions.create(
        model=os.environ["model"],
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe the image"},
                    {
                        "type": "image_url",
                        "image_url": {
                            # The image is embedded inline as a data URL.
                            "url": "data:image/jpeg;base64," + base64_image
                        },
                    },
                ],
            }
        ],
    )
    return response.choices[0].message.content


@tool
def parse_excel(base64_excel: str) -> str:
    """Parses a base64-encoded Excel file and returns the first few rows as text.

    Args:
        base64_excel: Base64-encoded Excel file (.xlsx or .xls)

    Returns:
        a preview of the Excel data (first 5 rows).
    """
    try:
        # Decode base64 and read into a DataFrame without touching disk.
        binary_data = base64.b64decode(base64_excel)
        df = pd.read_excel(io.BytesIO(binary_data))
        preview = df.head().to_string(index=False)
        return f"Excel preview: \n{preview}"
    except Exception as e:
        return f"[ERROR] Failed to parse Excel file: {str(e)}"


@tool
def arxiv_search(query: str) -> str:
    """Search ArXiv for a query and return a summary of up to 3 papers.

    Args:
        query: The search string used to find relevant papers on ArXiv.

    Returns:
        A formatted string summarizing up to 3 relevant papers.
    """
    try:
        url = "http://export.arxiv.org/api/query"
        params = {
            "search_query": query,
            "start": 0,
            "max_results": 3,
            "sortBy": "relevance",
        }
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()

        # The API returns an Atom feed; the namespace mapping is required for
        # find()/findtext() to resolve the element names.
        root = ET.fromstring(response.text)
        ns = {"atom": "http://www.w3.org/2005/Atom"}
        entries = root.findall("atom:entry", ns)
        if not entries:
            return "No results found on ArXiv"

        results = []
        for entry in entries:
            # findtext with a default guards against entries missing a field
            # (find(...).text would raise AttributeError on a missing element).
            title = entry.findtext("atom:title", default="", namespaces=ns).strip()
            summary = entry.findtext("atom:summary", default="", namespaces=ns).strip()
            link = entry.findtext("atom:id", default="", namespaces=ns).strip()
            # Cap each abstract at 1000 chars to keep the tool output compact.
            results.append(f"šŸ“„ **{title}**\nšŸ”— {link}\n\n{summary[:1000]}")

        return "\n\n---\n\n".join(results)
    except Exception as e:
        return f"[ArXiv tool error]: {str(e)}"