| import datetime |
| import time |
|
|
| import xml.etree.ElementTree as ET |
| from http.client import responses |
|
|
| import pandas as pd |
| import pytz |
| import requests |
| import os |
| import base64 |
| import io |
|
|
| from PIL import Image |
| from openai import AzureOpenAI |
| from six import binary_type |
| from smolagents import tool, DuckDuckGoSearchTool |
| from tavily import TavilyClient |
| from langchain_community.document_loaders import WikipediaLoader |
|
|
| |
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        The matched documents wrapped in <Document> tags, separated by
        "---" dividers, or an empty string when nothing is found.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # The opening tag must not be self-closing ("/>"): the original template
    # emitted <Document .../> followed by </Document>, which is malformed —
    # the closing tag had no matching open tag.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">'
        f"\n{doc.page_content}\n</Document>"
        for doc in search_docs
    )
    return formatted_search_docs
|
|
| |
| |
# Module-level Tavily client shared by web_search below.
# NOTE(review): os.environ[...] raises KeyError at import time when
# TAVILY_API_KEY is unset — presumably an intentional fail-fast; confirm.
client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
|
|
@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return up to 3 results.
    Args:
        query: The search query.
    """
    try:
        response = client.search(query=query, max_results=3)
        # Wrap every hit in a <Document> tag carrying its source URL.
        docs = []
        for hit in response.get("results", []):
            source = hit.get("url", "")
            body = hit.get("content", "").strip()
            docs.append(f"<Document source='{source}'>\n{body}\n</Document>")
        if docs:
            return "\n\n---\n\n".join(docs)
        return "No relevant search results found."
    except Exception as e:
        return f"[web_search error]: {str(e)}"
|
|
|
|
| |
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        zone = pytz.timezone(timezone)
        now = datetime.datetime.now(zone)
        stamp = now.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        # Unknown timezone names (and any other failure) come back as text
        # rather than raising, so the calling agent can recover.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
    return f"The current local time in {timezone} is: {stamp}"
|
|
| |
@tool
def visit_webpage(url: str) -> str:
    """Fetches raw HTML content of a web page.
    Args:
        url: The url of the webpage.
    Returns:
        The page body as text, or an "[ERROR fetching ...]" string on failure.
    """
    try:
        response = requests.get(url, timeout=5)
        # Surface HTTP errors (4xx/5xx) instead of silently returning the
        # error page body as if it were valid content; this also matches
        # arxiv_search, which checks the status the same way.
        response.raise_for_status()
        return response.text
    except Exception as e:
        return f"[ERROR fetching {url}]: {str(e)}"
|
|
| |
@tool
def calculator_add(a: int, b: int) -> int:
    """Add two numbers.
    Args:
        a: first int
        b: second int
    """
    total = a + b
    return total
|
|
| |
@tool
def ocr(base64_image: str) -> str:
    """Analyzes the content of an image using gpt-4o.
    Args:
        base64_image: A base64-encoded string of the image.
    Returns: a string summary or description of what the image contains.
    """
    # A fresh Azure client per call; credentials are read from the environment.
    azure_client = AzureOpenAI(
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
        api_version=os.environ.get("OPENAI_API_VERSION"),
    )

    # One user turn: a text instruction followed by the inline image payload.
    text_part = {"type": "text", "text": "Describe the image"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": "data:image/jpeg;base64," + base64_image},
    }

    completion = azure_client.chat.completions.create(
        model=os.environ["AZURE_OPENAI_MODEL"],
        messages=[{"role": "user", "content": [text_part, image_part]}],
    )
    return completion.choices[0].message.content
|
|
| |
@tool
def parse_excel(base64_excel: str) -> str:
    """
    Parses a base64-encoded Excel file and returns the first few rows as text.
    Args:
        base64_excel: Base64-encoded Excel file (.xlsx or .xls)
    Returns: a preview of the Excel data (first 5 rows).
    """
    try:
        binary_data = base64.b64decode(base64_excel)
        df = pd.read_excel(io.BytesIO(binary_data))
        # DataFrame.head() defaults to the first 5 rows.
        preview = df.head().to_string(index=False)
        return f"Excel preview: \n{preview}"
    except Exception as e:
        return f"[ERROR] Failed to parse Excel file: {str(e)}"
|
|
@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for a query and return a summary of up to 3 papers.
    Args:
        query: The search string used to find relevant papers on ArXiv.
    Returns:
        A formatted string summarizing up to 3 relevant papers.
    """
    try:
        api_url = "http://export.arxiv.org/api/query"
        query_params = {
            "search_query": query,
            "start": 0,
            "max_results": 3,
            "sortBy": "relevance",
        }
        resp = requests.get(api_url, params=query_params, timeout=10)
        resp.raise_for_status()

        # The API responds with an Atom feed; each <entry> is one paper.
        feed = ET.fromstring(resp.text)
        atom = {"atom": "http://www.w3.org/2005/Atom"}
        papers = feed.findall("atom:entry", atom)
        if not papers:
            return "No results found on ArXiv"

        sections = []
        for paper in papers:
            title = paper.find("atom:title", atom).text.strip()
            abstract = paper.find("atom:summary", atom).text.strip()
            link = paper.find("atom:id", atom).text.strip()
            # Abstracts are capped at 1000 chars to keep the output compact.
            sections.append(f"📄 **{title}**\n🔗 {link}\n\n{abstract[:1000]}")
        return "\n\n---\n\n".join(sections)
    except Exception as e:
        return f"[ArXiv tool error]: {str(e)}"
|
|