|
|
import datetime |
|
|
import time |
|
|
|
|
|
import xml.etree.ElementTree as ET |
|
|
from http.client import responses |
|
|
|
|
|
import pandas as pd |
|
|
import pytz |
|
|
import requests |
|
|
import os |
|
|
import base64 |
|
|
import io |
|
|
|
|
|
from PIL import Image |
|
|
from openai import AzureOpenAI |
|
|
from six import binary_type |
|
|
from smolagents import tool, DuckDuckGoSearchTool |
|
|
from tavily import TavilyClient |
|
|
from langchain_community.document_loaders import WikipediaLoader |
|
|
|
|
|
|
|
|
@tool |
|
|
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Wrap each document in a pseudo-XML envelope so the model can attribute
    # content to its source. The opening tag must NOT be self-closing ("/>")
    # because a matching </Document> closing tag follows the content.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return formatted_search_docs
|
|
|
|
|
|
|
|
|
|
|
# Module-level Tavily client shared by web_search. Requires the
# TAVILY_API_KEY environment variable at import time (KeyError if unset).
client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
|
|
|
|
|
@tool |
|
|
def web_search(query: str) -> str:
    """Search Tavily for a query and return up to 3 results.

    Args:
        query: The search query.
    """
    try:
        response = client.search(query=query, max_results=3)
        # Render each hit as a <Document> envelope tagged with its URL.
        documents = []
        for hit in response.get("results", []):
            source_url = hit.get("url", "")
            body = hit.get("content", "").strip()
            documents.append(f"<Document source='{source_url}'>\n{body}\n</Document>")
        joined = "\n\n---\n\n".join(documents)
        # An empty result list yields an empty string -> fall back to a message.
        return joined or "No relevant search results found."
    except Exception as e:
        return f"[web_search error]: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
@tool |
|
|
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # Stdlib zoneinfo (PEP 615) replaces the deprecated third-party pytz;
    # datetime.now(tz) handles DST correctly with ZoneInfo objects.
    from zoneinfo import ZoneInfo

    try:
        tz = ZoneInfo(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        # Invalid zone names (ZoneInfoNotFoundError etc.) become an error string
        # rather than an exception, matching the other tools' contract.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
|
|
|
|
|
|
|
|
@tool |
|
|
def visit_webpage(url: str) -> str:
    """Fetches raw HTML content of a web page.

    Args:
        url: The url of the webpage.
    """
    try:
        # 5-second timeout so a hung server can't stall the agent loop;
        # .text is read inside the try so decode errors are also caught.
        page = requests.get(url, timeout=5)
        return page.text
    except Exception as e:
        return f"[ERROR fetching {url}]: {str(e)}"
|
|
|
|
|
|
|
|
@tool |
|
|
def calculator_add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    total = a + b
    return total
|
|
|
|
|
|
|
|
@tool |
|
|
def ocr(base64_image: str) -> str:
    """Analyzes the content of an image using gpt-4o.

    Args:
        base64_image: A base64-encoded string of the image.

    Returns: a string summary or description of what the image contains.
    """
    # Azure credentials come from the environment; .get() returns None for
    # missing keys, so misconfiguration surfaces inside the SDK call.
    # Renamed from `client` to avoid shadowing the module-level Tavily client.
    azure_client = AzureOpenAI(
        azure_endpoint=os.environ.get("AZendpoint"),
        api_version=os.environ.get("api_version"),
        api_key=os.environ.get("api_key"),
    )

    text_part = {"type": "text", "text": "Describe the image"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": "data:image/jpeg;base64," + base64_image},
    }

    completion = azure_client.chat.completions.create(
        model=os.environ["model"],
        messages=[{"role": "user", "content": [text_part, image_part]}],
    )

    return completion.choices[0].message.content
|
|
|
|
|
|
|
|
@tool |
|
|
def parse_excel(base64_excel: str) -> str:
    """
    Parses a base64-encoded Excel file and returns the first few rows as text.

    Args:
        base64_excel: Base64-encoded Excel file (.xlsx or .xls)

    Returns: a preview of the Excel data (first 5 rows).
    """
    try:
        raw_bytes = base64.b64decode(base64_excel)
        frame = pd.read_excel(io.BytesIO(raw_bytes))
        # Only the first 5 rows are shown to keep the tool output small.
        head_text = frame.head().to_string(index=False)
        return f"Excel preview: \n{head_text}"
    except Exception as e:
        # Both decoding and parsing failures collapse into one error string.
        return f"[ERROR] Failed to parse Excel file: {str(e)}"
|
|
|
|
|
@tool |
|
|
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for a query and return a summary of up to 3 papers.

    Args:
        query: The search string used to find relevant papers on ArXiv.

    Returns:
        A formatted string summarizing up to 3 relevant papers.
    """
    try:
        url = "http://export.arxiv.org/api/query"
        params = {
            "search_query": query,
            "start": 0,
            "max_results": 3,
            "sortBy": "relevance"
        }

        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()

        # The ArXiv API responds with an Atom feed; results are <entry> nodes
        # in the Atom namespace.
        root = ET.fromstring(response.text)
        ns = {"atom": "http://www.w3.org/2005/Atom"}
        entries = root.findall("atom:entry", ns)

        if not entries:
            return "No results found on ArXiv"

        def _field(entry, tag):
            # Safely extract element text: a missing or empty element yields ""
            # instead of raising AttributeError (previously one malformed entry
            # aborted the entire result set via the broad except below).
            node = entry.find(tag, ns)
            if node is None or node.text is None:
                return ""
            return node.text.strip()

        results = []
        for entry in entries:
            title = _field(entry, "atom:title")
            summary = _field(entry, "atom:summary")
            link = _field(entry, "atom:id")

            # Truncate long abstracts to keep the tool output compact.
            results.append(f"📄 **{title}**\n🔗 {link}\n\n{summary[:1000]}")

        return "\n\n---\n\n".join(results)

    except Exception as e:
        return f"[ArXiv tool error]: {str(e)}"
|
|
|