lcapriles committed on
Commit 9b1db64 · verified · 1 Parent(s): cc8200f
Files changed (1)
  1. tools.py +184 -0
tools.py ADDED
import base64
import datetime
import io
import os
import xml.etree.ElementTree as ET

import pandas as pd
import pytz
import requests
from langchain_community.document_loaders import WikipediaLoader
from openai import AzureOpenAI
from PIL import Image
from smolagents import tool, DuckDuckGoSearchTool
from tavily import TavilyClient

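# Required environment variables (as referenced by the tools below):
#   TAVILY_API_KEY        - Tavily web search
#   AZURE_OPENAI_ENDPOINT - Azure OpenAI endpoint
#   AZURE_OPENAI_API_KEY  - Azure OpenAI API key
#   OPENAI_API_VERSION    - Azure OpenAI API version
#   AZURE_OPENAI_MODEL    - Azure OpenAI deployment name
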
# === Tools ===
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return a maximum of 2 results.

    Args:
        query: The search query.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )
    return formatted_search_docs

# Tool for web search capabilities.
# TODO: improve fallback handling for timeout errors (see the sketch below).
client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])


@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return up to 3 results.

    Args:
        query: The search query.
    """
    try:
        results = client.search(query=query, max_results=3)
        formatted = "\n\n---\n\n".join(
            f"<Document source='{item.get('url', '')}'>\n{item.get('content', '').strip()}\n</Document>"
            for item in results.get("results", [])
        )
        return formatted or "No relevant search results found."
    except Exception as e:
        return f"[web_search error]: {e}"

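# Fallback sketch (an assumption, not part of the original commit): retry a
# failed Tavily search with smolagents' DuckDuckGoSearchTool, which is already
# imported above. It is assumed that @tool objects and DuckDuckGoSearchTool
# instances are callable with the query string, per smolagents' Tool API.
_ddg = DuckDuckGoSearchTool()


@tool
def web_search_with_fallback(query: str) -> str:
    """Search Tavily first; on an error, fall back to DuckDuckGo.

    Args:
        query: The search query.
    """
    result = web_search(query)
    if not result.startswith("[web_search error]"):
        return result
    # Tavily timed out or errored; fall through to DuckDuckGo.
    try:
        return _ddg(query)
    except Exception as e:
        return f"[web_search_with_fallback error]: {e}"
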
# Tool to obtain the current time in a given timezone.
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {e}"

# Tool to get the HTML content of a web page.
@tool
def visit_webpage(url: str) -> str:
    """Fetches the raw HTML content of a web page.

    Args:
        url: The URL of the webpage.
    """
    try:
        response = requests.get(url, timeout=5)
        return response.text  # Optionally truncate, e.g. response.text[:5000].
    except Exception as e:
        return f"[ERROR fetching {url}]: {e}"

# Tool for add operations.
@tool
def calculator_add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    return a + b

# Tool for image understanding.
@tool
def ocr(base64_image: str) -> str:
    """Analyzes the content of an image using gpt-4o.

    Args:
        base64_image: A base64-encoded string of the image.
    Returns: a string summary or description of what the image contains.
    """
    # Named azure_client to avoid shadowing the module-level Tavily client.
    azure_client = AzureOpenAI(
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
        api_version=os.environ.get("OPENAI_API_VERSION"),
    )

    response = azure_client.chat.completions.create(
        model=os.environ["AZURE_OPENAI_MODEL"],
        messages=[
            {"role": "user", "content": [
                {"type": "text", "text": "Describe the image"},
                {"type": "image_url", "image_url": {
                    "url": "data:image/jpeg;base64," + base64_image
                }},
            ]}
        ],
    )

    return response.choices[0].message.content

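# Helper sketch (an assumption, not in the original commit): builds the
# base64 payload that ocr() expects from a local image file, using the PIL,
# io, and base64 imports above. The path "photo.jpg" is a placeholder.
def encode_image_as_base64(path: str) -> str:
    """Load an image, re-encode it as JPEG, and return a base64 string."""
    with Image.open(path) as img:
        buffer = io.BytesIO()
        img.convert("RGB").save(buffer, format="JPEG")
        return base64.b64encode(buffer.getvalue()).decode("utf-8")

# Example: ocr(encode_image_as_base64("photo.jpg"))
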
# Tool for data parsing.
@tool
def parse_excel(base64_excel: str) -> str:
    """
    Parses a base64-encoded Excel file and returns the first few rows as text.

    Args:
        base64_excel: Base64-encoded Excel file (.xlsx or .xls).
    Returns: a preview of the Excel data (first 5 rows).
    """
    try:
        # Decode base64 and read into a DataFrame.
        binary_data = base64.b64decode(base64_excel)
        df = pd.read_excel(io.BytesIO(binary_data))

        # Optional: customize logic based on column names.
        preview = df.head().to_string(index=False)
        return f"Excel preview:\n{preview}"

    except Exception as e:
        return f"[ERROR] Failed to parse Excel file: {e}"

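# Usage sketch (assumption; "data.xlsx" is a placeholder path): build the
# base64 payload for parse_excel() from a local file.
# with open("data.xlsx", "rb") as f:
#     print(parse_excel(base64.b64encode(f.read()).decode("utf-8")))
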
@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for a query and return a summary of up to 3 papers.

    Args:
        query: The search string used to find relevant papers on ArXiv.
    Returns:
        A formatted string summarizing up to 3 relevant papers.
    """
    try:
        # API URL and query parameters.
        url = "http://export.arxiv.org/api/query"
        params = {
            "search_query": query,
            "start": 0,
            "max_results": 3,
            "sortBy": "relevance",
        }
        # Make the API request.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()

        # Parse the XML response.
        root = ET.fromstring(response.text)  # Convert the XML string into an element tree.
        ns = {"atom": "http://www.w3.org/2005/Atom"}  # Declare the Atom namespace (required for lookups).
        entries = root.findall("atom:entry", ns)  # Retrieve all <entry> elements from the feed.

        if not entries:
            return "No results found on ArXiv"

        results = []
        for entry in entries:
            title = entry.find("atom:title", ns).text.strip()
            summary = entry.find("atom:summary", ns).text.strip()
            link = entry.find("atom:id", ns).text.strip()

            results.append(f"📄 **{title}**\n🔗 {link}\n\n{summary[:1000]}")

        return "\n\n---\n\n".join(results)

    except Exception as e:
        return f"[ArXiv tool error]: {e}"