bstraehle committed on
Commit
c0b37e6
·
verified ·
1 Parent(s): 77d81e0

Create tools/ai_tools.py

Browse files
Files changed (1) hide show
  1. agents/tools/ai_tools.py +386 -0
agents/tools/ai_tools.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from crewai.tools import tool
4
+ from crewai_tools import StagehandTool
5
+ from google import genai
6
+ from google.genai import types
7
+ from mcp.mcp_client import call_mcp_tool, MCP_SSE_URL, MCP_TOOL
8
+ from utils import (
9
+ read_docx_text,
10
+ read_pptx_text,
11
+ is_ext
12
+ )
13
+
14
# LLMs

# Every Gemini-backed tool currently uses the same model; each task keeps its
# own constant so a single tool can be switched independently.
_GEMINI_FLASH = "gemini-2.5-flash"

WEB_SEARCH_MODEL = _GEMINI_FLASH
IMAGE_ANALYSIS_MODEL = _GEMINI_FLASH
AUDIO_ANALYSIS_MODEL = _GEMINI_FLASH
VIDEO_ANALYSIS_MODEL = _GEMINI_FLASH
YOUTUBE_ANALYSIS_MODEL = _GEMINI_FLASH
DOCUMENT_ANALYSIS_MODEL = _GEMINI_FLASH
ARITHMETIC_MODEL = _GEMINI_FLASH
CODE_GENERATION_MODEL = _GEMINI_FLASH
CODE_EXECUTION_MODEL = _GEMINI_FLASH

# Model name handed to the Stagehand browser tool.
WEB_BROWSER_MODEL = "claude-sonnet-4-5-latest"

# Prompt sent together with a chess board image to obtain the piece-placement
# field of a FEN string.
IMG_TO_FEN_PROMPT = """Analyze this chess board image and convert it to FEN (Forsyth-Edwards Notation).

Rules:
- Start from rank 8 (top) to rank 1 (bottom)
- For each rank, go from file a (left) to file h (right)
- Use uppercase for white pieces: K=King, Q=Queen, R=Rook, B=Bishop, N=Knight, P=Pawn
- Use lowercase for black pieces: k, q, r, b, n, p
- Use numbers (1-8) for consecutive empty squares
- Separate ranks with '/'
- Only provide the piece placement portion of FEN (the first field)
- Do not include active color, castling rights, en passant, or move counters

Example: "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR" for the starting position.

Return ONLY the FEN string, nothing else."""
43
+
44
def _code_execution_output(response):
    """Return the output of the first code-execution result in a Gemini response.

    Falls back to the response text (stripped, possibly empty) when the model
    answered without running any code, so callers never receive ``None``.
    """
    parts = response.candidates[0].content.parts or []

    for part in parts:
        if part.code_execution_result is not None:
            return part.code_execution_result.output

    return (response.text or "").strip()


class AITools():
    """CrewAI tools backed by Gemini, the Stagehand browser tool and an MCP server.

    The tools are plain functions wrapped by ``@tool`` and are accessed as
    class attributes; none of them takes ``self``.
    """

    @tool("Web Search Tool")
    def web_search_tool(question: str) -> str:
        """Given a question only, search the web to answer the question.

        Args:
            question (str): Question to answer

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            response = client.models.generate_content(
                model=WEB_SEARCH_MODEL,
                contents=question,
                config=types.GenerateContentConfig(
                    tools=[types.Tool(google_search=types.GoogleSearch())]
                )
            )

            return response.text.strip()
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Web Browser Tool")
    def web_browser_tool(question: str, url: str) -> str:
        """Given a question and URL, load the URL and act, extract, or observe to answer the question.

        Args:
            question (str): Question about a URL
            url (str): The URL

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails"""
        # Bound before the try so the finally clause is safe even when the
        # constructor (or an environment lookup) fails.
        stagehand_tool = None

        try:
            stagehand_tool = StagehandTool(
                api_key=os.environ["BROWSERBASE_API_KEY"],
                project_id=os.environ["BROWSERBASE_PROJECT_ID"],
                model_api_key=os.environ["MODEL_API_KEY"],
                model_name=WEB_BROWSER_MODEL,
                dom_settle_timeout_ms=5000,
                headless=True,
                self_heal=True,
                wait_for_captcha_solves=True,
                verbose=3
            )

            return stagehand_tool.run(
                instruction=question,
                url=url
            )
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e
        finally:
            if stagehand_tool is not None:
                stagehand_tool.close()

    @tool("Chess Analysis Tool")
    def chess_analysis_tool(question: str, file_path: str) -> str:
        """Given a chess question and image file, analyze the image to answer the question.

        Args:
            question (str): Chess question about an image file
            file_path (str): The image file path

        Returns:
            str: Answer to the chess question about the image file

        Raises:
            RuntimeError: If processing fails"""
        try:
            # Process image to FEN

            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            file = client.files.upload(file=file_path)

            response = client.models.generate_content(
                model=IMAGE_ANALYSIS_MODEL,
                contents=[file, IMG_TO_FEN_PROMPT]
            )

            fen = response.text.strip()

            # Call MCP server (the environment variable overrides the default URL)

            mcp_url = os.getenv("MCP_SSE_URL", MCP_SSE_URL)

            return call_mcp_tool(
                mcp_url=mcp_url,
                tool_name=MCP_TOOL,
                arguments={"question": question, "fen": fen}
            )
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Image Analysis Tool")
    def image_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and image file, analyze the image to answer the question.

        Args:
            question (str): Question about an image file
            file_path (str): The image file path

        Returns:
            str: Answer to the question about the image file

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            file = client.files.upload(file=file_path)

            response = client.models.generate_content(
                model=IMAGE_ANALYSIS_MODEL,
                contents=[file, question]
            )

            return response.text.strip()
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Audio Analysis Tool")
    def audio_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and audio file, analyze the audio to answer the question.

        Args:
            question (str): Question about an audio file
            file_path (str): The audio file path

        Returns:
            str: Answer to the question about the audio file

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            file = client.files.upload(file=file_path)

            response = client.models.generate_content(
                model=AUDIO_ANALYSIS_MODEL,
                contents=[file, question]
            )

            return response.text.strip()
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Video Analysis Tool")
    def video_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and video file, analyze the video to answer the question.

        Args:
            question (str): Question about a video file
            file_path (str): The video file path

        Returns:
            str: Answer to the question about the video file

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            file = client.files.upload(file=file_path)

            response = client.models.generate_content(
                model=VIDEO_ANALYSIS_MODEL,
                contents=[file, question]
            )

            return response.text.strip()
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("YouTube Analysis Tool")
    def youtube_analysis_tool(question: str, url: str) -> str:
        """Given a question and YouTube URL, analyze the video to answer the question.

        Args:
            question (str): Question about a YouTube video
            url (str): The YouTube URL

        Returns:
            str: Answer to the question about the YouTube video

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            response = client.models.generate_content(
                model=YOUTUBE_ANALYSIS_MODEL,
                contents=types.Content(
                    parts=[types.Part(file_data=types.FileData(file_uri=url)),
                           types.Part(text=question)]
                )
            )

            # Return the answer text, like the other tools, not the response object.
            return response.text.strip()
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Document Analysis Tool")
    def document_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and document file, analyze the document to answer the question.

        Args:
            question (str): Question about a document file
            file_path (str): The document file path

        Returns:
            str: Answer to the question about the document file

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            # .docx and .pptx are converted to text locally and sent inline;
            # every other file type is uploaded as-is.
            if is_ext(file_path, ".docx"):
                text_data = read_docx_text(file_path)
                contents = [f"{question}\n{text_data}"]
                print(f"=> Text data:\n{text_data}")
            elif is_ext(file_path, ".pptx"):
                text_data = read_pptx_text(file_path)
                contents = [f"{question}\n{text_data}"]
                print(f"=> Text data:\n{text_data}")
            else:
                file = client.files.upload(file=file_path)
                contents = [file, question]

            response = client.models.generate_content(
                model=DOCUMENT_ANALYSIS_MODEL,
                contents=contents
            )

            return response.text.strip()
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Arithmetic Tool")
    def arithmetic_tool(question: str, a: float, b: float) -> float:
        """Given a question and two numbers, perform the calculation to answer the question.

        Args:
            question (str): Question to answer
            a (float): First number
            b (float): Second number

        Returns:
            float: Result number

        Raises:
            RuntimeError: If processing fails"""
        # The arithmetic helpers handed to the model as callable tools. They are
        # defined here because nothing in this module defines or imports them.
        def add(a: float, b: float) -> float:
            """Add two numbers and return a + b."""
            return a + b

        def subtract(a: float, b: float) -> float:
            """Subtract the second number from the first and return a - b."""
            return a - b

        def multiply(a: float, b: float) -> float:
            """Multiply two numbers and return a * b."""
            return a * b

        def divide(a: float, b: float) -> float:
            """Divide the first number by the second and return a / b."""
            return a / b

        def modulus(a: float, b: float) -> float:
            """Return the remainder of the first number divided by the second, a % b."""
            return a % b

        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            response = client.models.generate_content(
                model=ARITHMETIC_MODEL,
                # Pass the operands explicitly so the model does not have to
                # re-extract them from the question text.
                contents=f"{question}\na = {a}\nb = {b}",
                config=types.GenerateContentConfig(
                    tools=[add, subtract, multiply, divide, modulus]
                )
            )

            # NOTE(review): the model's textual answer is returned although the
            # signature declares float - confirm what callers expect.
            return response.text.strip()
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Code Generation Tool")
    def code_generation_tool(question: str, json_data: str) -> str:
        """Given a question and JSON data, generate and execute code to answer the question.

        Args:
            question (str): Question to answer
            json_data (str): The JSON data

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            response = client.models.generate_content(
                model=CODE_GENERATION_MODEL,
                contents=[f"{question}\n{json_data}"],
                config=types.GenerateContentConfig(
                    tools=[types.Tool(code_execution=types.ToolCodeExecution)]
                ),
            )

            return _code_execution_output(response)
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e

    @tool("Code Execution Tool")
    def code_execution_tool(question: str, file_path: str) -> str:
        """Given a question and Python file, execute the file to answer the question.

        Args:
            question (str): Question to answer
            file_path (str): The Python file path

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails"""
        try:
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            file = client.files.upload(file=file_path)

            response = client.models.generate_content(
                model=CODE_EXECUTION_MODEL,
                contents=[file, question],
                config=types.GenerateContentConfig(
                    tools=[types.Tool(code_execution=types.ToolCodeExecution)]
                ),
            )

            return _code_execution_output(response)
        except Exception as e:
            raise RuntimeError(f"Processing failed: {e}") from e