Thanh Vinh Vo committed on
Commit
2d82e56
·
1 Parent(s): dc8c03a
Files changed (1) hide show
  1. app.py +97 -18
app.py CHANGED
@@ -10,7 +10,6 @@ from PIL import Image
10
  from smolagents import (
11
  CodeAgent,
12
  DuckDuckGoSearchTool,
13
- GoogleSearchTool,
14
  InferenceClientModel,
15
  load_tool,
16
  OpenAIServerModel,
@@ -18,7 +17,6 @@ from smolagents import (
18
  Tool,
19
  ToolCollection,
20
  VisitWebpageTool,
21
- WikipediaSearchTool
22
  )
23
  import whisper
24
 
@@ -27,6 +25,44 @@ import whisper
27
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  @tool
31
  def audio_to_text(file_path: str) -> str:
32
  """
@@ -145,8 +181,8 @@ class BasicAgent:
145
  def __init__(self):
146
  print("BasicAgent initialized.")
147
  self.multimodal_agent = CodeAgent(
148
- tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
149
- model= OpenAIServerModel(model_id="gpt-4o", temperature=0.0,),
150
  additional_authorized_imports=[
151
  "requests",
152
  "bs4",
@@ -161,8 +197,7 @@ class BasicAgent:
161
  "numpy",
162
  "json",
163
  "whisper",
164
- "openpyxl",
165
- "youtube_transcript_api",
166
  ],
167
  name="multimodal_agent",
168
  description="""
@@ -172,9 +207,9 @@ class BasicAgent:
172
  )
173
 
174
  self.code_agent = CodeAgent(
175
- tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
176
  model=InferenceClientModel(
177
- model_id="Qwen/Qwen2.5-Coder-32B-Instruct"
178
  ),
179
  additional_authorized_imports=[
180
  "requests",
@@ -186,29 +221,73 @@ class BasicAgent:
186
  "PIL",
187
  "chess",
188
  "img2text",
 
189
  "PIL.Image",
190
  "bytes",
191
  "cv2",
192
  "numpy",
 
193
  "json",
194
  "whisper",
195
- "openpyxl",
196
- "youtube_transcript_api",
197
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  )
199
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  def __call__(self, question: str, question_id: str, file_name: str) -> str:
201
  print(f"Agent received question: {question}")
202
  file = f"Mentioned file: {file_name}" if file_name else ""
203
  prompt = f"""
204
- Answer the following question (question_id is {question_id}):
205
  "{question}""{file}"
206
- Please follow rules below:
207
- 1. `wikipedia` Python package is provided, we should use it to interact with Wikipedia pages.
208
- 2. `pandas` Python package is provided, we should use it to read table data from HTML pages.
209
- 3. Take the question literally! Do not add any additional information or assumptions.
210
  """
211
- result = self.code_agent.run(prompt)
212
  print(f"Agent responded with: {result}")
213
  return result
214
 
 
10
  from smolagents import (
11
  CodeAgent,
12
  DuckDuckGoSearchTool,
 
13
  InferenceClientModel,
14
  load_tool,
15
  OpenAIServerModel,
 
17
  Tool,
18
  ToolCollection,
19
  VisitWebpageTool,
 
20
  )
21
  import whisper
22
 
 
25
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
26
 
27
 
28
@tool
def extract_table_from_html(html: str, match: str | None = None) -> list:
    """
    A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
    Example use cases include extracting tables from Wikipedia pages, HTML emails, or other web content.

    Args:
        html (str): The HTML content containing HTML tables to extract. This can be raw HTML
            string content or a URL to a webpage.
        match (str | None, optional): A string or regular expression pattern to match
            against table text content. If None, all tables are extracted. Defaults to None.
            DO NOT use HTML strings / tags in this parameter.

    Returns:
        list: A list of pandas DataFrames, where each DataFrame represents a table found
            in the HTML content. Returns an empty list if no tables are found.

    Raises:
        ValueError: If pandas rejects the input for a reason other than "No tables found".
        Exception: If any other error occurs while fetching or parsing the content.
    """
    import io

    import pandas as pd

    # Nothing to parse: honour the documented "no tables -> empty list" contract
    # instead of surfacing a parser error for empty input.
    if not html or not html.strip():
        return []

    # pandas >= 2.1 deprecates passing literal HTML as a plain string, so wrap
    # markup in a file-like object. Anything that does not look like markup
    # (URL, file path) is handed to pandas unchanged, as before.
    source = io.StringIO(html) if html.lstrip().startswith("<") else html

    # Only forward `match` when the caller supplied one, so pandas keeps its
    # own default pattern otherwise.
    read_kwargs = {} if match is None else {"match": match}

    try:
        tables = pd.read_html(source, **read_kwargs)
    except ValueError as e:
        if "No tables found" in str(e):
            # Return an empty list instead of raising when nothing matched.
            return []
        raise ValueError(f"Error extracting tables from HTML content: {e}") from e
    except Exception as e:
        # Same exception type as before; chained so the root cause stays visible.
        raise Exception(f"Failed to extract tables from HTML content: {e}") from e

    return tables or []
65
+
66
  @tool
67
  def audio_to_text(file_path: str) -> str:
68
  """
 
181
  def __init__(self):
182
  print("BasicAgent initialized.")
183
  self.multimodal_agent = CodeAgent(
184
+ tools=[VisitWebpageTool(), DuckDuckGoSearchTool(), get_file, audio_to_text],
185
+ model= OpenAIServerModel(model_id="gpt-4o"),
186
  additional_authorized_imports=[
187
  "requests",
188
  "bs4",
 
197
  "numpy",
198
  "json",
199
  "whisper",
200
+ "openpyxl"
 
201
  ],
202
  name="multimodal_agent",
203
  description="""
 
207
  )
208
 
209
  self.code_agent = CodeAgent(
210
+ tools=[VisitWebpageTool(), DuckDuckGoSearchTool(), get_file, audio_to_text, extract_table_from_html],
211
  model=InferenceClientModel(
212
+ model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
213
  ),
214
  additional_authorized_imports=[
215
  "requests",
 
221
  "PIL",
222
  "chess",
223
  "img2text",
224
+ "chess.pgn",
225
  "PIL.Image",
226
  "bytes",
227
  "cv2",
228
  "numpy",
229
+ "chess.engine",
230
  "json",
231
  "whisper",
232
+ "openpyxl"
233
+ ],
234
+ name="code_agent",
235
+ description="""
236
+ This agent specializes at:
237
+ - Writing code to solve problem.
238
+ - Browse the web to find information.
239
+ - Solving chess problems.
240
+ This agent follow rules below when possible:
241
+ 1. `wikipedia` Python package is provided to interact with Wikipedia pages.
242
+ 2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
243
+ 2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
244
+ 3. Please take the question literally! Do not add any additional information or assumptions.
245
+
246
+ """,
247
+ verbosity_level=0,
248
+ max_steps=10,
249
  )
250
+
251
+ self.manager_agent = CodeAgent(
252
+ model=InferenceClientModel(
253
+ "Qwen/Qwen2.5-32B-Instruct"
254
+ ),
255
+ tools=[get_file, audio_to_text],
256
+ managed_agents=[
257
+ self.multimodal_agent,
258
+ self.code_agent],
259
+ additional_authorized_imports=[
260
+ "requests",
261
+ "bs4",
262
+ "markdownify",
263
+ "wikipedia",
264
+ "pandas",
265
+ "io",
266
+ "PIL",
267
+ "chess",
268
+ "img2text",
269
+ "chess.pgn",
270
+ "PIL.Image",
271
+ "bytes",
272
+ "cv2",
273
+ "numpy",
274
+ "chess.engine",
275
+ "whisper",
276
+ "openpyxl"
277
+ "json",
278
+ ],
279
+ planning_interval=5,
280
+ max_steps=15,
281
+ )
282
+
283
  def __call__(self, question: str, question_id: str, file_name: str) -> str:
284
  print(f"Agent received question: {question}")
285
  file = f"Mentioned file: {file_name}" if file_name else ""
286
  prompt = f"""
287
+ Answer the following question (question_id is {question_id}):):
288
  "{question}""{file}"
 
 
 
 
289
  """
290
+ result = self.manager_agent.run(prompt)
291
  print(f"Agent responded with: {result}")
292
  return result
293