Thanh Vinh Vo
committed on
Commit
·
1c6f375
1
Parent(s):
329838b
update
Browse files
- app.py +10 -15
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -10,6 +10,7 @@ from PIL import Image
|
|
| 10 |
from smolagents import (
|
| 11 |
CodeAgent,
|
| 12 |
DuckDuckGoSearchTool,
|
|
|
|
| 13 |
InferenceClientModel,
|
| 14 |
load_tool,
|
| 15 |
OpenAIServerModel,
|
|
@@ -26,17 +27,13 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 26 |
|
| 27 |
|
| 28 |
@tool
|
| 29 |
-
def extract_table_from_html(html: str
|
| 30 |
"""
|
| 31 |
A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
|
| 32 |
Example usecases include extracting tables from Wikipedia pages, HTML emails, or other web content.
|
| 33 |
Args:
|
| 34 |
html (str): The HTML content containing HTML tables to extract. This can be raw HTML
|
| 35 |
string content or a URL to a webpage.
|
| 36 |
-
match (str | None, optional): A string or regular expression pattern to match
|
| 37 |
-
against table text content. If None, all tables
|
| 38 |
-
are extracted. Defaults to None.
|
| 39 |
-
DO NOT use HTML strings / tags in this parameter.
|
| 40 |
|
| 41 |
Returns:
|
| 42 |
list: A list of pandas DataFrames, where each DataFrame represents a table found
|
|
@@ -46,10 +43,7 @@ def extract_table_from_html(html: str, match: str | None = None) -> list:
|
|
| 46 |
|
| 47 |
try:
|
| 48 |
# Extract tables using pandas
|
| 49 |
-
|
| 50 |
-
tables = pd.read_html(html, match=match)
|
| 51 |
-
else:
|
| 52 |
-
tables = pd.read_html(html)
|
| 53 |
|
| 54 |
# Return the list of DataFrames directly
|
| 55 |
return tables if tables else []
|
|
@@ -181,7 +175,7 @@ class BasicAgent:
|
|
| 181 |
def __init__(self):
|
| 182 |
print("BasicAgent initialized.")
|
| 183 |
self.multimodal_agent = CodeAgent(
|
| 184 |
-
tools=[VisitWebpageTool(),
|
| 185 |
model= OpenAIServerModel(model_id="gpt-4o"),
|
| 186 |
additional_authorized_imports=[
|
| 187 |
"requests",
|
|
@@ -197,7 +191,8 @@ class BasicAgent:
|
|
| 197 |
"numpy",
|
| 198 |
"json",
|
| 199 |
"whisper",
|
| 200 |
-
"openpyxl"
|
|
|
|
| 201 |
],
|
| 202 |
name="multimodal_agent",
|
| 203 |
description="""
|
|
@@ -207,7 +202,7 @@ class BasicAgent:
|
|
| 207 |
)
|
| 208 |
|
| 209 |
self.code_agent = CodeAgent(
|
| 210 |
-
tools=[VisitWebpageTool(),
|
| 211 |
model=InferenceClientModel(
|
| 212 |
model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 213 |
),
|
|
@@ -229,7 +224,8 @@ class BasicAgent:
|
|
| 229 |
"chess.engine",
|
| 230 |
"json",
|
| 231 |
"whisper",
|
| 232 |
-
"openpyxl"
|
|
|
|
| 233 |
],
|
| 234 |
name="code_agent",
|
| 235 |
description="""
|
|
@@ -287,8 +283,7 @@ class BasicAgent:
|
|
| 287 |
Answer the following question (question_id is {question_id}):):
|
| 288 |
"{question}""{file}"
|
| 289 |
Please follow hints below:
|
| 290 |
-
1. `wikipedia` Python package is provided to interact with Wikipedia pages.
|
| 291 |
-
2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
|
| 292 |
3. Please take the question literally! Do not add any additional information or assumptions.
|
| 293 |
"""
|
| 294 |
result = self.manager_agent.run(prompt)
|
|
|
|
| 10 |
from smolagents import (
|
| 11 |
CodeAgent,
|
| 12 |
DuckDuckGoSearchTool,
|
| 13 |
+
GoogleSearchTool,
|
| 14 |
InferenceClientModel,
|
| 15 |
load_tool,
|
| 16 |
OpenAIServerModel,
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
@tool
|
| 30 |
+
def extract_table_from_html(html: str) -> list:
|
| 31 |
"""
|
| 32 |
A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
|
| 33 |
Example usecases include extracting tables from Wikipedia pages, HTML emails, or other web content.
|
| 34 |
Args:
|
| 35 |
html (str): The HTML content containing HTML tables to extract. This can be raw HTML
|
| 36 |
string content or a URL to a webpage.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
Returns:
|
| 39 |
list: A list of pandas DataFrames, where each DataFrame represents a table found
|
|
|
|
| 43 |
|
| 44 |
try:
|
| 45 |
# Extract tables using pandas
|
| 46 |
+
tables = pd.read_html(html)
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
# Return the list of DataFrames directly
|
| 49 |
return tables if tables else []
|
|
|
|
| 175 |
def __init__(self):
|
| 176 |
print("BasicAgent initialized.")
|
| 177 |
self.multimodal_agent = CodeAgent(
|
| 178 |
+
tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
|
| 179 |
model= OpenAIServerModel(model_id="gpt-4o"),
|
| 180 |
additional_authorized_imports=[
|
| 181 |
"requests",
|
|
|
|
| 191 |
"numpy",
|
| 192 |
"json",
|
| 193 |
"whisper",
|
| 194 |
+
"openpyxl",
|
| 195 |
+
"youtube-transcript-api",
|
| 196 |
],
|
| 197 |
name="multimodal_agent",
|
| 198 |
description="""
|
|
|
|
| 202 |
)
|
| 203 |
|
| 204 |
self.code_agent = CodeAgent(
|
| 205 |
+
tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text, extract_table_from_html],
|
| 206 |
model=InferenceClientModel(
|
| 207 |
model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 208 |
),
|
|
|
|
| 224 |
"chess.engine",
|
| 225 |
"json",
|
| 226 |
"whisper",
|
| 227 |
+
"openpyxl",
|
| 228 |
+
"youtube-transcript-api",
|
| 229 |
],
|
| 230 |
name="code_agent",
|
| 231 |
description="""
|
|
|
|
| 283 |
Answer the following question (question_id is {question_id}):):
|
| 284 |
"{question}""{file}"
|
| 285 |
Please follow hints below:
|
| 286 |
+
1. `wikipedia` Python package is provided to interact with Wikipedia pages.
|
|
|
|
| 287 |
3. Please take the question literally! Do not add any additional information or assumptions.
|
| 288 |
"""
|
| 289 |
result = self.manager_agent.run(prompt)
|
requirements.txt
CHANGED
|
@@ -15,3 +15,4 @@ numpy
|
|
| 15 |
html5lib
|
| 16 |
openai-whisper
|
| 17 |
openpyxl
|
|
|
|
|
|
| 15 |
html5lib
|
| 16 |
openai-whisper
|
| 17 |
openpyxl
|
| 18 |
+
youtube-transcript-api
|