Thanh Vinh Vo
committed on
Commit
·
329838b
1
Parent(s):
b913f84
update
Browse files
app.py
CHANGED
|
@@ -30,34 +30,17 @@ def extract_table_from_html(html: str, match: str | None = None) -> list:
|
|
| 30 |
"""
|
| 31 |
A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
|
| 32 |
Example use cases include extracting tables from Wikipedia pages, HTML emails, or other web content.
|
| 33 |
-
|
| 34 |
-
This function uses pandas.read_html() to parse HTML tables from the provided HTML content
|
| 35 |
-
and returns the extracted tables as a list of pandas DataFrames. It can optionally filter
|
| 36 |
-
tables based on a text pattern match.
|
| 37 |
-
|
| 38 |
Args:
|
| 39 |
html (str): The HTML content containing HTML tables to extract. This can be raw HTML
|
| 40 |
string content or a URL to a webpage.
|
| 41 |
match (str | None, optional): A string or regular expression pattern to match
|
| 42 |
-
against table text content. Only tables containing text matching
|
| 43 |
-
this pattern will be returned. If None, all tables
|
| 44 |
are extracted. Defaults to None.
|
| 45 |
DO NOT use HTML strings / tags in this parameter.
|
| 46 |
|
| 47 |
Returns:
|
| 48 |
list: A list of pandas DataFrames, where each DataFrame represents a table found
|
| 49 |
in the HTML content. Returns an empty list if no tables are found.
|
| 50 |
-
|
| 51 |
-
Raises:
|
| 52 |
-
ValueError: If the HTML content is invalid or cannot be parsed.
|
| 53 |
-
Exception: If HTML parsing fails or other unexpected errors occur.
|
| 54 |
-
Note:
|
| 55 |
-
- Uses pandas.read_html() which requires lxml, html5lib, or BeautifulSoup4
|
| 56 |
-
- Tables must be properly formatted HTML <table> elements
|
| 57 |
-
- The match parameter is case-sensitive
|
| 58 |
-
- Returns native pandas DataFrames for direct manipulation and analysis
|
| 59 |
-
- Can accept either raw HTML content or URLs (pandas.read_html supports both)
|
| 60 |
-
- Returns empty list instead of raising error when no tables are found
|
| 61 |
"""
|
| 62 |
import pandas as pd
|
| 63 |
|
|
@@ -256,6 +239,7 @@ class BasicAgent:
|
|
| 256 |
- Solving chess problems.
|
| 257 |
This agent follows the rules below when possible:
|
| 258 |
1. `wikipedia` Python package is provided to interact with Wikipedia pages.
|
|
|
|
| 259 |
2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
|
| 260 |
3. Please take the question literally! Do not add any additional information or assumptions.
|
| 261 |
|
|
@@ -268,7 +252,7 @@ class BasicAgent:
|
|
| 268 |
model=InferenceClientModel(
|
| 269 |
"Qwen/Qwen2.5-32B-Instruct"
|
| 270 |
),
|
| 271 |
-
tools=[get_file, audio_to_text
|
| 272 |
managed_agents=[
|
| 273 |
self.multimodal_agent,
|
| 274 |
self.code_agent],
|
|
|
|
| 30 |
"""
|
| 31 |
A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
|
| 32 |
Example use cases include extracting tables from Wikipedia pages, HTML emails, or other web content.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
Args:
|
| 34 |
html (str): The HTML content containing HTML tables to extract. This can be raw HTML
|
| 35 |
string content or a URL to a webpage.
|
| 36 |
match (str | None, optional): A string or regular expression pattern to match
|
| 37 |
+
against table text content. If None, all tables
|
|
|
|
| 38 |
are extracted. Defaults to None.
|
| 39 |
DO NOT use HTML strings / tags in this parameter.
|
| 40 |
|
| 41 |
Returns:
|
| 42 |
list: A list of pandas DataFrames, where each DataFrame represents a table found
|
| 43 |
in the HTML content. Returns an empty list if no tables are found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
"""
|
| 45 |
import pandas as pd
|
| 46 |
|
|
|
|
| 239 |
- Solving chess problems.
|
| 240 |
This agent follows the rules below when possible:
|
| 241 |
1. `wikipedia` Python package is provided to interact with Wikipedia pages.
|
| 242 |
+
2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
|
| 243 |
3. `chess` Python package is provided. Please use it when there is need to solve chess problems.
|
| 244 |
4. Please take the question literally! Do not add any additional information or assumptions.
|
| 245 |
|
|
|
|
| 252 |
model=InferenceClientModel(
|
| 253 |
"Qwen/Qwen2.5-32B-Instruct"
|
| 254 |
),
|
| 255 |
+
tools=[get_file, audio_to_text],
|
| 256 |
managed_agents=[
|
| 257 |
self.multimodal_agent,
|
| 258 |
self.code_agent],
|