Thanh Vinh Vo
committed on
Commit
·
0dac26c
1
Parent(s):
8dae467
update
Browse files
app.py
CHANGED
|
@@ -28,7 +28,8 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 28 |
@tool
|
| 29 |
def extract_table_from_html(html: str, match: str | None = None) -> list:
|
| 30 |
"""
|
| 31 |
-
A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
|
|
|
|
| 32 |
|
| 33 |
This function uses pandas.read_html() to parse HTML tables from the provided HTML content
|
| 34 |
and returns the extracted tables as a list of pandas DataFrames. It can optionally filter
|
|
@@ -49,26 +50,6 @@ def extract_table_from_html(html: str, match: str | None = None) -> list:
|
|
| 49 |
Raises:
|
| 50 |
ValueError: If the HTML content is invalid or cannot be parsed.
|
| 51 |
Exception: If HTML parsing fails or other unexpected errors occur.
|
| 52 |
-
|
| 53 |
-
Example:
|
| 54 |
-
>>> html_content = '''
|
| 55 |
-
... <table>
|
| 56 |
-
... <tr><th>Name</th><th>Age</th></tr>
|
| 57 |
-
... <tr><td>John</td><td>25</td></tr>
|
| 58 |
-
... </table>
|
| 59 |
-
... '''
|
| 60 |
-
>>> tables = extract_table_from_html(html_content)
|
| 61 |
-
>>> print(f"Found {len(tables)} tables")
|
| 62 |
-
>>> if tables:
|
| 63 |
-
... first_table = tables[0]
|
| 64 |
-
... print(f"First table shape: {first_table.shape}")
|
| 65 |
-
... print(first_table.head())
|
| 66 |
-
|
| 67 |
-
>>> # Extract tables containing specific text
|
| 68 |
-
>>> tables = extract_table_from_html(html_content, match="Name")
|
| 69 |
-
>>> for i, table in enumerate(tables):
|
| 70 |
-
... print(f"Table {i}: {table.shape[0]} rows, {table.shape[1]} columns")
|
| 71 |
-
|
| 72 |
Note:
|
| 73 |
- Uses pandas.read_html() which requires lxml, html5lib, or BeautifulSoup4
|
| 74 |
- Tables must be properly formatted HTML <table> elements
|
|
|
|
| 28 |
@tool
|
| 29 |
def extract_table_from_html(html: str, match: str | None = None) -> list:
|
| 30 |
"""
|
| 31 |
+
A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
|
| 32 |
+
Example use cases include extracting tables from Wikipedia pages, HTML emails, or other web content.
|
| 33 |
|
| 34 |
This function uses pandas.read_html() to parse HTML tables from the provided HTML content
|
| 35 |
and returns the extracted tables as a list of pandas DataFrames. It can optionally filter
|
|
|
|
| 50 |
Raises:
|
| 51 |
ValueError: If the HTML content is invalid or cannot be parsed.
|
| 52 |
Exception: If HTML parsing fails or other unexpected errors occur.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
Note:
|
| 54 |
- Uses pandas.read_html() which requires lxml, html5lib, or BeautifulSoup4
|
| 55 |
- Tables must be properly formatted HTML <table> elements
|