Thanh Vinh Vo committed on
Commit
0dac26c
·
1 Parent(s): 8dae467
Files changed (1) hide show
  1. app.py +2 -21
app.py CHANGED
@@ -28,7 +28,8 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
28
  @tool
29
  def extract_table_from_html(html: str, match: str | None = None) -> list:
30
  """
31
- A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames. Example use-cases include extracting tables from Wikipedia pages, HTML emails, or other web content.
 
32
 
33
  This function uses pandas.read_html() to parse HTML tables from the provided HTML content
34
  and returns the extracted tables as a list of pandas DataFrames. It can optionally filter
@@ -49,26 +50,6 @@ def extract_table_from_html(html: str, match: str | None = None) -> list:
49
  Raises:
50
  ValueError: If the HTML content is invalid or cannot be parsed.
51
  Exception: If HTML parsing fails or other unexpected errors occur.
52
-
53
- Example:
54
- >>> html_content = '''
55
- ... <table>
56
- ... <tr><th>Name</th><th>Age</th></tr>
57
- ... <tr><td>John</td><td>25</td></tr>
58
- ... </table>
59
- ... '''
60
- >>> tables = extract_table_from_html(html_content)
61
- >>> print(f"Found {len(tables)} tables")
62
- >>> if tables:
63
- ... first_table = tables[0]
64
- ... print(f"First table shape: {first_table.shape}")
65
- ... print(first_table.head())
66
-
67
- >>> # Extract tables containing specific text
68
- >>> tables = extract_table_from_html(html_content, match="Name")
69
- >>> for i, table in enumerate(tables):
70
- ... print(f"Table {i}: {table.shape[0]} rows, {table.shape[1]} columns")
71
-
72
  Note:
73
  - Uses pandas.read_html() which requires lxml, html5lib, or BeautifulSoup4
74
  - Tables must be properly formatted HTML <table> elements
 
28
  @tool
29
  def extract_table_from_html(html: str, match: str | None = None) -> list:
30
  """
31
+ A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
32
+ Example use cases include extracting tables from Wikipedia pages, HTML emails, or other web content.
33
 
34
  This function uses pandas.read_html() to parse HTML tables from the provided HTML content
35
  and returns the extracted tables as a list of pandas DataFrames. It can optionally filter
 
50
  Raises:
51
  ValueError: If the HTML content is invalid or cannot be parsed.
52
  Exception: If HTML parsing fails or other unexpected errors occur.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  Note:
54
  - Uses pandas.read_html() which requires lxml, html5lib, or BeautifulSoup4
55
  - Tables must be properly formatted HTML <table> elements