Thanh Vinh Vo committed on
Commit
329838b
·
1 Parent(s): b913f84
Files changed (1) hide show
  1. app.py +3 -19
app.py CHANGED
@@ -30,34 +30,17 @@ def extract_table_from_html(html: str, match: str | None = None) -> list:
30
  """
31
  A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
32
  Example use cases include extracting tables from Wikipedia pages, HTML emails, or other web content.
33
-
34
- This function uses pandas.read_html() to parse HTML tables from the provided HTML content
35
- and returns the extracted tables as a list of pandas DataFrames. It can optionally filter
36
- tables based on a text pattern match.
37
-
38
  Args:
39
  html (str): The HTML content containing HTML tables to extract. This can be raw HTML
40
  string content or a URL to a webpage.
41
  match (str | None, optional): A string or regular expression pattern to match
42
- against table text content. Only tables containing
43
- this pattern will be returned. If None, all tables
44
  are extracted. Defaults to None.
45
  DO NOT use HTML strings / tags in this parameter.
46
 
47
  Returns:
48
  list: A list of pandas DataFrames, where each DataFrame represents a table found
49
  in the HTML content. Returns an empty list if no tables are found.
50
-
51
- Raises:
52
- ValueError: If the HTML content is invalid or cannot be parsed.
53
- Exception: If HTML parsing fails or other unexpected errors occur.
54
- Note:
55
- - Uses pandas.read_html() which requires lxml, html5lib, or BeautifulSoup4
56
- - Tables must be properly formatted HTML <table> elements
57
- - The match parameter is case-sensitive
58
- - Returns native pandas DataFrames for direct manipulation and analysis
59
- - Can accept either raw HTML content or URLs (pandas.read_html supports both)
60
- - Returns empty list instead of raising error when no tables are found
61
  """
62
  import pandas as pd
63
 
@@ -256,6 +239,7 @@ class BasicAgent:
256
  - Solving chess problems.
257
  This agent follow rules below when possible:
258
  1. `wikipedia` Python package is provided to interact with Wikipedia pages.
 
259
  2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
260
  3. Please take the question literally! Do not add any additional information or assumptions.
261
 
@@ -268,7 +252,7 @@ class BasicAgent:
268
  model=InferenceClientModel(
269
  "Qwen/Qwen2.5-32B-Instruct"
270
  ),
271
- tools=[get_file, audio_to_text, extract_table_from_html],
272
  managed_agents=[
273
  self.multimodal_agent,
274
  self.code_agent],
 
30
  """
31
  A tool that extracts HTML tables from HTML content and returns them as pandas DataFrames.
32
  Example use cases include extracting tables from Wikipedia pages, HTML emails, or other web content.
 
 
 
 
 
33
  Args:
34
  html (str): The HTML content containing HTML tables to extract. This can be raw HTML
35
  string content or a URL to a webpage.
36
  match (str | None, optional): A string or regular expression pattern to match
37
+ against table text content. If None, all tables
 
38
  are extracted. Defaults to None.
39
  DO NOT use HTML strings / tags in this parameter.
40
 
41
  Returns:
42
  list: A list of pandas DataFrames, where each DataFrame represents a table found
43
  in the HTML content. Returns an empty list if no tables are found.
 
 
 
 
 
 
 
 
 
 
 
44
  """
45
  import pandas as pd
46
 
 
239
  - Solving chess problems.
240
  This agent follow rules below when possible:
241
  1. `wikipedia` Python package is provided to interact with Wikipedia pages.
242
+ 2. Use `extract_table_from_html` tool to process Wikipedia pages first before other approaches.
243
  2. `chess` Python package is provided. Please use it when there is need to solve chess problems.
244
  3. Please take the question literally! Do not add any additional information or assumptions.
245
 
 
252
  model=InferenceClientModel(
253
  "Qwen/Qwen2.5-32B-Instruct"
254
  ),
255
+ tools=[get_file, audio_to_text],
256
  managed_agents=[
257
  self.multimodal_agent,
258
  self.code_agent],