WeByT3 committed on
Commit
3e615dc
·
verified ·
1 Parent(s): dcc7bc7

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +30 -13
tools.py CHANGED
@@ -1,5 +1,6 @@
1
  from langchain_core.tools import tool
2
  import wikipediaapi
 
3
 
4
 
5
  @tool
@@ -51,23 +52,39 @@ def divide(a: int, b: int) -> int:
51
  @tool
52
  def search_wikipedia(page_title: str, language: str) -> str:
53
  """
54
- This tool allows searching wikipedia for information on a specific topic. It will return the resulting information in HTML format.
55
- Information may be incomplete in some languages, so try to infer the best language for searching the information from the query.
56
- For example, if you are looking for information on a spanish actor, search on the wikipedia in spanish.
57
 
58
  Args:
59
- page_title: The page title to search for
60
- language: The language for searching the content. For example, "en" for english, "es" for spanish, "fr" for french, etc.
61
 
62
- Example:
63
- {"message": "How many albums did Juanes produce between 2000 and 2005 ?"}
64
- search_wikipedia("Juanes", "es")
65
  """
66
-
67
  try:
68
- wiki_wiki = wikipediaapi.Wikipedia('AIAgent (gabriel_abilleira@tutanota.com)', language, extract_format=wikipediaapi.ExtractFormat.HTML)
 
 
 
 
 
69
  page = wiki_wiki.page(page_title)
70
-
71
- return f"Title: {page.title}\n\n{page.text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  except Exception as e:
73
- return f"Error: {str(e)}"
 
1
  from langchain_core.tools import tool
2
  import wikipediaapi
3
+ import pandas as pd
4
 
5
 
6
  @tool
 
52
  @tool
53
  def search_wikipedia(page_title: str, language: str) -> str:
54
  """
55
+ This tool searches Wikipedia for a specific page and returns its text and any HTML tables it contains.
 
 
56
 
57
  Args:
58
+ page_title: Title of the Wikipedia page.
59
+ language: Language code (e.g., "en", "es", "fr").
60
 
61
+ Returns:
62
+ A string containing the page title, text, and any extracted tables in markdown format.
 
63
  """
 
64
  try:
65
+ wiki_wiki = wikipediaapi.Wikipedia(
66
+ user_agent='AIAgent (gabriel_abilleira@tutanota.com)',
67
+ language=language,
68
+ extract_format=wikipediaapi.ExtractFormat.HTML
69
+ )
70
+
71
  page = wiki_wiki.page(page_title)
72
+
73
+ if not page.exists():
74
+ return f"Error: Page '{page_title}' not found in language '{language}'."
75
+
76
+ # Use the URL to read tables
77
+ tables = pd.read_html(page.fullurl)
78
+ markdown_tables = []
79
+
80
+ for i, table in enumerate(tables):
81
+ if isinstance(table, pd.DataFrame):
82
+ markdown = table.to_markdown(index=False)
83
+ markdown_tables.append(f"\n---\n**Table {i + 1}:**\n{markdown}")
84
+
85
+ table_output = "\n".join(markdown_tables) if markdown_tables else "No tables found on this page."
86
+
87
+ return f"Title: {page.title}\n\nText: {page.summary}\n\n{table_output}"
88
+
89
  except Exception as e:
90
+ return f"Error retrieving Wikipedia content: {str(e)}"