Thanh Vinh Vo committed on
Commit
a45f805
·
1 Parent(s): ee7c16d
Files changed (1) hide show
  1. app.py +73 -2
app.py CHANGED
@@ -26,6 +26,78 @@ import whisper
26
  # --- Constants ---
27
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  @tool
30
  def audio_to_text(file_path: str) -> str:
31
  """
@@ -171,7 +243,7 @@ class BasicAgent:
171
  )
172
 
173
  self.code_agent = CodeAgent(
174
- tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text],
175
  model=InferenceClientModel(
176
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct"
177
  ),
@@ -205,7 +277,6 @@ class BasicAgent:
205
  Please follow rules below:
206
  1. Take the question literally! Do not add any additional information or assumptions.
207
  2. `wikipedia` Python package is provided, we should use it to search and retrieve Wikipedia pages.
208
- 3. `pandas` Python package is provided, we should use it to extract table data from Wikipedia pages.
209
  """
210
  result = self.code_agent.run(prompt)
211
  print(f"Agent responded with: {result}")
 
26
  # --- Constants ---
27
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
28
 
29
+
30
@tool
def wikipedia_page_to_markdown(page_name: str) -> str:
    """
    A tool that retrieves a Wikipedia page and converts its content to Markdown format.

    This function looks up a Wikipedia page by name, retrieves its HTML content,
    and converts it to Markdown format for easier text processing and readability.
    If the name is ambiguous, the first option offered by Wikipedia's
    disambiguation page is used instead.

    Args:
        page_name (str): The name or title of the Wikipedia page to retrieve.
                        Can be an exact page title or a search term that Wikipedia
                        can match to an existing page. Examples: "Python programming",
                        "Albert Einstein", "Machine Learning", etc.

    Returns:
        str: The Wikipedia page HTML converted to Markdown format.

    Raises:
        Exception: If no Wikipedia page is found for the given name (including
                   the first disambiguation option), or if there are network
                   issues or other errors accessing Wikipedia. The original
                   error is attached as the exception's cause.

    Example:
        >>> markdown_content = wikipedia_page_to_markdown("Albert Einstein")
        >>> # Returns the Wikipedia article about Einstein in Markdown format

    Note:
        - Uses the `wikipedia` Python package to access Wikipedia's API
        - Converts HTML content to Markdown using the `markdownify` library
        - Large pages may take a moment to process due to content conversion
    """
    # Function-scope imports, as in the original: the dependencies are only
    # needed when the tool is actually invoked.
    import wikipedia
    from markdownify import markdownify as md

    try:
        try:
            page = wikipedia.page(page_name)
        except wikipedia.exceptions.DisambiguationError as ambiguous:
            # Nothing to fall back to: re-raise so the generic handler below
            # reports it, instead of failing with a bare IndexError.
            if not ambiguous.options:
                raise
            # The fallback lookup stays inside the outer ``try`` so that a
            # failure here (missing page, second disambiguation, network
            # error) is reported the same way as a failure of the first lookup.
            page = wikipedia.page(ambiguous.options[0])

        # Convert the page's rendered HTML to Markdown.
        return md(page.html())

    except wikipedia.exceptions.PageError as e:
        raise Exception(f"No Wikipedia page found for '{page_name}'. Please check the page name and try again.") from e

    except Exception as e:
        raise Exception(f"Error retrieving Wikipedia page '{page_name}': {str(e)}") from e
100
+
101
  @tool
102
  def audio_to_text(file_path: str) -> str:
103
  """
 
243
  )
244
 
245
  self.code_agent = CodeAgent(
246
+ tools=[VisitWebpageTool(), GoogleSearchTool("serper"), get_file, audio_to_text, wikipedia_page_to_markdown],
247
  model=InferenceClientModel(
248
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct"
249
  ),
 
277
  Please follow rules below:
278
  1. Take the question literally! Do not add any additional information or assumptions.
279
  2. `wikipedia` Python package is provided, we should use it to search and retrieve Wikipedia pages.
 
280
  """
281
  result = self.code_agent.run(prompt)
282
  print(f"Agent responded with: {result}")