Thanh Vinh Vo committed on
Commit
0ba08eb
·
1 Parent(s): 59c594c
Files changed (2) hide show
  1. app.py +15 -6
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,12 +4,14 @@ import os
4
  import gradio as gr
5
  import pandas as pd
6
  import requests
 
7
  from smolagents import (
8
  CodeAgent,
9
  DuckDuckGoSearchTool,
10
  InferenceClientModel,
11
  load_tool,
12
  tool,
 
13
  )
14
 
15
 
@@ -18,16 +20,22 @@ from smolagents import (
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
20
 
 
 
 
 
 
 
 
 
 
21
  # --- Basic Agent Definition ---
22
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
23
  class BasicAgent:
24
  def __init__(self):
25
  print("BasicAgent initialized.")
26
- html_parser_tool = load_tool(
27
- "victor/dom-to-semantic-markdown", trust_remote_code=True
28
- )
29
  self.agent = CodeAgent(
30
- tools=[html_parser_tool],
31
  model=InferenceClientModel(),
32
  additional_authorized_imports=["requests", "bs4"],
33
  max_steps=10,
@@ -36,8 +44,9 @@ class BasicAgent:
36
  def __call__(self, question: str) -> str:
37
  print(f"Agent received question: {question}")
38
  prompt = f"""
39
- Answer the following question: {question}. Please follow the following rules:
40
- 1. When there is need to parse HTML please use LLM to extract the relevant information instead of using BeautifulSoup.
 
41
  """
42
  result = self.agent.run(prompt)
43
  print(f"Agent responded with: {result}")
 
4
  import gradio as gr
5
  import pandas as pd
6
  import requests
7
+ from html_to_markdown import convert_to_markdown
8
  from smolagents import (
9
  CodeAgent,
10
  DuckDuckGoSearchTool,
11
  InferenceClientModel,
12
  load_tool,
13
  tool,
14
+ Tool,
15
  )
16
 
17
 
 
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
 
23
+ @tool
24
+ def html_to_markdown(arg1: str) -> str: # it's important to specify the return type
25
+ """A tool that converts HTML to Markdown.
26
+ Args:
27
+ arg1: the raw HTML string to convert
28
+ """
29
+ return convert_to_markdown(arg1)
30
+
31
+
32
  # --- Basic Agent Definition ---
33
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
34
  class BasicAgent:
35
  def __init__(self):
36
  print("BasicAgent initialized.")
 
 
 
37
  self.agent = CodeAgent(
38
+ tools=[],
39
  model=InferenceClientModel(),
40
  additional_authorized_imports=["requests", "bs4"],
41
  max_steps=10,
 
44
  def __call__(self, question: str) -> str:
45
  print(f"Agent received question: {question}")
46
  prompt = f"""
47
+ Answer the following question: `{question}`.
48
+ Please follow the following rules:
49
+ 1. When there is need to extract information from web page please use `html_to_markdown` tool to parse HTML content first, and then extract the information you need.
50
  """
51
  result = self.agent.run(prompt)
52
  print(f"Agent responded with: {result}")
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio
2
  requests
3
  smolagents
4
  duckduckgo_search
5
- bs4
 
 
2
  requests
3
  smolagents
4
  duckduckgo_search
5
+ bs4
6
+ html-to-markdown