mitiku committed on
Commit
c45dbaa
·
verified ·
1 Parent(s): a65ce7f

Add search_arxiv tool

Browse files
Files changed (1) hide show
  1. app.py +63 -5
app.py CHANGED
@@ -1,14 +1,26 @@
1
- from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
2
  import datetime
3
  import requests
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
7
  from datetime import date
 
 
8
 
9
 
10
  from Gradio_UI import GradioUI
11
 
 
 
 
 
 
 
 
 
 
 
12
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
13
  @tool
14
  def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
@@ -29,6 +41,55 @@ def todays_date() -> str:
29
  """
30
  today = date.today()
31
  return today.strftime("%b %d, %Y")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
  @tool
@@ -47,9 +108,6 @@ def get_current_time_in_timezone(timezone: str) -> str:
47
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
48
 
49
 
50
- final_answer = FinalAnswerTool()
51
- web_search = DuckDuckGoSearchTool()
52
-
53
  # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
54
  # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
55
 
@@ -69,7 +127,7 @@ with open("prompts.yaml", 'r') as stream:
69
 
70
  agent = CodeAgent(
71
  model=model,
72
- tools=[final_answer, web_search, todays_date], ## add your tools here (don't remove final answer)
73
  max_steps=6,
74
  verbosity_level=1,
75
  grammar=None,
 
1
+ from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool, VisitWebpageTool
2
  import datetime
3
  import requests
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
7
  from datetime import date
8
+ from collections import namedtuple
9
+ from typing import Tuple
10
 
11
 
12
  from Gradio_UI import GradioUI
13
 
14
+
15
# Tool instances created once at import time; the helper functions and the
# agent's tool list below refer to these module-level names.
web_search = DuckDuckGoSearchTool()
visit_webpage = VisitWebpageTool()

final_answer = FinalAnswerTool()

# Lightweight record describing a single parsed search hit.
SearchResult = namedtuple("SearchResult", "title url description")
21
+
22
+
23
+
24
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
25
  @tool
26
  def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
 
41
  """
42
  today = date.today()
43
  return today.strftime("%b %d, %Y")
44
+
45
def parse_md_link(md_link: str) -> Tuple[str, str]:
    """Split a markdown link ``[title](url)`` into its title and URL.

    Args:
        md_link: Text containing a markdown-style link.

    Returns:
        A ``(title, url)`` tuple for the first link found in ``md_link``.

    Raises:
        ValueError: If ``md_link`` contains no markdown link.
    """
    # Imported locally so the helper does not depend on a module-level
    # ``import re`` being present in the file.
    import re

    match = re.search(r'\[([^\]]+)\]\(([^)]+)\)', md_link)
    if match is None:
        raise ValueError(f"No markdown link found in {md_link!r}")
    return match.group(1), match.group(2)


def search_specific_website(topic: str, url: str, num_results: int):
    """Run a web search for ``topic`` prefixed by ``url`` and parse the hits.

    Args:
        topic: The search terms.
        url: Text placed in front of the topic in the query (``search_arxiv``
            passes a ``site:`` filter here).
        num_results: Maximum number of results to return.

    Returns:
        A list of ``SearchResult(title, url, description)`` tuples, at most
        ``num_results`` long.
    """
    query = f"{url} {topic}"

    # The search output is split on blank lines and the first chunk is
    # skipped (presumably a heading -- confirm against the search tool's
    # output format).
    chunks = web_search(query).split("\n\n")[1:]

    output = []
    for chunk in chunks:
        if len(output) >= num_results:
            break
        # Only the first line carries the link; everything after it is the
        # description, which may itself span several lines.
        link, _, description = chunk.partition("\n")
        try:
            title, link_url = parse_md_link(link)
        except ValueError:
            # Not a "[title](url)" line, e.g. description text that spilled
            # past a blank line; skip it instead of failing the whole search.
            continue
        output.append(SearchResult(title=title, url=link_url, description=description))
    return output
61
+
62
+
63
def extract_arxiv_abstract(page_text: str) -> str:
    """Pull the abstract out of the text of a rendered arXiv page.

    Args:
        page_text: Page content as text, one block per line.

    Returns:
        The text following ``"> Abstract:"`` on the first line that starts
        with that marker, or ``""`` when no such line exists.
    """
    prefix = "> Abstract:"
    for line in page_text.split("\n"):
        candidate = line.strip()
        if candidate.startswith(prefix):
            # Slice the marker off. ``str.strip(prefix)`` would treat the
            # marker as a *set of characters* and also eat matching letters
            # from both ends of the abstract itself.
            return candidate[len(prefix):].strip()
    return ""


def get_arxiv_paper_abstract(url):
    """Fetch the page at ``url`` and return its abstract.

    Args:
        url: Address of the paper page to visit.

    Returns:
        The abstract text, or ``""`` if the fetched page has no line
        starting with ``"> Abstract:"``.
    """
    return extract_arxiv_abstract(visit_webpage(url))
69
+
70
+
71
@tool
def search_arxiv(topic: str, year: str = None, num_results: int = 10) -> str:
    """
    Search arxiv.org for a given topic. It will return the title and abstract of the top results.

    Args:
        topic: The topic to search for.
        year: Specific year to search for. Default is None
        num_results: Indicating how many top search results should be included in the output

    Returns:
        The title and abstract of each result, with results separated by a blank line.
    """
    # Append the year only when one was supplied, so the query carries no
    # trailing space; the f-string also tolerates a year passed as an int.
    search_terms = topic if year in (None, "") else f"{topic} {year}"
    query_results = search_specific_website(search_terms, "site:arxiv.org", num_results)

    # Each hit costs one page visit to fetch its abstract.
    return "\n\n".join(
        "Title: {} Abstract: {}".format(result.title, get_arxiv_paper_abstract(result.url))
        for result in query_results
    )
92
+
93
 
94
 
95
  @tool
 
108
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
109
 
110
 
 
 
 
111
  # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
112
  # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
113
 
 
127
 
128
  agent = CodeAgent(
129
  model=model,
130
+ tools=[final_answer, web_search, todays_date, search_arxiv], ## add your tools here (don't remove final answer)
131
  max_steps=6,
132
  verbosity_level=1,
133
  grammar=None,