Oriol Alàs Cercós committed on
Commit
0b942af
·
1 Parent(s): eff5b52

updated with max results and wikipedia search tool

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-312.pyc +0 -0
  2. app.py +27 -20
  3. requirements.txt +3 -1
__pycache__/app.cpython-312.pyc ADDED
Binary file (16.5 kB). View file
 
app.py CHANGED
@@ -1,6 +1,7 @@
 
1
  import os
2
  from math import sqrt
3
- from typing import Dict
4
  from langchain_community.tools.tavily_search import TavilySearchResults
5
  from langchain_community.document_loaders import WikipediaLoader
6
  from langchain_community.document_loaders import ArxivLoader
@@ -15,34 +16,39 @@ from smolagents import CodeAgent, tool, InferenceClientModel
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
 
 
 
 
 
 
 
18
  @tool
19
- def wiki_search(query: str) -> Dict[str, str]:
20
  """Search Wikipedia for a query and return maximum 2 results.
21
  Args:
22
  query: The search query."""
23
- search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
24
- formatted_search_docs = "\n\n---\n\n".join(
25
- [
26
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
27
- for doc in search_docs
28
- ]
29
- )
30
- return {"wiki_results": formatted_search_docs}
31
 
 
 
 
 
 
 
32
 
33
  @tool
34
- def web_search(query: str) -> Dict[str, str]:
35
  """Search Tavily for a query and return maximum 3 results.
36
  Args:
37
  query: The search query."""
38
- search_docs = TavilySearchResults(max_results=3).invoke(input=query)
39
- formatted_search_docs = "\n\n---\n\n".join(
40
- [
41
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
42
- for doc in search_docs
43
- ]
44
- )
45
- return {"web_results": formatted_search_docs}
46
 
47
 
48
  @tool
@@ -53,7 +59,7 @@ def arxiv_search(query: str) -> Dict[str, str]:
53
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
54
  formatted_search_docs = "\n\n---\n\n".join(
55
  [
56
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
57
  for doc in search_docs
58
  ]
59
  )
@@ -154,6 +160,7 @@ class BasicAgent:
154
  self.agent = CodeAgent(
155
  tools=[multiply, add, subtract, power, square_root, modulus, wiki_search, web_search, arxiv_search],
156
  model=InferenceClientModel(model_id=model_id, token=os.getenv("HF_TOKEN")),
 
157
  )
158
  print("BasicAgent initialized.")
159
 
 
1
+ import dataclasses
2
  import os
3
  from math import sqrt
4
+ from typing import Dict, List
5
  from langchain_community.tools.tavily_search import TavilySearchResults
6
  from langchain_community.document_loaders import WikipediaLoader
7
  from langchain_community.document_loaders import ArxivLoader
 
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
 
19
@dataclasses.dataclass
class WikiSourceDocument:
    """A single Wikipedia search result returned by `wiki_search`.

    Fields mirror the langchain `Document` pieces we care about:
    the article URL, an optional page identifier, and the text body.
    """
    # URL of the Wikipedia article the content came from.
    source: str
    # Page identifier when the loader supplies one; Wikipedia metadata
    # frequently omits it, hence the empty-string default.
    page: str = ""
    # Raw text content of the loaded document.
    page_content: str = ""
24
+
25
@tool
def wiki_search(query: str, load_max_docs: int = 3) -> List[WikiSourceDocument]:
    """Search Wikipedia and return up to `load_max_docs` results.

    Args:
        query: The search query.
        load_max_docs: Maximum number of Wikipedia documents to load (default 3).

    Returns:
        A list of WikiSourceDocument entries, one per loaded article.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=load_max_docs).load()
    # WikipediaLoader emits lowercase metadata keys ("source", "title", ...),
    # and "page" is frequently absent — use .get with empty-string fallbacks
    # instead of capitalized key lookups, which raise KeyError.
    return [
        WikiSourceDocument(
            source=doc.metadata.get("source", ""),
            page=doc.metadata.get("page", ""),
            page_content=doc.page_content,
        )
        for doc in search_docs
    ]
 
37
 
38
@tool
def load_file(file_id: str) -> bytes:
    """Download a task file from the scoring API and return its raw bytes.

    Args:
        file_id: The file ID to load.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # Reuse the module-level base URL instead of duplicating it here.
    response = requests.get(f"{DEFAULT_API_URL}/files/{file_id}", timeout=30)
    # Fail loudly instead of handing an HTML error page back as "content".
    response.raise_for_status()
    return response.content
44
 
45
@tool
def web_search(query: str, max_results: int = 3) -> Dict[str, list]:
    """Search Tavily for a query and return up to `max_results` results.

    Args:
        query: The search query.
        max_results: Maximum number of search results to return (default 3,
            keeping the old single-argument call signature working).

    Returns:
        A dict with a single "web_results" key holding the list of
        Tavily result dicts (url/content/...).
    """
    search_docs = TavilySearchResults(max_results=max_results).invoke(input=query)
    return {"web_results": search_docs}
 
 
 
 
 
 
52
 
53
 
54
  @tool
 
59
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
60
  formatted_search_docs = "\n\n---\n\n".join(
61
  [
62
+ f'<Document Title="{doc.metadata["Title"]}" Published="{doc.metadata['Published']}" Authors="{doc.metadata['Authors']} Summary={doc.metadata['Summary']}"/>\n{doc.page_content}\n</Document>'
63
  for doc in search_docs
64
  ]
65
  )
 
160
  self.agent = CodeAgent(
161
  tools=[multiply, add, subtract, power, square_root, modulus, wiki_search, web_search, arxiv_search],
162
  model=InferenceClientModel(model_id=model_id, token=os.getenv("HF_TOKEN")),
163
+ max_steps=10,
164
  )
165
  print("BasicAgent initialized.")
166
 
requirements.txt CHANGED
@@ -3,4 +3,6 @@ requests
3
  smolagents
4
  pandas
5
  langchain-community
6
- wikipedia
 
 
 
3
  smolagents
4
  pandas
5
  langchain-community
6
+ wikipedia
7
+ arxiv
8
+ pymupdf