mdicio committed on
Commit
75d27b2
·
1 Parent(s): 3cdcf43
Files changed (2) hide show
  1. agent.py +5 -7
  2. tools.py +5 -55
agent.py CHANGED
@@ -1,6 +1,4 @@
1
  import os
2
- from typing import Bool
3
-
4
  from dotenv import load_dotenv
5
 
6
  # Import models from SmolaAgents
@@ -10,7 +8,7 @@ from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel
10
  from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool
11
 
12
  # Import custom tools
13
- from Final_Assignment_Template.tools import (
14
  AddDocumentToVectorStoreTool,
15
  ArxivSearchTool,
16
  DownloadFileFromLinkTool,
@@ -21,16 +19,15 @@ from Final_Assignment_Template.tools import (
21
  TranscribeAudioTool,
22
  VisitWebpageTool,
23
  WikipediaSearchTool,
 
24
  )
25
 
26
  # Import utility functions
27
  from utils import extract_final_answer, replace_tool_mentions
28
 
29
- # Import tools from LangChain
30
-
31
 
32
  class BoomBot:
33
- def __init__(self, provider="deepinfra"):
34
  """
35
  Initialize the BoomBot with the specified provider.
36
 
@@ -134,6 +131,7 @@ class BoomBot:
134
  arxiv_search,
135
  add_doc_vectorstore,
136
  retrieve_doc_vectorstore,
 
137
  python_interpreter,
138
  final_answer,
139
  ]
@@ -246,7 +244,7 @@ class BoomBot:
246
  - Conclude with: FINAL ANSWER: <your_answer>
247
  """
248
 
249
- def run(self, question: str, task_id: str, to_download: Bool) -> str:
250
  """
251
  Run the agent with the given question, task_id, and download flag.
252
 
 
1
  import os
 
 
2
  from dotenv import load_dotenv
3
 
4
  # Import models from SmolaAgents
 
8
  from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool
9
 
10
  # Import custom tools
11
+ from tools import (
12
  AddDocumentToVectorStoreTool,
13
  ArxivSearchTool,
14
  DownloadFileFromLinkTool,
 
19
  TranscribeAudioTool,
20
  VisitWebpageTool,
21
  WikipediaSearchTool,
22
+ image_question_answering
23
  )
24
 
25
  # Import utility functions
26
  from utils import extract_final_answer, replace_tool_mentions
27
 
 
 
28
 
29
  class BoomBot:
30
+ def __init__(self, provider="meta"):
31
  """
32
  Initialize the BoomBot with the specified provider.
33
 
 
131
  arxiv_search,
132
  add_doc_vectorstore,
133
  retrieve_doc_vectorstore,
134
+ image_question_answering,
135
  python_interpreter,
136
  final_answer,
137
  ]
 
244
  - Conclude with: FINAL ANSWER: <your_answer>
245
  """
246
 
247
+ def run(self, question: str, task_id: str, to_download) -> str:
248
  """
249
  Run the agent with the given question, task_id, and download flag.
250
 
tools.py CHANGED
@@ -35,6 +35,11 @@ from markdownify import markdownify
35
  from smolagents import Tool, tool
36
  from smolagents.utils import truncate_content
37
 
 
 
 
 
 
38
 
39
  class ReadFileContentTool(Tool):
40
  name = "read_file_content"
@@ -159,44 +164,6 @@ class WikipediaSearchTool(Tool):
159
  return f"Error wiki: {e}"
160
 
161
 
162
- class VisitWebpageTool(Tool):
163
- name = "visit_webpage"
164
- description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
165
- inputs = {
166
- "url": {
167
- "type": "string",
168
- "description": "The url of the webpage to visit.",
169
- }
170
- }
171
- output_type = "string"
172
-
173
- def forward(self, url: str) -> str:
174
- try:
175
- import re
176
-
177
- import requests
178
- from markdownify import markdownify
179
- from requests.exceptions import RequestException
180
- from smolagents.utils import truncate_content
181
- except ImportError as e:
182
- raise ImportError(
183
- "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
184
- ) from e
185
- try:
186
- response = requests.get(url, timeout=20)
187
- response.raise_for_status() # Raise an exception for bad status codes
188
- markdown_content = markdownify(response.text).strip()
189
- markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
190
- return truncate_content(markdown_content, 5000)
191
-
192
- except requests.exceptions.Timeout:
193
- return "The request timed out. Please try again later or check the URL."
194
- except RequestException as e:
195
- return f"Error fetching the webpage: {str(e)}"
196
- except Exception as e:
197
- return f"An unexpected error occurred: {str(e)}"
198
-
199
-
200
  class TranscribeAudioTool(Tool):
201
  name = "transcribe_audio"
202
  description = """Converts spoken content in audio files to text. Handles various audio formats and produces a transcript of the spoken content for analysis."""
@@ -577,7 +544,6 @@ class DuckDuckGoSearchTool(Tool):
577
 
578
  return f"❌ Failed to retrieve results after {max_retries} retries."
579
 
580
-
581
  huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
582
  api_key=os.environ["HF_TOKEN"], model_name="sentence-transformers/all-mpnet-base-v2"
583
  )
@@ -592,7 +558,6 @@ SUPPORTED_EXTENSIONS = [
592
  ".htm",
593
  ]
594
 
595
-
596
  class AddDocumentToVectorStoreTool(Tool):
597
  name = "add_document_to_vector_store"
598
  description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
@@ -660,7 +625,6 @@ class AddDocumentToVectorStoreTool(Tool):
660
  traceback.print_exc()
661
  return f"Error: {e}"
662
 
663
-
664
  class QueryVectorStoreTool(Tool):
665
  name = "query_downloaded_documents"
666
  description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
@@ -715,7 +679,6 @@ class QueryVectorStoreTool(Tool):
715
  traceback.print_exc()
716
  return f"Error querying vector store: {e}"
717
 
718
-
719
  @tool
720
  def image_question_answering(image_path: str, prompt: str) -> str:
721
  """
@@ -752,7 +715,6 @@ def image_question_answering(image_path: str, prompt: str) -> str:
752
 
753
  return response.message.content.strip()
754
 
755
-
756
  class VisitWebpageTool(Tool):
757
  name = "visit_webpage"
758
  description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
@@ -987,7 +949,6 @@ class VisitWebpageTool(Tool):
987
 
988
  return content
989
 
990
-
991
  class ArxivSearchTool(Tool):
992
  name = "arxiv_search"
993
  description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
@@ -1045,13 +1006,6 @@ class ArxivSearchTool(Tool):
1045
 
1046
  return "\n".join(output_lines).strip()
1047
 
1048
-
1049
- from typing import Dict, List
1050
-
1051
- import requests
1052
- from bs4 import BeautifulSoup
1053
-
1054
-
1055
  def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
1056
  """
1057
  Fetches the given arXiv advanced‐search URL, parses the HTML,
@@ -1114,10 +1068,6 @@ def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
1114
 
1115
  return results
1116
 
1117
-
1118
- from urllib.parse import quote_plus
1119
-
1120
-
1121
  def build_arxiv_url(
1122
  query: str, from_date: str = None, to_date: str = None, size: int = 50
1123
  ) -> str:
 
35
  from smolagents import Tool, tool
36
  from smolagents.utils import truncate_content
37
 
38
+ from typing import Dict, List
39
+
40
+ import requests
41
+ from bs4 import BeautifulSoup
42
+ from urllib.parse import quote_plus
43
 
44
  class ReadFileContentTool(Tool):
45
  name = "read_file_content"
 
164
  return f"Error wiki: {e}"
165
 
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  class TranscribeAudioTool(Tool):
168
  name = "transcribe_audio"
169
  description = """Converts spoken content in audio files to text. Handles various audio formats and produces a transcript of the spoken content for analysis."""
 
544
 
545
  return f"❌ Failed to retrieve results after {max_retries} retries."
546
 
 
547
  huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
548
  api_key=os.environ["HF_TOKEN"], model_name="sentence-transformers/all-mpnet-base-v2"
549
  )
 
558
  ".htm",
559
  ]
560
 
 
561
  class AddDocumentToVectorStoreTool(Tool):
562
  name = "add_document_to_vector_store"
563
  description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
 
625
  traceback.print_exc()
626
  return f"Error: {e}"
627
 
 
628
  class QueryVectorStoreTool(Tool):
629
  name = "query_downloaded_documents"
630
  description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
 
679
  traceback.print_exc()
680
  return f"Error querying vector store: {e}"
681
 
 
682
  @tool
683
  def image_question_answering(image_path: str, prompt: str) -> str:
684
  """
 
715
 
716
  return response.message.content.strip()
717
 
 
718
  class VisitWebpageTool(Tool):
719
  name = "visit_webpage"
720
  description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
 
949
 
950
  return content
951
 
 
952
  class ArxivSearchTool(Tool):
953
  name = "arxiv_search"
954
  description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
 
1006
 
1007
  return "\n".join(output_lines).strip()
1008
 
 
 
 
 
 
 
 
1009
  def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
1010
  """
1011
  Fetches the given arXiv advanced‐search URL, parses the HTML,
 
1068
 
1069
  return results
1070
 
 
 
 
 
1071
  def build_arxiv_url(
1072
  query: str, from_date: str = None, to_date: str = None, size: int = 50
1073
  ) -> str: