small fix
agent.py CHANGED

@@ -1,6 +1,4 @@
 import os
-from typing import Bool
-
 from dotenv import load_dotenv
 
 # Import models from SmolaAgents
@@ -10,7 +8,7 @@ from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel
 from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool
 
 # Import custom tools
-from Final_Assignment_Template.tools import (
+from tools import (
     AddDocumentToVectorStoreTool,
     ArxivSearchTool,
     DownloadFileFromLinkTool,
@@ -21,16 +19,15 @@ from Final_Assignment_Template.tools import (
     TranscribeAudioTool,
     VisitWebpageTool,
     WikipediaSearchTool,
+    image_question_answering
 )
 
 # Import utility functions
 from utils import extract_final_answer, replace_tool_mentions
 
-# Import tools from LangChain
-
 
 class BoomBot:
-    def __init__(self, provider="
+    def __init__(self, provider="meta"):
         """
         Initialize the BoomBot with the specified provider.
 
@@ -134,6 +131,7 @@ class BoomBot:
             arxiv_search,
             add_doc_vectorstore,
             retrieve_doc_vectorstore,
+            image_question_answering,
             python_interpreter,
             final_answer,
         ]
@@ -246,7 +244,7 @@ class BoomBot:
         - Conclude with: FINAL ANSWER: <your_answer>
         """
 
-    def run(self, question: str, task_id: str, to_download
+    def run(self, question: str, task_id: str, to_download) -> str:
         """
         Run the agent with the given question, task_id, and download flag.
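A note on the first agent.py hunk: the standard-library typing module exports no Bool, so `from typing import Bool` raised ImportError the moment agent.py loaded; the built-in bool is the right annotation and needs no import. A minimal, runnable sketch of the corrected pattern (the free function and its body are illustrative stand-ins for the run method above, not the real agent logic):

def run(question: str, task_id: str, to_download: bool) -> str:
    # bool is a builtin, so the annotation needs no import.
    return f"task {task_id}: to_download={to_download}"

print(run("What is 2 + 2?", "demo-1", to_download=False))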
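The companion change wires the new image_question_answering function into the agent's tool list. In smolagents, a plain function decorated with @tool can sit in the same tools=[...] list as Tool subclasses when a CodeAgent is constructed. A hedged sketch of that wiring, assuming smolagents' documented @tool/CodeAgent API (the stub body and model id are placeholders, not this repo's actual configuration):

from smolagents import CodeAgent, LiteLLMModel, tool

@tool
def image_question_answering(image_path: str, prompt: str) -> str:
    """Answers a question about an image.

    Args:
        image_path: Path to the image file on disk.
        prompt: The question to ask about the image.
    """
    return "stub answer"  # the real tool calls a vision model here

# Decorated functions and Tool instances share one tools list.
agent = CodeAgent(
    tools=[image_question_answering],
    model=LiteLLMModel(model_id="ollama/llama3"),  # placeholder model id
)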
tools.py CHANGED

@@ -35,6 +35,11 @@ from markdownify import markdownify
 from smolagents import Tool, tool
 from smolagents.utils import truncate_content
 
+from typing import Dict, List
+
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import quote_plus
 
 class ReadFileContentTool(Tool):
     name = "read_file_content"
@@ -159,44 +164,6 @@ class WikipediaSearchTool(Tool):
             return f"Error wiki: {e}"
 
 
-class VisitWebpageTool(Tool):
-    name = "visit_webpage"
-    description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
-    inputs = {
-        "url": {
-            "type": "string",
-            "description": "The url of the webpage to visit.",
-        }
-    }
-    output_type = "string"
-
-    def forward(self, url: str) -> str:
-        try:
-            import re
-
-            import requests
-            from markdownify import markdownify
-            from requests.exceptions import RequestException
-            from smolagents.utils import truncate_content
-        except ImportError as e:
-            raise ImportError(
-                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
-            ) from e
-        try:
-            response = requests.get(url, timeout=20)
-            response.raise_for_status()  # Raise an exception for bad status codes
-            markdown_content = markdownify(response.text).strip()
-            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-            return truncate_content(markdown_content, 5000)
-
-        except requests.exceptions.Timeout:
-            return "The request timed out. Please try again later or check the URL."
-        except RequestException as e:
-            return f"Error fetching the webpage: {str(e)}"
-        except Exception as e:
-            return f"An unexpected error occurred: {str(e)}"
-
-
 class TranscribeAudioTool(Tool):
     name = "transcribe_audio"
     description = """Converts spoken content in audio files to text. Handles various audio formats and produces a transcript of the spoken content for analysis."""
@@ -577,7 +544,6 @@ class DuckDuckGoSearchTool(Tool):
 
         return f"❌ Failed to retrieve results after {max_retries} retries."
 
-
 huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
     api_key=os.environ["HF_TOKEN"], model_name="sentence-transformers/all-mpnet-base-v2"
 )
@@ -592,7 +558,6 @@ SUPPORTED_EXTENSIONS = [
     ".htm",
 ]
 
-
 class AddDocumentToVectorStoreTool(Tool):
     name = "add_document_to_vector_store"
     description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
@@ -660,7 +625,6 @@ class AddDocumentToVectorStoreTool(Tool):
             traceback.print_exc()
             return f"Error: {e}"
 
-
 class QueryVectorStoreTool(Tool):
     name = "query_downloaded_documents"
     description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
@@ -715,7 +679,6 @@ class QueryVectorStoreTool(Tool):
             traceback.print_exc()
             return f"Error querying vector store: {e}"
 
-
 @tool
 def image_question_answering(image_path: str, prompt: str) -> str:
     """
@@ -752,7 +715,6 @@ def image_question_answering(image_path: str, prompt: str) -> str:
 
     return response.message.content.strip()
 
-
 class VisitWebpageTool(Tool):
     name = "visit_webpage"
     description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
@@ -987,7 +949,6 @@ class VisitWebpageTool(Tool):
 
         return content
 
-
 class ArxivSearchTool(Tool):
     name = "arxiv_search"
     description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
@@ -1045,13 +1006,6 @@ class ArxivSearchTool(Tool):
 
         return "\n".join(output_lines).strip()
 
-
-from typing import Dict, List
-
-import requests
-from bs4 import BeautifulSoup
-
-
 def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
     """
     Fetches the given arXiv advanced‐search URL, parses the HTML,
@@ -1114,10 +1068,6 @@ def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
 
     return results
 
-
-from urllib.parse import quote_plus
-
-
 def build_arxiv_url(
     query: str, from_date: str = None, to_date: str = None, size: int = 50
 ) -> str:
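Finally, the deleted first copy of VisitWebpageTool: when a module defines the same class name twice, the second class statement silently rebinds the name and the first definition becomes unreachable, so only the copy kept at old line 756 was ever used. Dropping the earlier one is behavior-preserving, which is what makes this a safe small fix. A self-contained illustration:

class Greeter:
    def hello(self) -> str:
        return "first definition"

class Greeter:  # rebinds the name; the class above is now unreachable
    def hello(self) -> str:
        return "second definition"

print(Greeter().hello())  # prints "second definition"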
|