|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel |
|
|
|
|
|
|
|
|
from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool |
|
|
|
|
|
|
|
|
from tools import ( |
|
|
AddDocumentToVectorStoreTool, |
|
|
ArxivSearchTool, |
|
|
DownloadFileFromLinkTool, |
|
|
DuckDuckGoSearchTool, |
|
|
QueryVectorStoreTool, |
|
|
ReadFileContentTool, |
|
|
TranscibeVideoFileTool, |
|
|
TranscribeAudioTool, |
|
|
VisitWebpageTool, |
|
|
WikipediaSearchTool, |
|
|
image_question_answering, |
|
|
) |
|
|
|
|
|
|
|
|
from utils import extract_final_answer, replace_tool_mentions |
|
|
|
|
|
|
|
|
class BoomBot:
    """
    Research/QA agent wrapping a smolagents ``CodeAgent`` with web-search,
    file-download, document-indexing, and media-transcription tools.
    """

    def __init__(self, provider="anthropic"):
        """
        Initialize the BoomBot with the specified provider.

        Args:
            provider (str): The model provider to use (e.g., "groq", "qwen",
                "gemma", "anthropic", "deepinfra", "meta").
        """
        # Pull API keys (ANTHROPIC_API_KEY, DEEPINFRA_API_KEY, GROQ_API_KEY, ...)
        # from a local .env file into the environment.
        load_dotenv()
        self.provider = provider
        self.model = self._initialize_model()
        self.agent = self._create_agent()

    def _initialize_model(self):
        """
        Initialize the appropriate model based on the provider.

        Returns:
            The initialized LiteLLMModel object.

        Raises:
            ValueError: If ``self.provider`` is not a supported provider name.
        """
        if self.provider == "qwen":
            # Local Ollama deployment of Qwen3 8B.
            return LiteLLMModel(
                model_id="ollama_chat/qwen3:8b",
                device="cuda",
                num_ctx=32768,
                temperature=0.6,
                top_p=0.95,
            )
        elif self.provider == "gemma":
            # Local Ollama deployment of Gemma 3 12B (quantization-aware build).
            return LiteLLMModel(
                model_id="ollama_chat/gemma3:12b-it-qat",
                num_ctx=65536,
                temperature=1.0,
                device="cuda",
                top_k=64,
                top_p=0.95,
                min_p=0.0,
            )
        elif self.provider == "anthropic":
            return LiteLLMModel(
                model_id="anthropic/claude-3-5-haiku-latest",
                temperature=0.6,
                max_tokens=8192,
                api_key=os.getenv("ANTHROPIC_API_KEY"),
            )
        elif self.provider == "deepinfra":
            deepinfra_model = "Qwen/Qwen3-235B-A22B"
            return LiteLLMModel(
                model_id="deepinfra/" + deepinfra_model,
                api_base="https://api.deepinfra.com/v1/openai",
                api_key=os.getenv("DEEPINFRA_API_KEY"),
                flatten_messages_as_text=True,
                max_tokens=8192,
                temperature=0.7,
            )
        elif self.provider == "meta":
            # Llama 3.3 70B served through DeepInfra's OpenAI-compatible endpoint.
            meta_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
            return LiteLLMModel(
                model_id="deepinfra/" + meta_model,
                api_base="https://api.deepinfra.com/v1/openai",
                api_key=os.getenv("DEEPINFRA_API_KEY"),
                flatten_messages_as_text=True,
                max_tokens=8192,
                temperature=0.7,
            )
        elif self.provider == "groq":
            # BUG FIX: the original branch used an Anthropic model id
            # ("claude-3-opus-20240229"), which LiteLLM would route to
            # Anthropic, not Groq, and it passed no API key. Use a
            # Groq-hosted model with the Groq credential instead.
            return LiteLLMModel(
                model_id="groq/llama-3.3-70b-versatile",
                api_key=os.getenv("GROQ_API_KEY"),
                temperature=0.7,
                max_tokens=8192,
            )
        else:
            raise ValueError(f"Unsupported provider: {self.provider}")

    def _create_agent(self):
        """
        Create and configure the agent with all necessary tools.

        Returns:
            The configured CodeAgent.
        """
        # Instantiate one of each project tool.
        # NOTE(review): TranscibeVideoFileTool is misspelled in the `tools`
        # module itself; the name is kept as-is to match that interface.
        download_file = DownloadFileFromLinkTool()
        read_file_content = ReadFileContentTool()
        visit_webpage = VisitWebpageTool()
        transcribe_video = TranscibeVideoFileTool()
        transcribe_audio = TranscribeAudioTool()
        get_wikipedia_info = WikipediaSearchTool()
        web_searcher = DuckDuckGoSearchTool()
        arxiv_search = ArxivSearchTool()
        add_doc_vectorstore = AddDocumentToVectorStoreTool()
        retrieve_doc_vectorstore = QueryVectorStoreTool()

        # smolagents built-ins: sandboxed Python execution and final-answer sink.
        python_interpreter = PythonInterpreterTool()
        final_answer = FinalAnswerTool()

        agent_tools = [
            web_searcher,
            download_file,
            read_file_content,
            visit_webpage,
            transcribe_video,
            transcribe_audio,
            get_wikipedia_info,
            arxiv_search,
            add_doc_vectorstore,
            retrieve_doc_vectorstore,
            image_question_answering,
            python_interpreter,
            final_answer,
        ]

        # Modules the CodeAgent's generated Python is allowed to import.
        # NOTE(review): "open" is a builtin, not an importable module — it is
        # kept here to preserve the original configuration, but it likely has
        # no effect; confirm against smolagents' import-authorization logic.
        additional_imports = [
            "json",
            "os",
            "glob",
            "pathlib",
            "pandas",
            "numpy",
            "matplotlib",
            "seaborn",
            "sklearn",
            "tqdm",
            "argparse",
            "pickle",
            "io",
            "re",
            "datetime",
            "collections",
            "math",
            "random",
            "csv",
            "zipfile",
            "itertools",
            "functools",
            "open",
            "requests",
        ]

        agent = CodeAgent(
            tools=agent_tools,
            max_steps=12,
            model=self.model,
            add_base_tools=False,
            stream_outputs=True,
            additional_authorized_imports=additional_imports,
        )

        # Rewrite tool mentions in the default system prompt so they match
        # this project's tool names.
        agent.system_prompt = replace_tool_mentions(agent.system_prompt)

        return agent

    def _get_system_prompt(self):
        """
        Return the behavioral system prompt prepended to every task in run().

        Returns:
            str: The system prompt.
        """
        # NOTE(review): mojibake from the original file (e.g. "β’", stray "β")
        # has been restored to the intended characters (•, →, —, ✅, ❌).
        return """
        YOUR BEHAVIOR GUIDELINES:
        • Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
        • For math or puzzles: break the problem into code/math, then solve programmatically.

        RESEARCH WORKFLOW:
        1. SEARCH
        - Try web_search, wikipedia_search, or arxiv_search first.
        - Refine your query rather than repeating the exact same terms.
        - If one search tool yields insufficient info, switch to another before downloading.
        2. VISIT
        - Use visit_webpage to extract and read page content when a promising link appears after one of the SEARCH tools.
        - For each visited link, also download the file and add to the vector store, you might need to query this later, especially if you have a lot of search results.
        3. EVALUATE
        - ✅ If the page or search snippet fully answers the question, respond immediately.
        - ❌ If not, move on to deeper investigation.
        4. DOWNLOAD
        - Use download_file_from_link tool on relevant links found (yes you can download webpages as html).
        - For arXiv papers, target the /pdf/ or DOI link (e.g https://arxiv.org/pdf/2011.10672).
        5. INDEX & QUERY
        - Add downloaded documents to the vector store with add_document_to_vector_store.
        - Use query_downloaded_documents for detailed answers.
        6. READ
        - You have access to a read_file_content tool to read most types of files (html, pdf, text).
        - You can also directly interact with downloaded files (csv, excel) in your python code.
        - Use query_downloaded_documents if you have added docs to vector store.

        FALLBACK & ADAPTATION:
        • If a tool fails, reformulate your query or try a different search method before dropping to download.
        • If a tool fails multiple times, try a different tool.
        • For arXiv: you might discover a paper link via web_search tool and then directly use download_file_from_link tool

        COMMON TOOL CHAINS (conceptual outlines):
        These are just guidelines, each task might require a unique workflow.
        A tool can provide useful information for the task, it will not always contain the answer. You need to work to get to a final_answer that makes sense.

        • FACTUAL Qs:
        web_search → final_answer
        • CURRENT EVENTS:
        To have some summary information use web_search, that might output a promising website to visit and read content from using (visit_webpage or download_file_from_link and read_file_content)
        web_search → visit_webpage → final_answer
        • DOCUMENT-BASED Qs:
        web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
        • ARXIV PAPERS:
        The arxiv search tool provides a list of results with summary content, to inspect the whole paper you need to download it with download_file_from_link tool.
        arxiv_search → download_file_from_link → read_file_content
        If that fails
        arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents
        • MEDIA ANALYSIS:
        download_file_from_link → transcribe_video/transcribe_audio/describe_image → final_answer

        FINAL ANSWER FORMAT:
        ** Do not name any python variables final_answer, this causes problems with tools.
        - Begin with "FINAL ANSWER: "
        - Number → digits only (e.g., 42) no units unless specified
        - String → exact text (e.g., Pope Francis) without quotation marks
        - List → comma-separated, one space, no brackets unless specified(e.g., 2, 3, 4)
        - Conclude with: FINAL ANSWER: <your_answer>
        """

    def run(self, question: str, task_id: str, to_download) -> str:
        """
        Run the agent with the given question, task_id, and download flag.

        Args:
            question (str): The question or task for the agent to process.
            task_id (str): A unique identifier for the task.
            to_download (bool): Whether a task file must be downloaded first.

        Returns:
            str: The agent's response, post-processed by extract_final_answer.
        """
        prompt = self._get_system_prompt()
        prompt += "\nHere is the Task you need to solve:\n\n"
        prompt += f"Task: {question}\n\n"

        if to_download:
            # Tasks with attachments expose the file at this scoring endpoint.
            link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
            prompt += (
                "IMPORTANT: Before solving the task, you must download a required file.\n"
                f"Use the `download_file_from_link` tool with this link: {link}\n"
                "After downloading, use the appropriate tool to read or process the file "
                "before attempting to solve the task.\n\n"
            )

        result = self.agent.run(prompt)

        # Strip the "FINAL ANSWER:" scaffolding down to the bare answer.
        return extract_final_answer(result)
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import json

    import requests

    from utils import load_online_qas, extract_final_answer

    agent = BoomBot(provider="gemma")
    file_online = load_online_qas(
        file_path=r"../../Final_Assignment_Template/allqas.jsonl", has_file=True
    )
    results = []

    # Question types the agent has no working tooling for yet.
    excluded_keywords = ["youtube", "video", "chess"]

    for entry in file_online:
        task_id = entry["task_id"]
        question = entry["Question"]
        real_answer = entry["Final answer"]
        file_name = entry.get("file_name", "")
        to_download = file_name != ""
        link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

        if any(kw in question.lower() for kw in excluded_keywords):
            llm_answer = "NOT ATTEMPTED"
            processed_answer = llm_answer
        else:
            try:
                # BUG FIX: only probe the file endpoint when the task actually
                # has an attachment. Previously requests.get(link) ran for
                # every task, so file-less questions were skipped as
                # NOT ATTEMPTED whenever the endpoint returned 404.
                # A timeout is added so a stalled request cannot hang the run.
                if to_download and requests.get(link, timeout=30).status_code != 200:
                    llm_answer = "NOT ATTEMPTED"
                    processed_answer = llm_answer
                else:
                    llm_answer = agent.run(question, task_id, to_download)
                    # NOTE(review): BoomBot.run already applies
                    # extract_final_answer; this second pass is kept as a
                    # harmless safeguard on already-extracted text.
                    processed_answer = str(extract_final_answer(llm_answer))
            except Exception as e:
                llm_answer = processed_answer = f"[Error] {e}"

        results.append({
            "question": question,
            "llm_answer": llm_answer,
            "processed_answer": processed_answer.strip(),
            "real_answer": real_answer,
        })

        print("REAL ANSWER:", real_answer)

    with open("llm_eval.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
|
|
|