try qwen
agent.py
CHANGED
@@ -95,6 +95,7 @@ class BoomBot:
             )
         elif self.provider == "meta":
             meta_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
+            meta_model = "Qwen/Qwen2.5-72B-Instruct"
             # return OpenAIServerModel(
             #     model_id=meta_model,
             #     api_base="https://api.deepinfra.com/v1/openai",
@@ -147,29 +148,24 @@ class BoomBot:
             download_file,
             read_file_content,
             visit_webpage,
-            transcribe_video,
+            # transcribe_video,
             transcribe_audio,
             get_wikipedia_info,
             arxiv_search,
             add_doc_vectorstore,
             retrieve_doc_vectorstore,
-            image_question_answering,
+            # image_question_answering,
             python_interpreter,
             final_answer,
         ]
 
         # Additional imports for the Python interpreter
         additional_imports = [
+            # Built-in / core Python
             "json",
             "os",
             "glob",
             "pathlib",
-            "pandas",
-            "numpy",
-            "matplotlib",
-            "seaborn",
-            "sklearn",
-            "tqdm",
             "argparse",
             "pickle",
             "io",
@@ -182,8 +178,20 @@ class BoomBot:
             "zipfile",
             "itertools",
             "functools",
-            "
-            "
+            "requests",
+            "bs4",
+            # Data handling
+            "pandas",
+            "numpy",
+            "dask",      # For handling large datasets
+            "polars",    # Fast DataFrame alternative
+            "pyarrow",   # For Arrow/Parquet file formats
+            "h5py",      # For HDF5 files
+            "openpyxl",  # Excel reading/writing
+            "yaml",      # Config file parsing
+            # Basic plotting
+            "matplotlib",
+            "seaborn"
         ]
 
         # Create the agent
@@ -211,64 +219,68 @@ class BoomBot:
         """
         return """
 YOUR BEHAVIOR GUIDELINES:
-
-
-
+• Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
+• For math or puzzles: break the problem into code/math, then solve programmatically.
+
 RESEARCH WORKFLOW:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+1. SEARCH
+   - Begin with web_search, wikipedia_search, or arxiv_search.
+   - Refine your query if results are weak—don't just retry the same terms.
+   - If one search tool yields little, try another before moving on to downloads.
+
+2. VISIT
+   - Use visit_webpage to preview content from promising links.
+   - If the content is long, complex, spans multiple pages, or may be needed later, do NOT rely solely on visit_webpage.
+   - Move quickly to downloading: avoid repeated visits when the content should be archived.
+
+3. DOWNLOAD (MANDATORY IF CONTENT IS LONG, DENSE, OR CRUCIAL)
+   - Use download_file_from_link on all valuable resources (including html pages or pdfs).
+   - Especially when a page is detailed, technical, or multi-part, downloading is preferred.
+   - You can (and should) download webpages as HTML. Do this whenever the site might be referenced again later.
+
+4. INDEX & QUERY
+   - Immediately add downloaded files to the vector store using add_document_to_vector_store.
+   - For complex tasks or unclear answers, prefer querying vector store over re-visiting pages.
+   - If you've downloaded a file, **always index it unless clearly irrelevant.**
+
+5. READ
+   - Use read_file_content to analyze file contents (html, pdf, text).
+   - You can also use query_downloaded_documents for deeper understanding.
+
+6. EVALUATE
+   - ✅ If the answer is clear from current sources, respond.
+   - ❌ If not, continue iterating and analyzing downloaded material.
+
 FALLBACK & ADAPTATION:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-• MEDIA ANALYSIS:
-  download_file_from_link → transcribe_video/transcribe_audio/describe_image → final_answer
-
+• If a tool fails, reformulate or switch tools.
+• For arXiv: web_search might help you find the paper; follow with direct download of the PDF via download_file_from_link.
+
+MANDATORY DOWNLOAD & INDEX WHEN:
+• The page is lengthy or technical (e.g., research papers, government sites, legal docs, blog posts with code).
+• You suspect you'll need to return to the content.
+• You are working on multi-hop reasoning or long-term memory tasks.
+
+COMMON TOOL CHAINS:
+• FACTUAL Qs:
+  web_search → final_answer
+• CURRENT EVENTS:
+  web_search → visit_webpage → (download + index if needed) → final_answer
+• DOCUMENT-BASED Qs:
+  web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
+• ARXIV PAPERS:
+  arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
+• MEDIA ANALYSIS:
+  download_file_from_link → transcribe_audio → final_answer
+
 FINAL ANSWER FORMAT:
-
-
-
-
-
-- Conclude with: FINAL ANSWER: <your_answer>
+- Begin with "FINAL ANSWER: "
+- Number → digits only (e.g., 42)
+- String → exact text (e.g., Pope Francis) without quotation marks
+- List → comma-separated, no brackets unless specified (e.g., 2, 3, 4)
+- End with: FINAL ANSWER: <your_answer>
 """
 
+
     def run(self, question: str, task_id: str, to_download) -> str:
         """
        Run the agent with the given question, task_id, and download flag.
@@ -307,53 +319,101 @@ class BoomBot:
 
 
 if __name__ == "__main__":
+    import os
+    import csv
     import time
-    from utils import load_online_qas, extract_final_answer
     import requests
-    import
+    from utils import load_online_qas, extract_final_answer
+
+    CSV_FILE = "evals/llm_eval.csv"
+    FIELDNAMES = ["model", "task_id", "question", "llm_answer", "processed_answer", "real_answer"]
+
+    def ensure_csv():
+        """Create the CSV file with header if it doesn't exist."""
+        if not os.path.isfile(CSV_FILE):
+            with open(CSV_FILE, mode="w", newline="", encoding="utf-8") as f:
+                writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
+                writer.writeheader()
+
+    def append_results(rows):
+        """Append a list of dict rows to the CSV."""
+        with open(CSV_FILE, mode="a", newline="", encoding="utf-8") as f:
+            writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
+            for row in rows:
+                writer.writerow(row)
 
     agent = BoomBot(provider="gemma")
-
-
+    model_name = agent.provider  # e.g. "gemma"
+
+    file_online = load_online_qas(file_path=r"../../Final_Assignment_Template/allqas.jsonl", has_file=True)
+    nofile_online = load_online_qas(file_path=r"../../Final_Assignment_Template/allqas.jsonl", has_file=False)
 
     excluded_keywords = ["youtube", "video", "chess"]
+    rows_to_append = []
 
+    # 1) With downloadable files
     for entry in file_online:
-        task_id
-        question
+        task_id = entry["task_id"]
+        question = entry["Question"]
         real_answer = entry["Final answer"]
-        file_name
-        to_download = file_name
-        link
+        file_name = entry.get("file_name", "")
+        to_download = bool(file_name)
+        link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
 
-        # Check exclusion and file availability
         if any(kw in question.lower() for kw in excluded_keywords):
-            llm_answer = "NOT ATTEMPTED"
-            processed_answer = llm_answer
+            llm_answer = processed = "NOT ATTEMPTED"
         else:
            try:
-
-                if
-                    llm_answer = "NOT ATTEMPTED"
-                    processed_answer = llm_answer
+                resp = requests.get(link)
+                if resp.status_code != 200:
+                    llm_answer = processed = "NOT ATTEMPTED"
                 else:
-
                     llm_answer = agent.run(question, task_id, to_download)
-
+                    processed = extract_final_answer(llm_answer).strip()
                 # time.sleep(10)
             except Exception as e:
-                llm_answer =
                # time.sleep(6)
 
-
-            "
-            "
-            "
-            "
+                llm_answer = processed = f"[Error] {e}"
+        rows_to_append.append({
+            "model": model_name,
+            "task_id": task_id,
+            "question": question,
+            "llm_answer": llm_answer,
+            "processed_answer": processed,
+            "real_answer": real_answer,
         })
+        print("REAL ANSWER:", real_answer)
 
+    # 2) Without downloadable files
+    for entry in nofile_online:
+        task_id = entry["task_id"]
+        question = entry["Question"]
+        real_answer = entry["Final answer"]
+
+        if any(kw in question.lower() for kw in excluded_keywords):
+            llm_answer = processed = "NOT ATTEMPTED"
+        else:
+            try:
+                llm_answer = agent.run(question, task_id, to_download=False)
+                processed = extract_final_answer(llm_answer).strip()
+                # time.sleep(10)
+            except Exception as e:
+                llm_answer = processed = f"[Error] {e}"
+                # time.sleep(6)
+
+        rows_to_append.append({
+            "model": model_name,
+            "task_id": task_id,
+            "question": question,
+            "llm_answer": llm_answer,
+            "processed_answer": processed,
+            "real_answer": real_answer,
+        })
         print("REAL ANSWER:", real_answer)
 
-    #
-
-
+    # ensure CSV exists and append
+    ensure_csv()
+    append_results(rows_to_append)
+
+    print(f"✅ Appended {len(rows_to_append)} rows to {CSV_FILE}")
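
The evaluation loop above calls extract_final_answer from utils, which this commit does not touch and this diff does not show. A minimal sketch of what such a helper could look like, assuming only the "FINAL ANSWER: " convention that the system prompt mandates (the function body below is an illustration, not the repository's actual implementation):

import re

# Hypothetical stand-in for utils.extract_final_answer (the real helper is
# imported from utils and is not part of this diff). It returns the text
# after the last "FINAL ANSWER:" marker, falling back to the raw output.
def extract_final_answer(llm_answer: str) -> str:
    matches = re.findall(r"FINAL ANSWER:\s*(.*)", llm_answer)
    return matches[-1].strip() if matches else llm_answer.strip()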
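
Once evals/llm_eval.csv has been populated, a small offline scorer can summarize a run. This is not part of the commit; the case-insensitive exact-match rule below is an assumed metric for illustration, grounded only in the CSV schema defined by FIELDNAMES:

import csv

# Hypothetical scorer for evals/llm_eval.csv (not part of this commit).
# Counts case-insensitive exact matches between processed_answer and
# real_answer, skipping rows the agent never attempted.
def score_eval_csv(path: str = "evals/llm_eval.csv") -> float:
    attempted = correct = 0
    with open(path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            if row["processed_answer"] == "NOT ATTEMPTED":
                continue  # excluded keywords or missing files
            attempted += 1
            if row["processed_answer"].strip().lower() == row["real_answer"].strip().lower():
                correct += 1
    return correct / attempted if attempted else 0.0

print(f"exact-match accuracy: {score_eval_csv():.1%}")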