mdicio committed on
Commit
08eb725
·
1 Parent(s): dbcccc7
Files changed (5) hide show
  1. .gitignore +2 -1
  2. agent.py +52 -5
  3. requirements.txt +2 -1
  4. tools.py +2 -1
  5. utils.py +154 -0
.gitignore CHANGED
@@ -6,4 +6,5 @@ downloads/
6
  .python_version
7
  *.jsonl
8
  *__pycache__/
9
- *.log
 
 
6
  .python_version
7
  *.jsonl
8
  *__pycache__/
9
+ *.log
10
+ evals/
agent.py CHANGED
@@ -182,6 +182,7 @@ class BoomBot:
182
  "zipfile",
183
  "itertools",
184
  "functools",
 
185
  ]
186
 
187
  # Create the agent
@@ -302,9 +303,55 @@ class BoomBot:
302
  return final_answer
303
 
304
 
305
- # Example of how to use this code (commented out)
306
  if __name__ == "__main__":
307
- agent = BoomBot(provider="meta")
308
- question = "In the year 2020, where were koi fish found in the watershed with the id 02040203? Give only the name of the pond, lake, or stream where the fish were found, and not the name of the city or county."
309
- response = agent.run(question=question, task_id="1", to_download=False)
310
- print(f"Response: {response}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  "zipfile",
183
  "itertools",
184
  "functools",
185
+ "open"
186
  ]
187
 
188
  # Create the agent
 
303
  return final_answer
304
 
305
 
306
def _evaluate_online_qas(
    qa_path: str = r"../../Final_Assignment_Template/allqas.jsonl",
    output_path: str = "llm_eval.json",
    provider: str = "gemma",
    request_timeout: float = 30.0,
) -> None:
    """Run the agent over every online QA that has an attachment and dump the results.

    For each entry the attachment URL on the scoring space is probed first;
    the agent is only run when the probe answers HTTP 200.  Questions that
    mention an excluded keyword, or whose attachment is unavailable, are
    recorded as "NOT ATTEMPTED".  Any exception raised while probing or
    answering is recorded as "[Error] <message>" so one failure does not
    abort the whole run.

    Parameters:
    - qa_path: JSONL file with the questions (passed to ``load_online_qas``).
    - output_path: JSON file the collected results are written to.
    - provider: provider name handed to ``BoomBot``.
    - request_timeout: seconds to wait for the attachment probe before giving up.
    """
    # Local imports: these are only needed when the module is run as a script.
    import json

    import requests

    from utils import extract_final_answer, load_online_qas

    excluded_keywords = ("youtube", "video", "chess")

    agent = BoomBot(provider=provider)
    results = []

    for entry in load_online_qas(file_path=qa_path, has_file=True):
        task_id = entry["task_id"]
        question = entry["Question"]
        real_answer = entry["Final answer"]
        to_download = entry.get("file_name", "") != ""
        link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

        # Default outcome; overwritten only when the agent actually runs or fails.
        llm_answer = processed_answer = "NOT ATTEMPTED"

        lowered_question = question.lower()
        if not any(kw in lowered_question for kw in excluded_keywords):
            try:
                # stream=True fetches only the status line and headers, so the
                # attachment body is not downloaded just to check availability.
                # The timeout keeps a stalled connection from hanging the run.
                with requests.get(
                    link, stream=True, timeout=request_timeout
                ) as response:
                    file_available = response.status_code == 200

                if file_available:
                    llm_answer = agent.run(question, task_id, to_download)
                    processed_answer = str(extract_final_answer(llm_answer))
            except Exception as e:  # best-effort: record the failure and continue
                llm_answer = processed_answer = f"[Error] {e}"

        results.append({
            "question": question,
            "llm_answer": llm_answer,
            "processed_answer": processed_answer.strip(),
            "real_answer": real_answer,
        })

        print("REAL ANSWER:", real_answer)

    # Save all results to file
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    _evaluate_online_qas()
requirements.txt CHANGED
@@ -10,4 +10,5 @@ duckduckgo-search
10
  langchain_community
11
  markdownify
12
  smolagents[litellm]
13
- smolagents[openai]
 
 
10
  langchain_community
11
  markdownify
12
  smolagents[litellm]
13
+ smolagents[openai]
14
+ openpyxl
tools.py CHANGED
@@ -479,7 +479,7 @@ class DuckDuckGoSearchTool(Tool):
479
  }
480
  output_type = "string"
481
 
482
- def _configure(self, max_retries: int = 3, retry_sleep: int = 3):
483
  self._max_retries = max_retries
484
  self._retry_sleep = retry_sleep
485
 
@@ -529,6 +529,7 @@ class DuckDuckGoSearchTool(Tool):
529
  ConversationLimitException,
530
  ) as e:
531
  retries += 1
 
532
  print(
533
  f"⚠️ DuckDuckGo Exception (Attempt {retries}/{max_retries}): {type(e).__name__}: {e}"
534
  )
 
479
  }
480
  output_type = "string"
481
 
482
+ def _configure(self, max_retries: int = 5, retry_sleep: int = 2):
483
  self._max_retries = max_retries
484
  self._retry_sleep = retry_sleep
485
 
 
529
  ConversationLimitException,
530
  ) as e:
531
  retries += 1
532
+ self._retry_sleep +=2
533
  print(
534
  f"⚠️ DuckDuckGo Exception (Attempt {retries}/{max_retries}): {type(e).__name__}: {e}"
535
  )
utils.py CHANGED
@@ -1,4 +1,6 @@
1
  import re
 
 
2
 
3
 
4
  def extract_final_answer(output: str) -> str:
@@ -34,3 +36,155 @@ def replace_tool_mentions(prompt: str) -> str:
34
  prompt = re.sub(r"(?<!\w)(?<!_)wiki\(", "wikipedia_search(", prompt)
35
 
36
  return prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
+ import json
3
+ from typing import List, Union, Optional
4
 
5
 
6
  def extract_final_answer(output: str) -> str:
 
36
  prompt = re.sub(r"(?<!\w)(?<!_)wiki\(", "wikipedia_search(", prompt)
37
 
38
  return prompt
39
+
40
+ def _question_matches(question: str, filters: Union[str, List[str]]) -> bool:
41
+ """Helper: check if question matches any string in filters."""
42
+ if isinstance(filters, str):
43
+ filters = [filters]
44
+ return any(f.lower() in question.lower() for f in filters)
45
+
46
def load_online_qas(
    qa_type: Union[str, List[str]] = "all",
    has_file: Optional[bool] = None,
    file_path="Final_Assignment_Template/allqas.jsonl",
) -> List[dict]:
    """
    Load online QAs from a JSONL file (one JSON object per line).

    Parameters:
    - qa_type: str or List[str], used to match substrings in the Question. Use "all" for no filtering.
    - has_file: bool or None, filters QAs by presence of 'file_name':
        - True: only include QAs with file_name
        - False: only include QAs without file_name
        - None: no file_name filtering
    - file_path: path of the JSONL file to read

    Returns:
    - the list of QA dicts, in file order, that pass both filters.

    A missing, null, or whitespace-only 'file_name' counts as "no file".
    """
    data = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if line.strip():
                data.append(json.loads(line))

    def _has_attachment(qa: dict) -> bool:
        # `or ""` covers an explicit null file_name, which has no .strip().
        return bool((qa.get("file_name") or "").strip())

    # Apply file presence filter
    if has_file is True:
        data = [qa for qa in data if _has_attachment(qa)]
    elif has_file is False:
        data = [qa for qa in data if not _has_attachment(qa)]

    # Apply question content filter
    if qa_type == "all":
        return data

    return [qa for qa in data if _question_matches(qa.get("Question", ""), qa_type)]
80
+
81
+
82
def _load_fileless_qas(file_path: str, qa_type: Union[str, List[str]]) -> List[dict]:
    """Read a JSONL file and return the projected QAs that have no attached file.

    Shared implementation behind load_test_qas and load_val_qas.
    """
    docs = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            entry = json.loads(line)
            # `or ""` covers an explicit null file_name, which has no .strip().
            if (entry.get("file_name") or "").strip() == "":
                docs.append(entry)

    if qa_type != "all":
        docs = [e for e in docs if _question_matches(e["Question"], qa_type)]

    return [
        {
            "Question": e["Question"],
            "Final answer": e.get("Final answer"),
            "task_id": e["task_id"],
            # `or {}` covers an explicit null metadata object.
            "tools": (e.get("Annotator Metadata") or {}).get("Tools"),
            "file_name": e.get("file_name", ""),
        }
        for e in docs
    ]


def load_test_qas(
    qa_type: Union[str, List[str]] = "all",
    file_path: str = "Final_Assignment_Template/gaia_val.jsonl",
) -> List[dict]:
    """Loads test QAs with no attached files. Optionally filters by topic keywords in questions.

    Parameters:
    - qa_type: str or List[str] matched as substrings of the Question; "all" disables filtering.
    - file_path: JSONL file to read.

    Returns a list of dicts with the keys "Question", "Final answer",
    "task_id", "tools" and "file_name".

    NOTE(review): the default path is the *validation* file, the same one
    load_val_qas reads - confirm whether a dedicated test file was intended.
    """
    return _load_fileless_qas(file_path, qa_type)


def load_val_qas(
    qa_type: Union[str, List[str]] = "all",
    file_path: str = "Final_Assignment_Template/gaia_val.jsonl",
) -> List[dict]:
    """Loads validation QAs with no attached files. Optionally filters by topic keywords in questions.

    Parameters:
    - qa_type: str or List[str] matched as substrings of the Question; "all" disables filtering.
    - file_path: JSONL file to read.

    Returns a list of dicts with the keys "Question", "Final answer",
    "task_id", "tools" and "file_name".
    """
    return _load_fileless_qas(file_path, qa_type)
148
+ # import requests
149
+ # import json
150
+
151
+ # def fetch_and_save_questions(api_base_url: str, output_path: str):
152
+ # """
153
+ # Fetch all questions from the Agent Evaluation API and save them as JSONL.
154
+
155
+ # :param api_base_url: Base URL of the scoring API, e.g. "https://agents-course-unit4-scoring.hf.space"
156
+ # :param output_path: Path to the output .jsonl file
157
+ # """
158
+ # endpoint = f"{api_base_url}/questions"
159
+ # try:
160
+ # resp = requests.get(endpoint, timeout=30)
161
+ # resp.raise_for_status()
162
+ # questions = resp.json()
163
+ # except Exception as e:
164
+ # print(f"❌ Failed to fetch questions: {e}")
165
+ # return
166
+
167
+ # try:
168
+ # with open(output_path, "w", encoding="utf-8") as fout:
169
+ # for q in questions:
170
+ # fout.write(json.dumps(q, ensure_ascii=False) + "\n")
171
+ # print(f"✅ Saved {len(questions)} questions to {output_path}")
172
+ # except Exception as e:
173
+ # print(f"❌ Failed to write JSONL file: {e}")
174
+
175
+ # API_BASE = "https://agents-course-unit4-scoring.hf.space"
176
+ # OUTPUT_FILE = "questions.jsonl"
177
+ # fetch_and_save_questions(API_BASE, OUTPUT_FILE)
178
+
179
+
180
+ # dlf = DownloadFileFromTaskTool()
181
+ # for res in results:
182
+ # res = dlf.forward(task_id = res["task_id"])
183
+ # print(res)
184
+ # task_id = "cca530fc-4052-43b2-b130-b30968d8aa44"
185
+ # file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
186
+ # response = requests.get(file_url, timeout=15)
187
+
188
+ # print(response.content)
189
+ # print(response.headers.get("content-type", "").lower())
190
+ #print(response.headers)