Final_Assignment_Template_CURR

Sleeping

App Files Files Community

mdicio committed on May 18, 2025

Commit

08eb725

1 Parent(s): dbcccc7

openpyxl

Browse files

Files changed (5) hide show

.gitignore +2 -1
agent.py +52 -5
requirements.txt +2 -1
tools.py +2 -1
utils.py +154 -0

.gitignore CHANGED Viewed

@@ -6,4 +6,5 @@ downloads/
 .python_version
 *.jsonl
 *__pycache__/
-*.log

 .python_version
 *.jsonl
 *__pycache__/
+*.log
+evals/

agent.py CHANGED Viewed

@@ -182,6 +182,7 @@ class BoomBot:
             "zipfile",
             "itertools",
             "functools",
         ]
         # Create the agent
@@ -302,9 +303,55 @@ class BoomBot:
         return final_answer
-# Example of how to use this code (commented out)
 if __name__ == "__main__":
-    agent = BoomBot(provider="meta")
-    question = "In the year 2020, where were koi fish found in the watershed with the id 02040203? Give only the name of the pond, lake, or stream where the fish were found, and not the name of the city or county."
-    response = agent.run(question=question, task_id="1", to_download=False)
-    print(f"Response: {response}")

             "zipfile",
             "itertools",
             "functools",
+            "open"
         ]
         # Create the agent
         return final_answer
 if __name__ == "__main__":
+    import time
+    from utils import load_online_qas, extract_final_answer
+    import requests
+    import json
+    agent = BoomBot(provider="gemma")
+    file_online = load_online_qas(file_path = r"../../Final_Assignment_Template/allqas.jsonl", has_file=True)
+    results = []
+    excluded_keywords = ["youtube", "video", "chess"]
+    for entry in file_online:
+        task_id = entry["task_id"]
+        question = entry["Question"]
+        real_answer = entry["Final answer"]
+        file_name = entry.get("file_name", "")
+        to_download = file_name != ""
+        link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+        # Check exclusion and file availability
+        if any(kw in question.lower() for kw in excluded_keywords):
+            llm_answer = "NOT ATTEMPTED"
+            processed_answer = llm_answer
+        else:
+            try:
+                response = requests.get(link)
+                if response.status_code != 200:
+                    llm_answer = "NOT ATTEMPTED"
+                    processed_answer = llm_answer
+                else:
+                    llm_answer = agent.run(question, task_id, to_download)
+                    processed_answer = str(extract_final_answer(llm_answer))
+                    # time.sleep(10)
+            except Exception as e:
+                llm_answer = processed_answer = f"[Error] {e}"
+                # time.sleep(6)
+        results.append({
+            "question": question,
+            "llm_answer": llm_answer,
+            "processed_answer": processed_answer.strip(),
+            "real_answer": real_answer
+        })
+        print("REAL ANSWER:", real_answer)
+    # Save all results to file
+    with open("llm_eval.json", "w", encoding="utf-8") as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ duckduckgo-search
 langchain_community
 markdownify
 smolagents[litellm]
-smolagents[openai]

 langchain_community
 markdownify
 smolagents[litellm]
+smolagents[openai]
+openpyxl

tools.py CHANGED Viewed

@@ -479,7 +479,7 @@ class DuckDuckGoSearchTool(Tool):
     }
     output_type = "string"
-    def _configure(self, max_retries: int = 3, retry_sleep: int = 3):
         self._max_retries = max_retries
         self._retry_sleep = retry_sleep
@@ -529,6 +529,7 @@ class DuckDuckGoSearchTool(Tool):
                 ConversationLimitException,
             ) as e:
                 retries += 1
                 print(
                     f"⚠️ DuckDuckGo Exception (Attempt {retries}/{max_retries}): {type(e).__name__}: {e}"
                 )

     }
     output_type = "string"
+    def _configure(self, max_retries: int = 5, retry_sleep: int = 2):
         self._max_retries = max_retries
         self._retry_sleep = retry_sleep
                 ConversationLimitException,
             ) as e:
                 retries += 1
+                self._retry_sleep +=2
                 print(
                     f"⚠️ DuckDuckGo Exception (Attempt {retries}/{max_retries}): {type(e).__name__}: {e}"
                 )

utils.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import re
 def extract_final_answer(output: str) -> str:
@@ -34,3 +36,155 @@ def replace_tool_mentions(prompt: str) -> str:
     prompt = re.sub(r"(?<!\w)(?<!_)wiki\(", "wikipedia_search(", prompt)
     return prompt

 import re
+import json
+from typing import List, Union, Optional
 def extract_final_answer(output: str) -> str:
     prompt = re.sub(r"(?<!\w)(?<!_)wiki\(", "wikipedia_search(", prompt)
     return prompt
def _question_matches(question: str, filters: Union[str, List[str]]) -> bool:
    """Return True if any filter occurs in ``question``, ignoring case.

    Parameters:
    - question: text to search. ``None`` (e.g. a JSON ``null`` "Question")
      is treated as an empty string instead of raising AttributeError.
    - filters: one substring or a list of substrings; a bare string is
      treated as a one-element list.

    An empty filter list never matches.
    """
    if isinstance(filters, str):
        filters = [filters]
    # Lower-case the question once rather than once per filter.
    haystack = (question or "").lower()
    return any(f.lower() in haystack for f in filters)
def load_online_qas(
    qa_type: Union[str, List[str]] = "all",
    has_file: Optional[bool] = None,
    file_path="Final_Assignment_Template/allqas.jsonl",
) -> List[dict]:
    """
    Load online QAs from a JSONL file (one JSON object per line).

    Parameters:
    - qa_type: str or List[str], used to match substrings in the Question. Use "all" for no filtering.
    - has_file: bool or None, filters QAs by presence of 'file_name':
        - True: only include QAs with a non-blank file_name
        - False: only include QAs whose file_name is missing, null or blank
        - None: no file_name filtering
    - file_path: path of the JSONL file to read

    Returns the matching entries as parsed from the file, in file order.
    Raises json.JSONDecodeError if a non-blank line is not valid JSON.
    """
    data = []
    # Read as UTF-8 explicitly so non-ASCII questions decode the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            data.append(json.loads(line))

    def _has_attachment(qa: dict) -> bool:
        # `or ""` also covers an explicit JSON null for file_name.
        return bool((qa.get("file_name") or "").strip())

    # Apply file presence filter
    if has_file is True:
        data = [qa for qa in data if _has_attachment(qa)]
    elif has_file is False:
        data = [qa for qa in data if not _has_attachment(qa)]

    # Apply question content filter
    if qa_type == "all":
        return data
    return [qa for qa in data if _question_matches(qa.get("Question", ""), qa_type)]
def _load_fileless_qas(file_path: str, qa_type: Union[str, List[str]]) -> List[dict]:
    """Read a JSONL QA file and return projected entries that have no attached file.

    Shared implementation of load_test_qas and load_val_qas.
    """
    docs = []
    # Explicit UTF-8 keeps decoding independent of the platform locale.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            entry = json.loads(line)
            # `or ""` also covers an explicit JSON null for file_name.
            if not (entry.get("file_name") or "").strip():
                docs.append(entry)

    if qa_type != "all":
        docs = [e for e in docs if _question_matches(e["Question"], qa_type)]

    return [
        {
            "Question": e["Question"],
            "Final answer": e.get("Final answer"),
            "task_id": e["task_id"],
            # `or {}` guards against a null "Annotator Metadata" value.
            "tools": (e.get("Annotator Metadata") or {}).get("Tools"),
            "file_name": e.get("file_name", ""),
        }
        for e in docs
    ]


def load_test_qas(
    qa_type: Union[str, List[str]] = "all",
    file_path: str = "Final_Assignment_Template/gaia_val.jsonl",
) -> List[dict]:
    """Loads test QAs with no attached files. Optionally filters by topic keywords in questions.

    Parameters:
    - qa_type: str or List[str] of substrings to look for in the Question. Use "all" for no filtering.
    - file_path: JSONL file to read.

    Returns a list of dicts with the keys "Question", "Final answer",
    "task_id", "tools" and "file_name".

    NOTE(review): the default path is the validation file, the same one
    load_val_qas reads - confirm whether a separate test split was intended.
    """
    return _load_fileless_qas(file_path, qa_type)


def load_val_qas(
    qa_type: Union[str, List[str]] = "all",
    file_path: str = "Final_Assignment_Template/gaia_val.jsonl",
) -> List[dict]:
    """Loads validation QAs with no attached files. Optionally filters by topic keywords in questions.

    Parameters:
    - qa_type: str or List[str] of substrings to look for in the Question. Use "all" for no filtering.
    - file_path: JSONL file to read.

    Returns a list of dicts with the keys "Question", "Final answer",
    "task_id", "tools" and "file_name".
    """
    return _load_fileless_qas(file_path, qa_type)
+# import requests
+# import json
+# def fetch_and_save_questions(api_base_url: str, output_path: str):
+#     """
+#     Fetch all questions from the Agent Evaluation API and save them as JSONL.
+#     :param api_base_url: Base URL of the scoring API, e.g. "https://agents-course-unit4-scoring.hf.space"
+#     :param output_path:  Path to the output .jsonl file
+#     """
+#     endpoint = f"{api_base_url}/questions"
+#     try:
+#         resp = requests.get(endpoint, timeout=30)
+#         resp.raise_for_status()
+#         questions = resp.json()
+#     except Exception as e:
+#         print(f"❌ Failed to fetch questions: {e}")
+#         return
+#     try:
+#         with open(output_path, "w", encoding="utf-8") as fout:
+#             for q in questions:
+#                 fout.write(json.dumps(q, ensure_ascii=False) + "\n")
+#         print(f"✅ Saved {len(questions)} questions to {output_path}")
+#     except Exception as e:
+#         print(f"❌ Failed to write JSONL file: {e}")
+# API_BASE = "https://agents-course-unit4-scoring.hf.space"
+# OUTPUT_FILE = "questions.jsonl"
+# fetch_and_save_questions(API_BASE, OUTPUT_FILE)
+# dlf = DownloadFileFromTaskTool()
+# for res in results:
+#     res = dlf.forward(task_id = res["task_id"])
+#     print(res)
+# task_id = "cca530fc-4052-43b2-b130-b30968d8aa44"
+# file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+# response = requests.get(file_url, timeout=15)
+# print(response.content)
+# print(response.headers.get("content-type", "").lower())
+#print(response.headers)