Final_Assignment_Template_CURR

Sleeping

App Files Community

mdicio committed on May 18, 2025

Commit

b922347

1 Parent(s): b1b39b4

try qwen

Browse files

Files changed (1) hide show

agent.py +150 -90

agent.py CHANGED Viewed

@@ -95,6 +95,7 @@ class BoomBot:
             )
         elif self.provider == "meta":
             meta_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
             # return OpenAIServerModel(
             #     model_id=meta_model,
             #     api_base="https://api.deepinfra.com/v1/openai",
@@ -147,29 +148,24 @@ class BoomBot:
             download_file,
             read_file_content,
             visit_webpage,
-            transcribe_video,
             transcribe_audio,
             get_wikipedia_info,
             arxiv_search,
             add_doc_vectorstore,
             retrieve_doc_vectorstore,
-            image_question_answering,
             python_interpreter,
             final_answer,
         ]
         # Additional imports for the Python interpreter
         additional_imports = [
             "json",
             "os",
             "glob",
             "pathlib",
-            "pandas",
-            "numpy",
-            "matplotlib",
-            "seaborn",
-            "sklearn",
-            "tqdm",
             "argparse",
             "pickle",
             "io",
@@ -182,8 +178,20 @@ class BoomBot:
             "zipfile",
             "itertools",
             "functools",
-            "open",
-            "requests"
         ]
         # Create the agent
@@ -211,64 +219,68 @@ class BoomBot:
         """
         return """
         YOUR BEHAVIOR GUIDELINES:
-          • Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
-          • For math or puzzles: break the problem into code/math, then solve programmatically.
         RESEARCH WORKFLOW:
-          1. SEARCH
-             - Try web_search, wikipedia_search, or arxiv_search first.
-             - Refine your query rather than repeating the exact same terms.
-             - If one search tool yields insufficient info, switch to another before downloading.
-          2. VISIT
-             - Use visit_webpage to extract and read page content when a promising link appears after one of the SEARCH tools.
-             - For each visited link, also download the file and add to the vector store, you might need to query this later, especially if you have a lot of search results.
-          3. EVALUATE
-             - ✅ If the page or search snippet fully answers the question, respond immediately.
-             - ❌ If not, move on to deeper investigation.
-          4. DOWNLOAD
-             - Use download_file_from_link tool on relevant links found (yes you can download webpages as html).
-             - For arXiv papers, target the /pdf/ or DOI link (e.g https://arxiv.org/pdf/2011.10672).
-          5. INDEX & QUERY
-             - Add downloaded documents to the vector store with add_document_to_vector_store.
-             - Use query_downloaded_documents for detailed answers.
-          6. READ
-             - You have access to a read_file_content tool to read most types of files (html, pdf, text).
-             - You can also directly interact with downloaded files (csv, excel) in your python code.
-             - Use query_downloaded_documents if you have added docs to vector store.
         FALLBACK & ADAPTATION:
-          • If a tool fails, reformulate your query or try a different search method before dropping to download.
-          • If a tool fails multiple times, try a different tool.
-          • For arXiv: you might discover a paper link via web_search tool and then directly use download_file_from_link tool
-        COMMON TOOL CHAINS (conceptual outlines):
-        These are just guidelines, each task might require a unique workflow.
-        A tool can provide useful information for the task, it will not always contain the answer. You need to work to get to a final_answer that makes sense.
-          • FACTUAL Qs:
-              web_search → final_answer
-          • CURRENT EVENTS:
-              To have some summary information use web_search, that might output a promising website to visit and read content from using (visit_webpage or download_file_from_link and read_file_content)
-              web_search → visit_webpage → final_answer
-          • DOCUMENT-BASED Qs:
-              web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
-          • ARXIV PAPERS:
-              The arxiv search tool provides a list of results with summary content, to inspect the whole paper you need to download it with download_file_from_link tool.
-              arxiv_search → download_file_from_link → read_file_content
-              If that fails
-              arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents
-          • MEDIA ANALYSIS:
-              download_file_from_link → transcribe_video/transcribe_audio/describe_image → final_answer
         FINAL ANSWER FORMAT:
-        ** Do not name any python variables final_answer, this causes problems with tools.
-          - Begin with "FINAL ANSWER: "
-          - Number → digits only (e.g., 42)  no units unless specified
-          - String → exact text (e.g., Pope Francis) without quotation marks
-          - List → comma-separated, one space, no brackets unless specified(e.g., 2, 3, 4)
-          - Conclude with: FINAL ANSWER: <your_answer>
         """
     def run(self, question: str, task_id: str, to_download) -> str:
         """
         Run the agent with the given question, task_id, and download flag.
@@ -307,53 +319,101 @@ class BoomBot:
 if __name__ == "__main__":
     import time
-    from utils import load_online_qas, extract_final_answer
     import requests
-    import json
     agent = BoomBot(provider="gemma")
-    file_online = load_online_qas(file_path = r"../../Final_Assignment_Template/allqas.jsonl", has_file=True)
-    results = []
     excluded_keywords = ["youtube", "video", "chess"]
     for entry in file_online:
-        task_id = entry["task_id"]
-        question = entry["Question"]
         real_answer = entry["Final answer"]
-        file_name = entry.get("file_name", "")
-        to_download = file_name != ""
-        link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-        # Check exclusion and file availability
         if any(kw in question.lower() for kw in excluded_keywords):
-            llm_answer = "NOT ATTEMPTED"
-            processed_answer = llm_answer
         else:
             try:
-                response = requests.get(link)
-                if response.status_code != 200:
-                    llm_answer = "NOT ATTEMPTED"
-                    processed_answer = llm_answer
                 else:
                     llm_answer = agent.run(question, task_id, to_download)
-                    processed_answer = str(extract_final_answer(llm_answer))
                     # time.sleep(10)
             except Exception as e:
-                llm_answer = processed_answer = f"[Error] {e}"
                 # time.sleep(6)
-        results.append({
-            "question": question,
-            "llm_answer": llm_answer,
-            "processed_answer": processed_answer.strip(),
-            "real_answer": real_answer
         })
         print("REAL ANSWER:", real_answer)
-    # Save all results to file
-    with open("llm_eval.json", "w", encoding="utf-8") as f:
-        json.dump(results, f, indent=2, ensure_ascii=False)

             )
         elif self.provider == "meta":
             meta_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
+            meta_model = "Qwen/Qwen2.5-72B-Instruct"
             # return OpenAIServerModel(
             #     model_id=meta_model,
             #     api_base="https://api.deepinfra.com/v1/openai",
             download_file,
             read_file_content,
             visit_webpage,
+            # transcribe_video,
             transcribe_audio,
             get_wikipedia_info,
             arxiv_search,
             add_doc_vectorstore,
             retrieve_doc_vectorstore,
+            # image_question_answering,
             python_interpreter,
             final_answer,
         ]
         # Additional imports for the Python interpreter
         additional_imports = [
+            # Built-in / core Python
             "json",
             "os",
             "glob",
             "pathlib",
             "argparse",
             "pickle",
             "io",
             "zipfile",
             "itertools",
             "functools",
+            "requests",
+            "bs4",
+            # Data handling
+            "pandas",
+            "numpy",
+            "dask",        # For handling large datasets
+            "polars",      # Fast DataFrame alternative
+            "pyarrow",     # For Arrow/Parquet file formats
+            "h5py",        # For HDF5 files
+            "openpyxl",    # Excel reading/writing
+            "yaml",        # Config file parsing
+            # Basic plotting
+            "matplotlib",
+            "seaborn"
         ]
         # Create the agent
         """
         return """
         YOUR BEHAVIOR GUIDELINES:
+        • Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
+        • For math or puzzles: break the problem into code/math, then solve programmatically.
         RESEARCH WORKFLOW:
+        1. SEARCH
+            - Begin with web_search, wikipedia_search, or arxiv_search.
+            - Refine your query if results are weak—don't just retry the same terms.
+            - If one search tool yields little, try another before moving on to downloads.
+        2. VISIT
+            - Use visit_webpage to preview content from promising links.
+            - If the content is long, complex, spans multiple pages, or may be needed later, do NOT rely solely on visit_webpage.
+            - Move quickly to downloading: avoid repeated visits when the content should be archived.
+        3. DOWNLOAD (MANDATORY IF CONTENT IS LONG, DENSE, OR CRUCIAL)
+            - Use download_file_from_link on all valuable resources (including html pages or pdfs).
+            - Especially when a page is detailed, technical, or multi-part, downloading is preferred.
+            - You can (and should) download webpages as HTML. Do this whenever the site might be referenced again later.
+        4. INDEX & QUERY
+            - Immediately add downloaded files to the vector store using add_document_to_vector_store.
+            - For complex tasks or unclear answers, prefer querying vector store over re-visiting pages.
+            - If you've downloaded a file, **always index it unless clearly irrelevant.**
+        5. READ
+            - Use read_file_content to analyze file contents (html, pdf, text).
+            - You can also use query_downloaded_documents for deeper understanding.
+        6. EVALUATE
+            - ✅ If the answer is clear from current sources, respond.
+            - ❌ If not, continue iterating and analyzing downloaded material.
         FALLBACK & ADAPTATION:
+        • If a tool fails, reformulate or switch tools.
+        • For arXiv: web_search might help you find the paper; follow with direct download of the PDF via download_file_from_link.
+        MANDATORY DOWNLOAD & INDEX WHEN:
+        • The page is lengthy or technical (e.g., research papers, government sites, legal docs, blog posts with code).
+        • You suspect you'll need to return to the content.
+        • You are working on multi-hop reasoning or long-term memory tasks.
+        COMMON TOOL CHAINS:
+        • FACTUAL Qs:
+            web_search → final_answer
+        • CURRENT EVENTS:
+            web_search → visit_webpage → (download + index if needed) → final_answer
+        • DOCUMENT-BASED Qs:
+            web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
+        • ARXIV PAPERS:
+            arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
+        • MEDIA ANALYSIS:
+            download_file_from_link → transcribe_audio → final_answer
         FINAL ANSWER FORMAT:
+        - Begin with "FINAL ANSWER: "
+        - Number → digits only (e.g., 42)
+        - String → exact text (e.g., Pope Francis) without quotation marks
+        - List → comma-separated, no brackets unless specified (e.g., 2, 3, 4)
+        - End with: FINAL ANSWER: <your_answer>
         """
     def run(self, question: str, task_id: str, to_download) -> str:
         """
         Run the agent with the given question, task_id, and download flag.
 if __name__ == "__main__":
+    import os
+    import csv
     import time
     import requests
+    from utils import load_online_qas, extract_final_answer
+    CSV_FILE = "evals/llm_eval.csv"
+    FIELDNAMES = ["model", "task_id", "question", "llm_answer", "processed_answer", "real_answer"]
+    def ensure_csv():
+        """Create the CSV file with header if it doesn't exist."""
+        if not os.path.isfile(CSV_FILE):
+            with open(CSV_FILE, mode="w", newline="", encoding="utf-8") as f:
+                writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
+                writer.writeheader()
+    def append_results(rows):
+        """Append a list of dict rows to the CSV."""
+        with open(CSV_FILE, mode="a", newline="", encoding="utf-8") as f:
+            writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
+            for row in rows:
+                writer.writerow(row)
     agent = BoomBot(provider="gemma")
+    model_name = agent.provider  # e.g. "gemma"
+    file_online   = load_online_qas(file_path=r"../../Final_Assignment_Template/allqas.jsonl", has_file=True)
+    nofile_online = load_online_qas(file_path=r"../../Final_Assignment_Template/allqas.jsonl", has_file=False)
     excluded_keywords = ["youtube", "video", "chess"]
+    rows_to_append = []
+    # 1) With downloadable files
     for entry in file_online:
+        task_id     = entry["task_id"]
+        question    = entry["Question"]
         real_answer = entry["Final answer"]
+        file_name   = entry.get("file_name", "")
+        to_download = bool(file_name)
+        link        = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
         if any(kw in question.lower() for kw in excluded_keywords):
+            llm_answer = processed = "NOT ATTEMPTED"
         else:
             try:
+                resp = requests.get(link)
+                if resp.status_code != 200:
+                    llm_answer = processed = "NOT ATTEMPTED"
                 else:
                     llm_answer = agent.run(question, task_id, to_download)
+                    processed = extract_final_answer(llm_answer).strip()
                     # time.sleep(10)
             except Exception as e:
+                llm_answer = processed = f"[Error] {e}"
                 # time.sleep(6)
+        rows_to_append.append({
+            "model":            model_name,
+            "task_id":          task_id,
+            "question":         question,
+            "llm_answer":       llm_answer,
+            "processed_answer": processed,
+            "real_answer":      real_answer,
         })
+        print("REAL ANSWER:", real_answer)
+    # 2) Without downloadable files
+    for entry in nofile_online:
+        task_id     = entry["task_id"]
+        question    = entry["Question"]
+        real_answer = entry["Final answer"]
+        if any(kw in question.lower() for kw in excluded_keywords):
+            llm_answer = processed = "NOT ATTEMPTED"
+        else:
+            try:
+                llm_answer = agent.run(question, task_id, to_download=False)
+                processed = extract_final_answer(llm_answer).strip()
+                # time.sleep(10)
+            except Exception as e:
+                llm_answer = processed = f"[Error] {e}"
+                # time.sleep(6)
+        rows_to_append.append({
+            "model":            model_name,
+            "task_id":          task_id,
+            "question":         question,
+            "llm_answer":       llm_answer,
+            "processed_answer": processed,
+            "real_answer":      real_answer,
+        })
         print("REAL ANSWER:", real_answer)
+    # ensure CSV exists and append
+    ensure_csv()
+    append_results(rows_to_append)
+    print(f"✅ Appended {len(rows_to_append)} rows to {CSV_FILE}")