Paperbag committed on
Commit
717c736
·
1 Parent(s): a5ab16b
Files changed (4) hide show
  1. __pycache__/agent.cpython-39.pyc +0 -0
  2. agent.py +37 -5
  3. app copy.py +25 -19
  4. requirements.txt +2 -1
__pycache__/agent.cpython-39.pyc CHANGED
Binary files a/__pycache__/agent.cpython-39.pyc and b/__pycache__/agent.cpython-39.pyc differ
 
agent.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from typing import TypedDict, List, Dict, Any, Optional, Union
3
  from langchain_core import tools
4
  from langgraph.graph import StateGraph, START, END
@@ -11,6 +12,7 @@ from dotenv import load_dotenv
11
  from groq import Groq
12
  from langchain_groq import ChatGroq
13
  from langchain_community.document_loaders.image import UnstructuredImageLoader
 
14
  import base64
15
  try:
16
  import cv2
@@ -65,7 +67,7 @@ def web_search(keywords: str) -> str:
65
  @tool
66
  def wiki_search(query: str) -> str:
67
  """
68
- Search Wikipedia for a query and return a maximum of 2 results
69
 
70
  Use cases:
71
  When the question requires the use of information from wikipedia
@@ -74,13 +76,17 @@ def wiki_search(query: str) -> str:
74
  query: The search query
75
  """
76
 
77
- search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
 
 
 
 
78
  formatted_search_docs = "\n\n---\n\n".join(
79
  [
80
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
81
  for doc in search_docs
82
  ])
83
- return {"wiki_results": formatted_search_docs}
84
 
85
 
86
 
@@ -168,6 +174,22 @@ def analyze_video(video_path: str, question: str) -> str:
168
  except Exception as e:
169
  return f"Error analyzing video: {str(e)}"
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  system_prompt = """
173
  You are a helpful assistant tasked with answering questions using a set of tools.
@@ -210,16 +232,26 @@ def restart_required(state: AgentState) -> AgentState:
210
  # return {"messages": messages + [response]}
211
 
212
  # Augment the LLM with tools
213
- tools = [web_search, wiki_search, analyze_image, analyze_video]
214
  tools_by_name = {tool.name: tool for tool in tools}
215
  model_with_tools = model.bind_tools(tools)
216
 
217
  def answer_message(state: AgentState) -> AgentState:
218
  messages = state["messages"]
 
 
219
  prompt = [SystemMessage(f"""
220
  You are a GAIA question answering expert.
221
  Your task is to provide an answer to a question.
222
  Think carefully before answering the question.
 
 
 
 
 
 
 
 
223
  Do not include any thought process before answering the question, and only response exactly what was being asked of you.
224
  If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
225
  If a file is attached, use the appropriate tool (analyze_image or analyze_video) to answer the question based on the file content.
 
1
  import os
2
+ import datetime
3
  from typing import TypedDict, List, Dict, Any, Optional, Union
4
  from langchain_core import tools
5
  from langgraph.graph import StateGraph, START, END
 
12
  from groq import Groq
13
  from langchain_groq import ChatGroq
14
  from langchain_community.document_loaders.image import UnstructuredImageLoader
15
+ from langchain_community.document_loaders import WebBaseLoader
16
  import base64
17
  try:
18
  import cv2
 
67
  @tool
68
  def wiki_search(query: str) -> str:
69
  """
70
+ Search Wikipedia for a query and return up to 3 results.
71
 
72
  Use cases:
73
  When the question requires the use of information from wikipedia
 
76
  query: The search query
77
  """
78
 
79
+ search_docs = WikipediaLoader(query=query, load_max_docs=3, doc_content_chars_max=15000).load()
80
+
81
+ if not search_docs:
82
+ return "No Wikipedia results found."
83
+
84
  formatted_search_docs = "\n\n---\n\n".join(
85
  [
86
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("title", "Unknown Title")}"/>\n{doc.page_content}\n</Document>'
87
  for doc in search_docs
88
  ])
89
+ return formatted_search_docs
90
 
91
 
92
 
 
174
  except Exception as e:
175
  return f"Error analyzing video: {str(e)}"
176
 
177
+ @tool
178
+ def read_url(url: str) -> str:
179
+ """
180
+ Reads and extracts text from a specific webpage URL.
181
+ Use this if a web search snippet doesn't contain enough detail.
182
+ """
183
+ try:
184
+ loader = WebBaseLoader(url)
185
+ docs = loader.load()
186
+ # Truncate to first 15000 characters to fit context
187
+ if not docs:
188
+ return "No content could be extracted from this URL."
189
+ return docs[0].page_content[:15000]
190
+ except Exception as e:
191
+ return f"Error reading URL: {e}"
192
+
193
 
194
  system_prompt = """
195
  You are a helpful assistant tasked with answering questions using a set of tools.
 
232
  # return {"messages": messages + [response]}
233
 
234
  # Augment the LLM with tools
235
+ tools = [web_search, wiki_search, analyze_image, analyze_video, read_url]
236
  tools_by_name = {tool.name: tool for tool in tools}
237
  model_with_tools = model.bind_tools(tools)
238
 
239
  def answer_message(state: AgentState) -> AgentState:
240
  messages = state["messages"]
241
+ current_date = datetime.datetime.now().strftime("%Y-%m-%d")
242
+
243
  prompt = [SystemMessage(f"""
244
  You are a GAIA question answering expert.
245
  Your task is to provide an answer to a question.
246
  Think carefully before answering the question.
247
+
248
+ TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
249
+
250
+ CRITICAL RULES FOR SEARCH:
251
+ 1. When using tools like web_search or wiki_search, do not blindly search the entire question. Extract the core entities.
252
+ 2. If the first search result doesn't contain the answer, THINK step-by-step, refine your search query (e.g., use synonyms, or search for broader concepts), and search again.
253
+ 3. Cross-reference facts if they seem ambiguous.
254
+
255
  Do not include any thought process before answering the question, and only response exactly what was being asked of you.
256
  If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
257
  If a file is attached, use the appropriate tool (analyze_image or analyze_video) to answer the question based on the file content.
app copy.py CHANGED
@@ -28,43 +28,49 @@ class BasicAgent:
28
  answer = result['messages'][-1].content
29
  return answer
30
 
31
- def file_extract(local_file_path,task_file_paths):
32
- if local_file_path:
 
 
 
 
 
 
33
  try:
34
- # GAIA's file_path is relative to the dataset repo root.
35
- # Download the file into the allowed cache and get its local path.
36
  resolved_path = hf_hub_download(
37
  repo_id="gaia-benchmark/GAIA",
38
- filename=local_file_path, # e.g. "2023/test/<attachment-id>.pdf"
39
  repo_type="dataset",
40
  )
41
-
42
- task_file_paths[str(task_id)] = resolved_path
43
- logger.debug(
44
- f"Stored file path mapping for task_id {task_id}: {resolved_path}"
45
- )
46
- except Exception as e:
47
- logger.warning(
48
- f"Could not download file '{local_file_path}' for task_id {task_id}: {e}. "
49
- "Mapping skipped."
50
- )
51
 
52
  agent = BasicAgent()
53
  questions_url = f"{DEFAULT_API_URL}/questions"
54
  response = requests.get(questions_url, timeout=15)
55
  response.raise_for_status()
56
  questions_data = response.json()
57
- for item in questions_data[:5]:
58
  question_text = item.get("question")
59
  if question_text is None:
60
  continue
61
  files_text = item.get("files")
62
  task_id = item.get("task_id")
63
  file_name = item.get("file_name")
 
64
  if file_name:
65
- question_text += f"\n\n[Attached File: {file_name}]"
66
- # file = file_extract(,task_id)
67
- print(files_text,task_id)
 
 
 
 
 
68
  output = agent(question_text)
69
  print("Q:", question_text)
70
  print("A:", output)
 
28
  answer = result['messages'][-1].content
29
  return answer
30
 
31
+ def file_extract(local_file_path, task_id):
32
+ if not local_file_path:
33
+ return None
34
+
35
+ # GAIA files are usually placed in date-based subdirectories
36
+ prefixes = ["2023/validation/", "2023/test/", "2023/train/", ""]
37
+
38
+ for prefix in prefixes:
39
  try:
 
 
40
  resolved_path = hf_hub_download(
41
  repo_id="gaia-benchmark/GAIA",
42
+ filename=f"{prefix}{local_file_path}",
43
  repo_type="dataset",
44
  )
45
+ return resolved_path
46
+ except Exception:
47
+ continue
48
+
49
+ logger.warning(f"Could not download file '{local_file_path}' for task_id {task_id}")
50
+ return None
 
 
 
 
51
 
52
  agent = BasicAgent()
53
  questions_url = f"{DEFAULT_API_URL}/questions"
54
  response = requests.get(questions_url, timeout=15)
55
  response.raise_for_status()
56
  questions_data = response.json()
57
+ for item in questions_data[3:4]:
58
  question_text = item.get("question")
59
  if question_text is None:
60
  continue
61
  files_text = item.get("files")
62
  task_id = item.get("task_id")
63
  file_name = item.get("file_name")
64
+
65
  if file_name:
66
+ # Actually download the file to local cache and get absolute path
67
+ resolved_path = file_extract(file_name, task_id)
68
+ if resolved_path:
69
+ question_text += f"\n\n[Attached File Local Path: {resolved_path}]"
70
+ else:
71
+ question_text += f"\n\n[Attached File: {file_name} (Download Failed)]"
72
+
73
+ print(files_text, task_id)
74
  output = agent(question_text)
75
  print("Q:", question_text)
76
  print("A:", output)
requirements.txt CHANGED
@@ -21,4 +21,5 @@ numpy
21
  ddgs
22
  groq
23
  unstructured[all-docs]
24
- opencv-python
 
 
21
  ddgs
22
  groq
23
  unstructured[all-docs]
24
+ opencv-python
25
+ beautifulsoup4