update
Browse files- __pycache__/agent.cpython-39.pyc +0 -0
- agent.py +37 -5
- app copy.py +25 -19
- requirements.txt +2 -1
__pycache__/agent.cpython-39.pyc
CHANGED
|
Binary files a/__pycache__/agent.cpython-39.pyc and b/__pycache__/agent.cpython-39.pyc differ
|
|
|
agent.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
from typing import TypedDict, List, Dict, Any, Optional, Union
|
| 3 |
from langchain_core import tools
|
| 4 |
from langgraph.graph import StateGraph, START, END
|
|
@@ -11,6 +12,7 @@ from dotenv import load_dotenv
|
|
| 11 |
from groq import Groq
|
| 12 |
from langchain_groq import ChatGroq
|
| 13 |
from langchain_community.document_loaders.image import UnstructuredImageLoader
|
|
|
|
| 14 |
import base64
|
| 15 |
try:
|
| 16 |
import cv2
|
|
@@ -65,7 +67,7 @@ def web_search(keywords: str) -> str:
|
|
| 65 |
@tool
|
| 66 |
def wiki_search(query: str) -> str:
|
| 67 |
"""
|
| 68 |
-
Search Wikipedia for a query and return
|
| 69 |
|
| 70 |
Use cases:
|
| 71 |
When the question requires the use of information from wikipedia
|
|
@@ -74,13 +76,17 @@ def wiki_search(query: str) -> str:
|
|
| 74 |
query: The search query
|
| 75 |
"""
|
| 76 |
|
| 77 |
-
search_docs = WikipediaLoader(query=query, load_max_docs=
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
formatted_search_docs = "\n\n---\n\n".join(
|
| 79 |
[
|
| 80 |
-
f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("
|
| 81 |
for doc in search_docs
|
| 82 |
])
|
| 83 |
-
return
|
| 84 |
|
| 85 |
|
| 86 |
|
|
@@ -168,6 +174,22 @@ def analyze_video(video_path: str, question: str) -> str:
|
|
| 168 |
except Exception as e:
|
| 169 |
return f"Error analyzing video: {str(e)}"
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
system_prompt = """
|
| 173 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
|
@@ -210,16 +232,26 @@ def restart_required(state: AgentState) -> AgentState:
|
|
| 210 |
# return {"messages": messages + [response]}
|
| 211 |
|
| 212 |
# Augment the LLM with tools
|
| 213 |
-
tools = [web_search, wiki_search, analyze_image, analyze_video]
|
| 214 |
tools_by_name = {tool.name: tool for tool in tools}
|
| 215 |
model_with_tools = model.bind_tools(tools)
|
| 216 |
|
| 217 |
def answer_message(state: AgentState) -> AgentState:
|
| 218 |
messages = state["messages"]
|
|
|
|
|
|
|
| 219 |
prompt = [SystemMessage(f"""
|
| 220 |
You are a GAIA question answering expert.
|
| 221 |
Your task is to provide an answer to a question.
|
| 222 |
Think carefully before answering the question.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
Do not include any thought process before answering the question, and only response exactly what was being asked of you.
|
| 224 |
If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
|
| 225 |
If a file is attached, use the appropriate tool (analyze_image or analyze_video) to answer the question based on the file content.
|
|
|
|
| 1 |
import os
|
| 2 |
+
import datetime
|
| 3 |
from typing import TypedDict, List, Dict, Any, Optional, Union
|
| 4 |
from langchain_core import tools
|
| 5 |
from langgraph.graph import StateGraph, START, END
|
|
|
|
| 12 |
from groq import Groq
|
| 13 |
from langchain_groq import ChatGroq
|
| 14 |
from langchain_community.document_loaders.image import UnstructuredImageLoader
|
| 15 |
+
from langchain_community.document_loaders import WebBaseLoader
|
| 16 |
import base64
|
| 17 |
try:
|
| 18 |
import cv2
|
|
|
|
| 67 |
@tool
|
| 68 |
def wiki_search(query: str) -> str:
|
| 69 |
"""
|
| 70 |
+
Search Wikipedia for a query and return up to 3 results.
|
| 71 |
|
| 72 |
Use cases:
|
| 73 |
When the question requires the use of information from wikipedia
|
|
|
|
| 76 |
query: The search query
|
| 77 |
"""
|
| 78 |
|
| 79 |
+
search_docs = WikipediaLoader(query=query, load_max_docs=3, doc_content_chars_max=15000).load()
|
| 80 |
+
|
| 81 |
+
if not search_docs:
|
| 82 |
+
return "No Wikipedia results found."
|
| 83 |
+
|
| 84 |
formatted_search_docs = "\n\n---\n\n".join(
|
| 85 |
[
|
| 86 |
+
f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("title", "Unknown Title")}"/>\n{doc.page_content}\n</Document>'
|
| 87 |
for doc in search_docs
|
| 88 |
])
|
| 89 |
+
return formatted_search_docs
|
| 90 |
|
| 91 |
|
| 92 |
|
|
|
|
| 174 |
except Exception as e:
|
| 175 |
return f"Error analyzing video: {str(e)}"
|
| 176 |
|
| 177 |
+
@tool
def read_url(url: str) -> str:
    """
    Reads and extracts text from a specific webpage URL.
    Use this if a web search snippet doesn't contain enough detail.

    Args:
        url: Absolute http(s) URL of the page to read.

    Returns:
        Up to the first 15000 characters of the extracted page text, or a
        short message explaining why no text could be returned. This tool
        never raises; every failure is reported as a string.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import list.
    from urllib.parse import urlparse

    # Cap the returned text so a single page cannot flood the model context.
    max_chars = 15000

    try:
        target = (url or "").strip()
        parsed = urlparse(target)
        # Reject anything that is not a plain web URL before handing it to
        # the loader (empty strings, file paths, other schemes).
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            return f"Error reading URL: '{url}' is not a valid http(s) URL."

        docs = WebBaseLoader(target).load()
        # Only the first loaded document is used, as a single URL is passed.
        text = docs[0].page_content.strip() if docs else ""
        if not text:
            return "No content could be extracted from this URL."
        return text[:max_chars]
    except Exception as e:
        # Tool boundary: surface the failure to the model as text.
        return f"Error reading URL: {e}"
|
| 192 |
+
|
| 193 |
|
| 194 |
system_prompt = """
|
| 195 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
|
|
|
| 232 |
# return {"messages": messages + [response]}
|
| 233 |
|
| 234 |
# Augment the LLM with tools
|
| 235 |
+
tools = [web_search, wiki_search, analyze_image, analyze_video, read_url]
|
| 236 |
tools_by_name = {tool.name: tool for tool in tools}
|
| 237 |
model_with_tools = model.bind_tools(tools)
|
| 238 |
|
| 239 |
def answer_message(state: AgentState) -> AgentState:
|
| 240 |
messages = state["messages"]
|
| 241 |
+
current_date = datetime.datetime.now().strftime("%Y-%m-%d")
|
| 242 |
+
|
| 243 |
prompt = [SystemMessage(f"""
|
| 244 |
You are a GAIA question answering expert.
|
| 245 |
Your task is to provide an answer to a question.
|
| 246 |
Think carefully before answering the question.
|
| 247 |
+
|
| 248 |
+
TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
|
| 249 |
+
|
| 250 |
+
CRITICAL RULES FOR SEARCH:
|
| 251 |
+
1. When using tools like web_search or wiki_search, do not blindly search the entire question. Extract the core entities.
|
| 252 |
+
2. If the first search result doesn't contain the answer, THINK step-by-step, refine your search query (e.g., use synonyms, or search for broader concepts), and search again.
|
| 253 |
+
3. Cross-reference facts if they seem ambiguous.
|
| 254 |
+
|
| 255 |
Do not include any thought process before answering the question, and only response exactly what was being asked of you.
|
| 256 |
If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
|
| 257 |
If a file is attached, use the appropriate tool (analyze_image or analyze_video) to answer the question based on the file content.
|
app copy.py
CHANGED
|
@@ -28,43 +28,49 @@ class BasicAgent:
|
|
| 28 |
answer = result['messages'][-1].content
|
| 29 |
return answer
|
| 30 |
|
| 31 |
-
def file_extract(local_file_path,
|
| 32 |
-
if local_file_path:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
try:
|
| 34 |
-
# GAIA's file_path is relative to the dataset repo root.
|
| 35 |
-
# Download the file into the allowed cache and get its local path.
|
| 36 |
resolved_path = hf_hub_download(
|
| 37 |
repo_id="gaia-benchmark/GAIA",
|
| 38 |
-
filename=local_file_path
|
| 39 |
repo_type="dataset",
|
| 40 |
)
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
logger.warning(
|
| 48 |
-
f"Could not download file '{local_file_path}' for task_id {task_id}: {e}. "
|
| 49 |
-
"Mapping skipped."
|
| 50 |
-
)
|
| 51 |
|
| 52 |
agent = BasicAgent()
|
| 53 |
questions_url = f"{DEFAULT_API_URL}/questions"
|
| 54 |
response = requests.get(questions_url, timeout=15)
|
| 55 |
response.raise_for_status()
|
| 56 |
questions_data = response.json()
|
| 57 |
-
for item in questions_data[:
|
| 58 |
question_text = item.get("question")
|
| 59 |
if question_text is None:
|
| 60 |
continue
|
| 61 |
files_text = item.get("files")
|
| 62 |
task_id = item.get("task_id")
|
| 63 |
file_name = item.get("file_name")
|
|
|
|
| 64 |
if file_name:
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
output = agent(question_text)
|
| 69 |
print("Q:", question_text)
|
| 70 |
print("A:", output)
|
|
|
|
| 28 |
answer = result['messages'][-1].content
|
| 29 |
return answer
|
| 30 |
|
| 31 |
+
def file_extract(local_file_path, task_id):
    """Download a GAIA attachment and return its local path.

    The file name is tried under each candidate subdirectory of the
    ``gaia-benchmark/GAIA`` dataset repo in turn, and finally at the repo
    root; the first successful download wins.

    Args:
        local_file_path: File name of the attachment as given by the caller.
            A falsy value (``None`` or ``""``) means there is no attachment.
        task_id: Task identifier; used only in the warning message.

    Returns:
        The path returned by ``hf_hub_download`` for the first candidate that
        downloads successfully, or ``None`` when no file was requested or
        every candidate failed.
    """
    if not local_file_path:
        return None

    # GAIA files are usually placed in date-based subdirectories
    prefixes = ("2023/validation/", "2023/test/", "2023/train/", "")

    last_error = None
    for prefix in prefixes:
        try:
            return hf_hub_download(
                repo_id="gaia-benchmark/GAIA",
                filename=f"{prefix}{local_file_path}",
                repo_type="dataset",
            )
        except Exception as e:
            # Best-effort lookup: remember why this candidate failed and try
            # the next one, so the final warning can report the cause.
            last_error = e

    logger.warning(
        "Could not download file '%s' for task_id %s: %s",
        local_file_path,
        task_id,
        last_error,
    )
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
agent = BasicAgent()
|
| 53 |
questions_url = f"{DEFAULT_API_URL}/questions"
|
| 54 |
response = requests.get(questions_url, timeout=15)
|
| 55 |
response.raise_for_status()
|
| 56 |
questions_data = response.json()
|
| 57 |
+
for item in questions_data[3:4]:
|
| 58 |
question_text = item.get("question")
|
| 59 |
if question_text is None:
|
| 60 |
continue
|
| 61 |
files_text = item.get("files")
|
| 62 |
task_id = item.get("task_id")
|
| 63 |
file_name = item.get("file_name")
|
| 64 |
+
|
| 65 |
if file_name:
|
| 66 |
+
# Actually download the file to local cache and get absolute path
|
| 67 |
+
resolved_path = file_extract(file_name, task_id)
|
| 68 |
+
if resolved_path:
|
| 69 |
+
question_text += f"\n\n[Attached File Local Path: {resolved_path}]"
|
| 70 |
+
else:
|
| 71 |
+
question_text += f"\n\n[Attached File: {file_name} (Download Failed)]"
|
| 72 |
+
|
| 73 |
+
print(files_text, task_id)
|
| 74 |
output = agent(question_text)
|
| 75 |
print("Q:", question_text)
|
| 76 |
print("A:", output)
|
requirements.txt
CHANGED
|
@@ -21,4 +21,5 @@ numpy
|
|
| 21 |
ddgs
|
| 22 |
groq
|
| 23 |
unstructured[all-docs]
|
| 24 |
-
opencv-python
|
|
|
|
|
|
| 21 |
ddgs
|
| 22 |
groq
|
| 23 |
unstructured[all-docs]
|
| 24 |
+
opencv-python
|
| 25 |
+
beautifulsoup4
|