Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -340,45 +340,494 @@ You have access to the following tools to gather information and perform actions
|
|
| 340 |
6. If more information or steps are needed, use another tool (step 4) or continue reasoning based on the gathered information. Pay close attention to previous tool results.
|
| 341 |
7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
|
| 342 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
print("Initializing LLM Endpoint...")
|
| 346 |
llm = HuggingFaceEndpoint(
|
| 347 |
-
repo_id="mistralai/Mistral-7B-Instruct-v0.2", #
|
| 348 |
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
| 349 |
-
max_new_tokens=2048,
|
| 350 |
-
temperature=0.01,
|
| 351 |
-
# stop_sequences=["\nObservation:", "\nTool Result:", "\n```"] # Help prevent hallucinating tool calls/results
|
| 352 |
)
|
| 353 |
chat_llm = ChatHuggingFace(llm=llm)
|
| 354 |
print("✅ LLM Endpoint initialized.")
|
| 355 |
|
| 356 |
# 6. Bind tools to the LLM
|
| 357 |
-
#
|
| 358 |
self.llm_with_tools = chat_llm.bind_tools(self.tools)
|
| 359 |
print("✅ Tools bound to LLM.")
|
| 360 |
|
| 361 |
-
# 7. Define the Agent Node
|
|
|
|
| 362 |
def agent_node(state: AgentState):
|
| 363 |
print("--- Running Agent Node ---")
|
| 364 |
-
messages_with_prompt = state["messages"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
if ai_message.tool_calls:
|
| 371 |
-
print(f"AI Message contains tool calls: {ai_message.tool_calls}")
|
| 372 |
-
elif
|
| 373 |
-
|
| 374 |
-
print(f"AI Message appears to contain raw tool call JSON.")
|
| 375 |
else:
|
| 376 |
-
print(f"AI Message Interpreted Content: {ai_message.pretty_repr()}")
|
| 377 |
|
| 378 |
return {"messages": [ai_message]}
|
|
|
|
| 379 |
|
| 380 |
# 8. Define the Tool Node
|
| 381 |
-
# This uses the list of tool methods directly
|
| 382 |
tool_node = ToolNode(self.tools)
|
| 383 |
|
| 384 |
# 9. Create the Graph
|
|
@@ -389,13 +838,13 @@ You have access to the following tools to gather information and perform actions
|
|
| 389 |
graph_builder.add_edge(START, "agent")
|
| 390 |
graph_builder.add_conditional_edges(
|
| 391 |
"agent",
|
| 392 |
-
tools_condition, # This checks
|
| 393 |
{
|
| 394 |
-
"tools": "tools",
|
| 395 |
-
"__end__": "__end__",
|
| 396 |
},
|
| 397 |
)
|
| 398 |
-
graph_builder.add_edge("tools", "agent")
|
| 399 |
|
| 400 |
# 10. Compile the graph and store it
|
| 401 |
self.graph = graph_builder.compile()
|
|
@@ -421,47 +870,65 @@ You have access to the following tools to gather information and perform actions
|
|
| 421 |
|
| 422 |
# Keep track of the latest AI response that isn't a tool call
|
| 423 |
if isinstance(last_message, AIMessage):
|
| 424 |
-
# Check if it has tool calls
|
| 425 |
-
|
| 426 |
-
|
|
|
|
|
|
|
| 427 |
print(f"Potential Final AI Response: {last_message.content[:500]}...")
|
| 428 |
final_answer_content = last_message.content
|
|
|
|
|
|
|
|
|
|
| 429 |
else:
|
| 430 |
-
print(f"Non-string AI message content: {last_message.content}")
|
| 431 |
|
| 432 |
elif isinstance(last_message, ToolMessage):
|
| 433 |
print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
|
|
|
|
|
|
|
| 434 |
|
| 435 |
# --- Add the cleaning step ---
|
| 436 |
cleaned_answer = final_answer_content.strip()
|
| 437 |
|
|
|
|
|
|
|
| 438 |
prefixes_to_remove = [
|
| 439 |
"The answer is:", "Here is the answer:", "Based on the information:",
|
| 440 |
"Final Answer:", "Answer:"
|
| 441 |
]
|
|
|
|
|
|
|
| 442 |
for prefix in prefixes_to_remove:
|
| 443 |
-
# Case-insensitive check
|
| 444 |
if cleaned_answer.lower().startswith(prefix.lower()):
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
if not looks_like_code:
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
if first_line.strip().replace('_','').isalnum() and len(first_line.strip()) < 15:
|
| 455 |
-
cleaned_answer = rest.strip()
|
| 456 |
-
elif cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
|
| 457 |
cleaned_answer = cleaned_answer[1:-1].strip()
|
| 458 |
|
| 459 |
-
print(f"Agent returning final answer (cleaned): {cleaned_answer}")
|
| 460 |
-
if not cleaned_answer:
|
|
|
|
| 461 |
print("Warning: Agent produced an empty final answer after cleaning. Falling back to raw answer.")
|
| 462 |
-
return final_answer_content.strip() # Fallback
|
| 463 |
|
| 464 |
-
|
|
|
|
| 465 |
|
| 466 |
except Exception as e:
|
| 467 |
print(f"Error running agent graph: {e}")
|
|
@@ -471,17 +938,13 @@ You have access to the following tools to gather information and perform actions
|
|
| 471 |
|
| 472 |
|
| 473 |
# --- (Original Template Code Starts Here - NO CHANGES NEEDED BELOW THIS LINE) ---
|
| 474 |
-
# ... (run_and_submit_all function, Gradio interface, __main__ block) ...
|
| 475 |
-
# Note: Ensure the 'run_and_submit_all' function correctly instantiates 'BasicAgent()'
|
| 476 |
-
# The rest of the template code should remain the same.
|
| 477 |
|
| 478 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 479 |
"""
|
| 480 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 481 |
and displays the results.
|
| 482 |
"""
|
| 483 |
-
|
| 484 |
-
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 485 |
if profile:
|
| 486 |
username= f"{profile.username}"
|
| 487 |
print(f"User logged in: {username}")
|
|
@@ -493,11 +956,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 493 |
questions_url = f"{api_url}/questions"
|
| 494 |
submit_url = f"{api_url}/submit"
|
| 495 |
|
| 496 |
-
|
| 497 |
-
print("Instantiating agent...") # Changed log message slightly
|
| 498 |
try:
|
| 499 |
agent = BasicAgent()
|
| 500 |
-
# Check for ASR pipeline status after init
|
| 501 |
if agent.asr_pipeline is None:
|
| 502 |
print("⚠️ ASR Pipeline failed to load during agent init. Audio questions will likely fail.")
|
| 503 |
|
|
@@ -506,16 +967,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 506 |
import traceback
|
| 507 |
traceback.print_exc() # Print full traceback for init errors
|
| 508 |
return f"Error initializing agent: {e}", None
|
| 509 |
-
print("Agent instantiated successfully.")
|
| 510 |
|
| 511 |
-
|
| 512 |
-
agent_code = f"[https://huggingface.co/spaces/](https://huggingface.co/spaces/){space_id}/tree/main"
|
| 513 |
print(f"Agent code URL: {agent_code}")
|
| 514 |
|
| 515 |
-
# 2. Fetch Questions
|
| 516 |
print(f"Fetching questions from: {questions_url}")
|
| 517 |
try:
|
| 518 |
-
response = requests.get(questions_url, timeout=30)
|
| 519 |
response.raise_for_status()
|
| 520 |
questions_data = response.json()
|
| 521 |
if not questions_data:
|
|
@@ -533,7 +992,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 533 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 534 |
return f"An unexpected error occurred fetching questions: {e}", None
|
| 535 |
|
| 536 |
-
# 3. Run your Agent
|
| 537 |
results_log = []
|
| 538 |
answers_payload = []
|
| 539 |
total_questions = len(questions_data)
|
|
@@ -554,20 +1012,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 554 |
|
| 555 |
print(f"\n--- Running Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
|
| 556 |
try:
|
| 557 |
-
# Add file paths to the question context if present
|
| 558 |
-
# GAIA often includes files like images, audio, excel
|
| 559 |
file_path = item.get("file_path")
|
| 560 |
if file_path:
|
| 561 |
-
#
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
else:
|
| 566 |
-
|
| 567 |
-
if os.path.exists(file_path):
|
| 568 |
-
file_context = f"[Attached File (exists in cwd): {file_path}]"
|
| 569 |
-
else:
|
| 570 |
-
file_context = f"[Attached File (path provided): {file_path}]" # Agent needs to handle finding it
|
| 571 |
|
| 572 |
question_text_with_context = f"{question_text}\n\n{file_context}"
|
| 573 |
print(f"Question includes file reference: {file_path}")
|
|
@@ -651,7 +1108,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 651 |
|
| 652 |
# --- Build Gradio Interface using Blocks ---
|
| 653 |
with gr.Blocks() as demo:
|
| 654 |
-
gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph +
|
| 655 |
gr.Markdown(
|
| 656 |
"""
|
| 657 |
**Instructions:**
|
|
@@ -660,7 +1117,7 @@ with gr.Blocks() as demo:
|
|
| 660 |
---
|
| 661 |
**Notes:**
|
| 662 |
* The full evaluation can take **several hours**. Use the logs tab to monitor progress.
|
| 663 |
-
* This agent uses `
|
| 664 |
* Make sure your `HUGGINGFACEHUB_API_TOKEN` secret is set correctly in Settings.
|
| 665 |
"""
|
| 666 |
)
|
|
@@ -679,7 +1136,7 @@ if __name__ == "__main__":
|
|
| 679 |
|
| 680 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 681 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 682 |
-
space_id_startup = os.getenv("SPACE_ID")
|
| 683 |
|
| 684 |
if space_host_startup:
|
| 685 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
@@ -687,21 +1144,24 @@ if __name__ == "__main__":
|
|
| 687 |
else:
|
| 688 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 689 |
|
| 690 |
-
if space_id_startup:
|
| 691 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 692 |
-
print(f" Repo URL:
|
| 693 |
-
print(f" Repo Tree URL:
|
| 694 |
else:
|
| 695 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 696 |
|
| 697 |
# Add detailed path info for debugging file access
|
| 698 |
-
print(f"Script directory (__file__): {os.path.dirname(os.path.
|
| 699 |
print(f"Current working directory (os.getcwd()): {os.getcwd()}")
|
| 700 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 701 |
|
| 702 |
|
| 703 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 704 |
print("Launching Gradio Interface for GAIA Agent Evaluation...")
|
| 705 |
# Set queue=True to handle multiple clicks better, though only one run should happen at a time.
|
| 706 |
demo.queue().launch(debug=True, share=False)
|
| 707 |
-
|
|
|
|
| 340 |
6. If more information or steps are needed, use another tool (step 4) or continue reasoning based on the gathered information. Pay close attention to previous tool results.
|
| 341 |
7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
|
| 342 |
"""
|
| 343 |
+
import os
|
| 344 |
+
import gradio as gr
|
| 345 |
+
import requests
|
| 346 |
+
import inspect
|
| 347 |
+
import pandas as pd
|
| 348 |
+
import io
|
| 349 |
+
import contextlib
|
| 350 |
+
from typing import TypedDict, Annotated
|
| 351 |
+
import torch
|
| 352 |
+
import json # For robust tool call parsing/generation if needed
|
| 353 |
+
import re # For finding JSON
|
| 354 |
+
import uuid # For generating tool call IDs
|
| 355 |
+
|
| 356 |
+
# --- Multimodal & Web Tool Imports ---
|
| 357 |
+
from transformers import pipeline
|
| 358 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 359 |
+
import requests
|
| 360 |
+
from bs4 import BeautifulSoup
|
| 361 |
+
|
| 362 |
+
# --- LangChain & LangGraph Imports ---
|
| 363 |
+
from langgraph.graph.message import add_messages
|
| 364 |
+
# Make sure to import ToolCall
|
| 365 |
+
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall
|
| 366 |
+
from langgraph.prebuilt import ToolNode
|
| 367 |
+
from langgraph.graph import START, StateGraph
|
| 368 |
+
from langgraph.prebuilt import tools_condition
|
| 369 |
+
from langchain_huggingface import ChatHuggingFace
|
| 370 |
+
from langchain_huggingface import HuggingFaceEndpoint
|
| 371 |
+
from langchain_community.tools import DuckDuckGoSearchRun
|
| 372 |
+
from langchain_core.tools import tool, BaseTool
|
| 373 |
+
|
| 374 |
+
# (Keep Constants as is)
|
| 375 |
+
# --- Constants ---
|
| 376 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 377 |
+
|
| 378 |
+
# --- LangGraph Agent State ---
|
| 379 |
+
class AgentState(TypedDict):
|
| 380 |
+
messages: Annotated[list[AnyMessage], add_messages]
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
# --- Basic Agent Definition ---
|
| 384 |
+
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
| 385 |
+
class BasicAgent:
|
| 386 |
+
|
| 387 |
+
# --- Tool Definitions as Methods ---
|
| 388 |
+
# By making tools methods, they can access self.asr_pipeline
|
| 389 |
+
|
| 390 |
+
@tool
|
| 391 |
+
def search_tool(self, query: str) -> str:
|
| 392 |
+
"""Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
|
| 393 |
+
print(f"--- Calling Search Tool with query: {query} ---")
|
| 394 |
+
try:
|
| 395 |
+
search = DuckDuckGoSearchRun()
|
| 396 |
+
return search.run(query)
|
| 397 |
+
except Exception as e:
|
| 398 |
+
return f"Error running search: {e}"
|
| 399 |
+
|
| 400 |
+
@tool
|
| 401 |
+
def code_interpreter(self, code: str) -> str:
|
| 402 |
+
"""
|
| 403 |
+
Executes a string of Python code and returns its stdout, stderr, and any error.
|
| 404 |
+
Use this for calculations, data manipulation (including pandas on dataframes read from files), list operations, string manipulations, or any other Python operation.
|
| 405 |
+
The code runs in a sandboxed environment. 'pandas' (as pd) and 'openpyxl' are available.
|
| 406 |
+
Ensure the code is complete and executable. If printing, use print().
|
| 407 |
+
"""
|
| 408 |
+
print(f"--- Calling Code Interpreter with code:\n{code}\n---")
|
| 409 |
+
output_stream = io.StringIO()
|
| 410 |
+
error_stream = io.StringIO()
|
| 411 |
+
|
| 412 |
+
try:
|
| 413 |
+
# Use contextlib to redirect stdout and stderr
|
| 414 |
+
with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
|
| 415 |
+
# Execute the code. Provide 'pd' (pandas) in the globals
|
| 416 |
+
exec(code, {"pd": pd}, {})
|
| 417 |
+
|
| 418 |
+
stdout = output_stream.getvalue()
|
| 419 |
+
stderr = error_stream.getvalue()
|
| 420 |
+
|
| 421 |
+
if stderr:
|
| 422 |
+
return f"Error: {stderr}\nStdout: {stdout}"
|
| 423 |
+
if stdout:
|
| 424 |
+
return f"Success:\n{stdout}"
|
| 425 |
+
return "Success: Code executed without error and produced no stdout."
|
| 426 |
+
|
| 427 |
+
except Exception as e:
|
| 428 |
+
# Capture any exception during exec
|
| 429 |
+
return f"Execution failed with error: {str(e)}"
|
| 430 |
+
|
| 431 |
+
@tool
|
| 432 |
+
def read_file(self, path: str) -> str:
|
| 433 |
+
"""Reads the content of a file at the specified path. Use this to examine files provided in the question."""
|
| 434 |
+
print(f"--- Calling Read File Tool at path: {path} ---")
|
| 435 |
+
try:
|
| 436 |
+
# Try finding the file relative to the app directory first
|
| 437 |
+
# Use os.path.dirname(os.path.realpath(__file__)) for robustness in different execution contexts
|
| 438 |
+
script_dir = os.path.dirname(os.path.realpath(__file__))
|
| 439 |
+
full_path = os.path.join(script_dir, path)
|
| 440 |
+
print(f"Attempting to read relative path: {full_path}")
|
| 441 |
+
if not os.path.exists(full_path):
|
| 442 |
+
# If not found, try the direct path (might be absolute or relative to cwd)
|
| 443 |
+
full_path = path
|
| 444 |
+
print(f"Attempting to read direct path: {full_path}")
|
| 445 |
+
if not os.path.exists(full_path):
|
| 446 |
+
# Try basename for GAIA questions providing just the filename
|
| 447 |
+
base_path = os.path.basename(path)
|
| 448 |
+
print(f"Attempting to read basename path in cwd: {os.path.join(os.getcwd(), base_path)}")
|
| 449 |
+
if os.path.exists(base_path): # Check relative to CWD
|
| 450 |
+
full_path = base_path
|
| 451 |
+
else:
|
| 452 |
+
# List files in current and script directory for debugging
|
| 453 |
+
try:
|
| 454 |
+
cwd_files = os.listdir(".")
|
| 455 |
+
except Exception:
|
| 456 |
+
cwd_files = ["Error listing CWD"]
|
| 457 |
+
try:
|
| 458 |
+
script_dir_files = os.listdir(script_dir)
|
| 459 |
+
except Exception:
|
| 460 |
+
script_dir_files = ["Error listing script dir"]
|
| 461 |
+
return (f"Error: File not found.\n"
|
| 462 |
+
f"Tried relative path: '{os.path.join(script_dir, path)}'\n"
|
| 463 |
+
f"Tried direct path: '{path}'\n"
|
| 464 |
+
f"Tried basename in CWD: '{base_path}'\n"
|
| 465 |
+
f"Files in current dir (.): {cwd_files}\n"
|
| 466 |
+
f"Files in script dir ({script_dir}): {script_dir_files}")
|
| 467 |
+
|
| 468 |
+
print(f"Reading file: {full_path}")
|
| 469 |
+
with open(full_path, 'r', encoding='utf-8') as f:
|
| 470 |
+
return f.read()
|
| 471 |
+
except Exception as e:
|
| 472 |
+
return f"Error reading file {path}: {str(e)}"
|
| 473 |
+
|
| 474 |
+
@tool
|
| 475 |
+
def write_file(self, path: str, content: str) -> str:
|
| 476 |
+
"""Writes the given content to a file at the specified path relative to the app's directory. Creates directories if they don't exist."""
|
| 477 |
+
print(f"--- Calling Write File Tool at path: {path} ---")
|
| 478 |
+
try:
|
| 479 |
+
# Ensure the directory exists
|
| 480 |
+
script_dir = os.path.dirname(os.path.realpath(__file__))
|
| 481 |
+
full_path = os.path.join(script_dir, path) # Write relative to script dir
|
| 482 |
+
print(f"Writing file to: {full_path}")
|
| 483 |
+
os.makedirs(os.path.dirname(full_path), exist_ok=True)
|
| 484 |
+
|
| 485 |
+
with open(full_path, 'w', encoding='utf-8') as f:
|
| 486 |
+
f.write(content)
|
| 487 |
+
return f"Successfully wrote to file {path} (relative to app)."
|
| 488 |
+
except Exception as e:
|
| 489 |
+
return f"Error writing to file {path}: {str(e)}"
|
| 490 |
|
| 491 |
+
@tool
|
| 492 |
+
def list_directory(self, path: str = ".") -> str:
|
| 493 |
+
"""Lists the contents (files and directories) of a directory at the specified path relative to the app's directory."""
|
| 494 |
+
print(f"--- Calling List Directory Tool at path: {path} ---")
|
| 495 |
+
try:
|
| 496 |
+
script_dir = os.path.dirname(os.path.realpath(__file__))
|
| 497 |
+
full_path = os.path.join(script_dir, path) # List relative to script dir
|
| 498 |
+
print(f"Listing directory: {full_path}")
|
| 499 |
+
if not os.path.isdir(full_path):
|
| 500 |
+
return f"Error: '{path}' is not a valid directory relative to the app."
|
| 501 |
+
files = os.listdir(full_path)
|
| 502 |
+
return "\n".join(files) if files else "Directory is empty."
|
| 503 |
+
except Exception as e:
|
| 504 |
+
return f"Error listing directory {path}: {str(e)}"
|
| 505 |
+
|
| 506 |
+
@tool
|
| 507 |
+
def audio_transcription_tool(self, file_path: str) -> str:
|
| 508 |
+
"""
|
| 509 |
+
Transcribes an audio file (like .mp3 or .wav) using Whisper and returns the text content.
|
| 510 |
+
Use this for questions involving audio file analysis.
|
| 511 |
+
"""
|
| 512 |
+
print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
|
| 513 |
+
# Access the pipeline via self
|
| 514 |
+
if not self.asr_pipeline:
|
| 515 |
+
return "Error: Audio transcription pipeline is not available."
|
| 516 |
+
try:
|
| 517 |
+
# Try finding the file relative to the app directory first
|
| 518 |
+
script_dir = os.path.dirname(os.path.realpath(__file__))
|
| 519 |
+
full_path = os.path.join(script_dir, file_path)
|
| 520 |
+
print(f"Attempting to transcribe relative path: {full_path}")
|
| 521 |
+
if not os.path.exists(full_path):
|
| 522 |
+
# If not found, try the direct path
|
| 523 |
+
full_path = file_path
|
| 524 |
+
print(f"Attempting to transcribe direct path: {full_path}")
|
| 525 |
+
if not os.path.exists(full_path):
|
| 526 |
+
# Try basename for GAIA questions
|
| 527 |
+
base_path = os.path.basename(file_path)
|
| 528 |
+
print(f"Attempting to transcribe basename path in CWD: {os.path.join(os.getcwd(), base_path)}")
|
| 529 |
+
if os.path.exists(base_path): # Check relative to CWD
|
| 530 |
+
full_path = base_path
|
| 531 |
+
else:
|
| 532 |
+
try:
|
| 533 |
+
cwd_files = os.listdir(".")
|
| 534 |
+
except Exception:
|
| 535 |
+
cwd_files = ["Error listing CWD"]
|
| 536 |
+
try:
|
| 537 |
+
script_dir_files = os.listdir(script_dir)
|
| 538 |
+
except Exception:
|
| 539 |
+
script_dir_files = ["Error listing script dir"]
|
| 540 |
+
return (f"Error: Audio file not found.\n"
|
| 541 |
+
f"Tried relative path: '{os.path.join(script_dir, file_path)}'\n"
|
| 542 |
+
f"Tried direct path: '{file_path}'\n"
|
| 543 |
+
f"Tried basename in CWD: '{base_path}'\n"
|
| 544 |
+
f"Files in current dir (.): {cwd_files}\n"
|
| 545 |
+
f"Files in script dir ({script_dir}): {script_dir_files}")
|
| 546 |
+
|
| 547 |
+
print(f"Transcribing file: {full_path}")
|
| 548 |
+
# Important: Ensure the pipeline can handle the file path directly
|
| 549 |
+
transcription = self.asr_pipeline(full_path)
|
| 550 |
+
print("--- Transcription Complete ---")
|
| 551 |
+
# The output structure might vary slightly based on pipeline version
|
| 552 |
+
return transcription.get("text", "Error: Transcription failed to produce text.")
|
| 553 |
+
except Exception as e:
|
| 554 |
+
import traceback
|
| 555 |
+
print(f"Error during audio transcription: {e}")
|
| 556 |
+
traceback.print_exc()
|
| 557 |
+
return f"Error during audio transcription: {str(e)}"
|
| 558 |
+
|
| 559 |
+
@tool
|
| 560 |
+
def get_youtube_transcript(self, video_url: str) -> str:
|
| 561 |
+
"""
|
| 562 |
+
Fetches the transcript for a given YouTube video URL. Use this for questions about YouTube video content.
|
| 563 |
+
"""
|
| 564 |
+
print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
|
| 565 |
+
try:
|
| 566 |
+
# Extract video ID from URL more robustly
|
| 567 |
+
video_id = None
|
| 568 |
+
if "watch?v=" in video_url:
|
| 569 |
+
video_id = video_url.split("v=")[1].split("&")[0]
|
| 570 |
+
elif "youtu.be/" in video_url:
|
| 571 |
+
video_id = video_url.split("youtu.be/")[1].split("?")[0]
|
| 572 |
+
|
| 573 |
+
if not video_id:
|
| 574 |
+
return f"Error: Could not extract video ID from URL: {video_url}"
|
| 575 |
+
|
| 576 |
+
transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
|
| 577 |
+
|
| 578 |
+
# Combine all transcript parts into one string
|
| 579 |
+
full_transcript = " ".join([item["text"] for item in transcript_list])
|
| 580 |
+
print("--- Transcript Fetched ---")
|
| 581 |
+
# Return a limited amount to avoid overwhelming the context
|
| 582 |
+
return full_transcript[:8000]
|
| 583 |
+
except Exception as e:
|
| 584 |
+
return f"Error fetching YouTube transcript: {str(e)}"
|
| 585 |
+
|
| 586 |
+
@tool
|
| 587 |
+
def scrape_web_page(self, url: str) -> str:
|
| 588 |
+
"""
|
| 589 |
+
Fetches the primary text content of a given web page URL, removing navigation, footer, scripts, and styles.
|
| 590 |
+
Use this when you need the full content of a webpage found via search.
|
| 591 |
+
"""
|
| 592 |
+
print(f"--- Calling Web Scraper Tool for URL: {url} ---")
|
| 593 |
+
try:
|
| 594 |
+
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
|
| 595 |
+
response = requests.get(url, headers=headers, timeout=15) # Increased timeout
|
| 596 |
+
response.raise_for_status() # Raise an error for bad responses (4xx or 5xx)
|
| 597 |
+
|
| 598 |
+
# Check content type to avoid parsing non-HTML
|
| 599 |
+
if 'html' not in response.headers.get('Content-Type', '').lower():
|
| 600 |
+
return f"Error: URL {url} did not return HTML content."
|
| 601 |
+
|
| 602 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
| 603 |
+
|
| 604 |
+
# Remove common non-content tags
|
| 605 |
+
for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]):
|
| 606 |
+
tag.extract()
|
| 607 |
+
|
| 608 |
+
# Attempt to find the main content area (heuristics, may not always work)
|
| 609 |
+
main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body
|
| 610 |
+
if not main_content:
|
| 611 |
+
main_content = soup # Fallback to the whole soup if no main area found
|
| 612 |
+
|
| 613 |
+
text = main_content.get_text(separator='\n', strip=True)
|
| 614 |
+
|
| 615 |
+
# Clean up excessive whitespace
|
| 616 |
+
lines = (line.strip() for line in text.splitlines())
|
| 617 |
+
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
| 618 |
+
text = '\n'.join(chunk for chunk in chunks if chunk)
|
| 619 |
+
|
| 620 |
+
print("--- Web Page Scraped ---")
|
| 621 |
+
# Limit context size
|
| 622 |
+
return text[:8000]
|
| 623 |
+
|
| 624 |
+
except requests.exceptions.RequestException as e:
|
| 625 |
+
return f"Error fetching web page {url}: {str(e)}"
|
| 626 |
+
except Exception as e:
|
| 627 |
+
return f"Error scraping web page {url}: {str(e)}"
|
| 628 |
+
|
| 629 |
+
# --- End of Tool Definitions ---
|
| 630 |
+
|
| 631 |
+
|
| 632 |
+
def __init__(self):
|
| 633 |
+
print("BasicAgent (LangGraph) initializing...")
|
| 634 |
+
|
| 635 |
+
# 1. Initialize ASR Pipeline *inside* init - DELAYED LOADING
|
| 636 |
+
self.asr_pipeline = None # Initialize as None first
|
| 637 |
+
try:
|
| 638 |
+
print("Loading ASR (Whisper) pipeline...")
|
| 639 |
+
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 640 |
+
print(f"Using device: {device} for ASR.")
|
| 641 |
+
self.asr_pipeline = pipeline(
|
| 642 |
+
"automatic-speech-recognition",
|
| 643 |
+
model="openai/whisper-base",
|
| 644 |
+
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 645 |
+
device=device
|
| 646 |
+
)
|
| 647 |
+
print("✅ ASR (Whisper) pipeline loaded successfully.")
|
| 648 |
+
except Exception as e:
|
| 649 |
+
print(f"⚠️ Warning: Could not load ASR pipeline. Audio tool will not work. Error: {e}")
|
| 650 |
+
import traceback
|
| 651 |
+
traceback.print_exc() # Print full traceback for ASR load error
|
| 652 |
+
self.asr_pipeline = None
|
| 653 |
+
# ====================================================
|
| 654 |
+
|
| 655 |
+
# 2. Get API Token from Space Secrets
|
| 656 |
+
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 657 |
+
if not HUGGINGFACEHUB_API_TOKEN:
|
| 658 |
+
raise ValueError("HUGGINGFACEHUB_API_TOKEN secret is not set! Please add it to your Space secrets.")
|
| 659 |
+
|
| 660 |
+
# 3. Collect Tool Methods
|
| 661 |
+
self.tools = [
|
| 662 |
+
self.search_tool,
|
| 663 |
+
self.code_interpreter,
|
| 664 |
+
self.read_file,
|
| 665 |
+
self.write_file,
|
| 666 |
+
self.list_directory,
|
| 667 |
+
self.audio_transcription_tool,
|
| 668 |
+
self.get_youtube_transcript,
|
| 669 |
+
self.scrape_web_page
|
| 670 |
+
]
|
| 671 |
+
|
| 672 |
+
# 4. Define the Improved System Prompt with Placeholders
|
| 673 |
+
tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in self.tools])
|
| 674 |
+
self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
|
| 675 |
+
Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question, exactly matching the format required by the benchmark (e.g., a name, a number, a specific string format, a comma-separated list).
|
| 676 |
+
|
| 677 |
+
**CRITICAL INSTRUCTIONS:**
|
| 678 |
+
* **DO NOT** include conversational filler (e.g., "Sure, I can help...", "The answer is...", "Here is the information...").
|
| 679 |
+
* **DO NOT** explain your reasoning or the steps you took unless the question *explicitly* asks for it.
|
| 680 |
+
* **DO NOT** repeat the question in your final answer.
|
| 681 |
+
* **FINAL ANSWER FORMAT:** Your final response must contain *only* the answer itself.
|
| 682 |
+
|
| 683 |
+
You have access to the following tools to gather information and perform actions:
|
| 684 |
+
{tool_descriptions}
|
| 685 |
+
|
| 686 |
+
**TOOL USAGE PROTOCOL:**
|
| 687 |
+
* To use a tool, you MUST respond ONLY with a single JSON object formatted exactly like this:
|
| 688 |
+
```json
|
| 689 |
+
{{
|
| 690 |
+
"tool": "tool_name",
|
| 691 |
+
"tool_input": {{ "arg_name1": "value1", "arg_name2": "value2", ... }}
|
| 692 |
+
}}
|
| 693 |
+
```
|
| 694 |
+
* Replace `tool_name` with the exact name of the tool you want to use.
|
| 695 |
+
* Provide the required arguments within the `tool_input` dictionary. Ensure argument names and value types match the tool description precisely.
|
| 696 |
+
* Do not add any text before or after the JSON tool call block.
|
| 697 |
+
|
| 698 |
+
**REASONING PROCESS:**
|
| 699 |
+
1. Carefully analyze the user's question to understand the specific information required and the expected answer format. Check if any files are attached (mentioned like `[Attached File: filename.ext]`).
|
| 700 |
+
2. Break down the problem into logical steps.
|
| 701 |
+
3. Determine if any tools are necessary. Use `read_file` for attached files, `audio_transcription_tool` for audio, `get_youtube_transcript` for YouTube URLs, `search_tool` for web info, `scrape_web_page` to read content from URLs found via search, and `code_interpreter` for calculations or data processing.
|
| 702 |
+
4. If a tool is needed, call it using the specified JSON format. Wait for the tool's output.
|
| 703 |
+
5. Analyze the tool's output. If the answer is found, proceed to step 7.
|
| 704 |
+
6. If more information or steps are needed, use another tool (step 4) or continue reasoning based on the gathered information. Pay close attention to previous tool results.
|
| 705 |
+
7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
|
| 706 |
+
"""
|
| 707 |
+
|
| 708 |
+
# 5. Initialize the LLM (Using Mistral Instruct)
|
| 709 |
print("Initializing LLM Endpoint...")
|
| 710 |
llm = HuggingFaceEndpoint(
|
| 711 |
+
repo_id="mistralai/Mistral-7B-Instruct-v0.2", # Switched model
|
| 712 |
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
| 713 |
+
max_new_tokens=2048,
|
| 714 |
+
temperature=0.01,
|
|
|
|
| 715 |
)
|
| 716 |
chat_llm = ChatHuggingFace(llm=llm)
|
| 717 |
print("✅ LLM Endpoint initialized.")
|
| 718 |
|
| 719 |
# 6. Bind tools to the LLM
|
| 720 |
+
# We still bind tools, but we'll manually parse if it fails
|
| 721 |
self.llm_with_tools = chat_llm.bind_tools(self.tools)
|
| 722 |
print("✅ Tools bound to LLM.")
|
| 723 |
|
| 724 |
+
# 7. Define the Agent Node with Manual Tool Parsing
|
| 725 |
+
# ==================== NODE WITH PLACEHOLDER REGEX ====================
|
| 726 |
def agent_node(state: AgentState):
    """Invoke the tool-bound LLM on the conversation state.

    If ``bind_tools`` did not populate ``ai_message.tool_calls`` (common when
    the model emits the call as a JSON blob in plain text), fall back to
    manually extracting tool calls from the raw content. Returns a partial
    state update: ``{"messages": [ai_message]}``.
    """
    print("--- Running Agent Node ---")
    messages_with_prompt = state["messages"]

    # Invoke the LLM (which has tools bound)
    ai_message: AIMessage = self.llm_with_tools.invoke(messages_with_prompt)
    print(f"AI Message Raw Content: {ai_message.content}")

    # --- Manual Tool Call Parsing Logic ---
    tool_calls = []
    # Check if bind_tools already populated tool_calls (ideal case)
    if ai_message.tool_calls:
        print(f"SUCCESS: bind_tools correctly parsed tool_calls: {ai_message.tool_calls}")
        tool_calls = ai_message.tool_calls
    # Fallback: Check if content contains likely JSON for tool calls
    # Use regex to find JSON possibly wrapped in markdown
    elif isinstance(ai_message.content, str):
        print("Attempting manual JSON parsing from content...")
        # Group 1: JSON object/array inside a fenced ```json ... ``` block.
        # Group 2: bare JSON object/array anywhere in the content.
        # re.DOTALL lets '.' span newlines inside multi-line JSON.
        # (Fills in the placeholder regex; downstream code requires exactly
        # these two capture groups.)
        json_match = re.search(
            r"```(?:json)?\s*(\{.*?\}|\[.*?\])\s*```|(\{.*\}|\[.*\])",
            ai_message.content,
            re.DOTALL,
        )

        if json_match:
            # Extract the first valid group that contains JSON
            json_str = json_match.group(1) or json_match.group(2)
            if json_str:
                try:
                    # Attempt to strip potential leading/trailing non-JSON chars if regex was too broad
                    json_str_cleaned = json_str.strip()
                    # Basic validation: starts with { or [ ends with } or ]
                    if (json_str_cleaned.startswith('{') and json_str_cleaned.endswith('}')) or \
                       (json_str_cleaned.startswith('[') and json_str_cleaned.endswith(']')):
                        data = json.loads(json_str_cleaned)
                        # Check structure for single tool call (dict)
                        if isinstance(data, dict) and "tool" in data and "tool_input" in data:
                            tool_name = data.get("tool")
                            tool_input = data.get("tool_input")
                            # Basic validation of tool name and input type
                            if isinstance(tool_name, str) and isinstance(tool_input, dict):
                                call_id = f"tool_{uuid.uuid4()}"  # Generate unique ID
                                tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
                                print(f"Manually parsed Single Tool Call: ID={call_id}, Name={tool_name}, Args={tool_input}")
                                ai_message.content = ""  # Clear content after successful parse
                            else:
                                print("Parsed JSON dict, but incorrect tool name type or tool_input is not a dict.")
                        # Check structure for multiple tool calls (if model outputs a list)
                        elif isinstance(data, list):
                            print("Attempting to parse list as multiple tool calls...")
                            parsed_list_ok = True
                            temp_tool_calls = []
                            for item in data:
                                if isinstance(item, dict) and "tool" in item and "tool_input" in item:
                                    tool_name = item.get("tool")
                                    tool_input = item.get("tool_input")
                                    if isinstance(tool_name, str) and isinstance(tool_input, dict):
                                        call_id = f"tool_{uuid.uuid4()}"
                                        temp_tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
                                        print(f"Manually parsed Multi-Tool Call item: ID={call_id}, Name={tool_name}, Args={tool_input}")
                                    else:
                                        parsed_list_ok = False
                                        print("Parsed JSON list item, but incorrect tool name type or tool_input is not a dict.")
                                        break
                                else:
                                    parsed_list_ok = False
                                    print("Parsed JSON list item, but not a valid tool call structure (missing 'tool' or 'tool_input').")
                                    break
                            if parsed_list_ok and temp_tool_calls:
                                tool_calls.extend(temp_tool_calls)
                                ai_message.content = ""  # Clear content if list successfully parsed
                            else:
                                print("Parsed JSON, but incorrect structure (neither dict with tool/tool_input nor list of such dicts).")
                    else:
                        print(f"Skipping manual parse: Cleaned JSON string ('{json_str_cleaned[:50]}...') does not start/end correctly with braces/brackets.")
                except json.JSONDecodeError as e:
                    print(f"Manual JSON parsing failed: {e}. String was: '{json_str[:500]}...'")  # Log the problematic string
                except Exception as e:
                    print(f"Unexpected error during manual parsing: {e}")
                    import traceback
                    traceback.print_exc()
            else:
                print("Regex matched, but no JSON content found in capture groups.")
        else:
            print("No JSON block found in content for manual parsing.")
    else:
        print("AI Message content is not a string, skipping manual parse.")
    # --- End Manual Parsing ---

    # Attach manually parsed calls (if any) to the message
    # This allows tools_condition to work correctly
    if tool_calls and not ai_message.tool_calls:
        ai_message.tool_calls = tool_calls
        # Also clear invalid_tool_calls if we manually succeeded
        ai_message.invalid_tool_calls = []  # Use empty list instead of None

    # Log final interpretation
    if ai_message.tool_calls:
        print(f"AI Message contains tool calls (after manual check): {ai_message.tool_calls}")
    elif ai_message.invalid_tool_calls:
        print(f"AI Message contains INVALID tool calls: {ai_message.invalid_tool_calls}")
    else:
        print(f"AI Message Interpreted Content (no tool calls): {ai_message.pretty_repr()}")

    return {"messages": [ai_message]}
|
| 828 |
+
# =======================================================
|
| 829 |
|
| 830 |
# 8. Define the Tool Node
|
|
|
|
| 831 |
tool_node = ToolNode(self.tools)
|
| 832 |
|
| 833 |
# 9. Create the Graph
|
|
|
|
| 838 |
graph_builder.add_edge(START, "agent")
|
| 839 |
graph_builder.add_conditional_edges(
|
| 840 |
"agent",
|
| 841 |
+
tools_condition, # This condition checks ai_message.tool_calls
|
| 842 |
{
|
| 843 |
+
"tools": "tools",
|
| 844 |
+
"__end__": "__end__",
|
| 845 |
},
|
| 846 |
)
|
| 847 |
+
graph_builder.add_edge("tools", "agent")
|
| 848 |
|
| 849 |
# 10. Compile the graph and store it
|
| 850 |
self.graph = graph_builder.compile()
|
|
|
|
| 870 |
|
| 871 |
# Keep track of the latest AI response that isn't a tool call
|
| 872 |
if isinstance(last_message, AIMessage):
|
| 873 |
+
# Check if it has tool calls or invalid tool calls
|
| 874 |
+
has_calls = bool(last_message.tool_calls or last_message.invalid_tool_calls)
|
| 875 |
+
if not has_calls: # Only consider it final if no calls were attempted
|
| 876 |
+
# Ensure content is a string and not empty before assigning
|
| 877 |
+
if isinstance(last_message.content, str) and last_message.content.strip():
|
| 878 |
print(f"Potential Final AI Response: {last_message.content[:500]}...")
|
| 879 |
final_answer_content = last_message.content
|
| 880 |
+
# If content is empty after manual parsing cleared it, don't overwrite a previous potential answer
|
| 881 |
+
elif not isinstance(last_message.content, str) or not last_message.content.strip():
|
| 882 |
+
print("AI Message has no tool calls and empty/non-string content.")
|
| 883 |
else:
|
| 884 |
+
print(f"Non-string AI message content without tool calls: {last_message.content}")
|
| 885 |
|
| 886 |
elif isinstance(last_message, ToolMessage):
|
| 887 |
print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
|
| 888 |
+
# After a tool result, the next AI message might be the final one,
|
| 889 |
+
# so don't necessarily clear final_answer_content here. Let the loop find the *last* non-tool-call AI message.
|
| 890 |
|
| 891 |
# --- Add the cleaning step ---
|
| 892 |
cleaned_answer = final_answer_content.strip()
|
| 893 |
|
| 894 |
+
# More aggressive cleaning (optional, use with caution):
|
| 895 |
+
# Try to remove common conversational prefixes if they slipped through
|
| 896 |
prefixes_to_remove = [
|
| 897 |
"The answer is:", "Here is the answer:", "Based on the information:",
|
| 898 |
"Final Answer:", "Answer:"
|
| 899 |
]
|
| 900 |
+
# More thorough prefix removal
|
| 901 |
+
original_cleaned = cleaned_answer
|
| 902 |
for prefix in prefixes_to_remove:
|
|
|
|
| 903 |
if cleaned_answer.lower().startswith(prefix.lower()):
|
| 904 |
+
# Find where the actual answer starts after the prefix
|
| 905 |
+
potential_answer = cleaned_answer[len(prefix):].strip()
|
| 906 |
+
if potential_answer: # Only strip if there's content after the prefix
|
| 907 |
+
cleaned_answer = potential_answer
|
| 908 |
+
break # Stop after removing the first found prefix
|
| 909 |
+
# If nothing was stripped but prefixes exist, log it
|
| 910 |
+
if cleaned_answer == original_cleaned and any(cleaned_answer.lower().startswith(p.lower()) for p in prefixes_to_remove):
|
| 911 |
+
print(f"Warning: Prefix found but not stripped (maybe answer was empty after prefix?): '{original_cleaned[:100]}...'")
|
| 912 |
+
|
| 913 |
+
|
| 914 |
+
# Remove potential markdown code blocks only if the answer isn't expected to be code
|
| 915 |
+
# More robust check for code-like content
|
| 916 |
+
looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class ", "=>", "dict(", "list["]) or cleaned_answer.count('\n') > 3 or (cleaned_answer.startswith('[') and cleaned_answer.endswith(']')) or (cleaned_answer.startswith('{') and cleaned_answer.endswith('}'))
|
| 917 |
if not looks_like_code:
|
| 918 |
+
# --- THIS IS THE LINE WITH THE SECOND PLACEHOLDER ---
|
| 919 |
+
cleaned_answer = [[[REGEX_PLACEHOLDER_SUB]]] # Replace this line manually
|
| 920 |
+
# Remove single backticks if they surround the whole answer
|
| 921 |
+
if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
|
|
|
|
|
|
|
|
|
|
| 922 |
cleaned_answer = cleaned_answer[1:-1].strip()
|
| 923 |
|
| 924 |
+
print(f"Agent returning final answer (cleaned): '{cleaned_answer}'") # Add quotes for clarity
|
| 925 |
+
if not cleaned_answer and final_answer_content:
|
| 926 |
+
# If cleaning resulted in empty but original wasn't, return original
|
| 927 |
print("Warning: Agent produced an empty final answer after cleaning. Falling back to raw answer.")
|
| 928 |
+
return final_answer_content.strip() # Fallback if cleaning removed everything
|
| 929 |
|
| 930 |
+
# Handle case where agent legitimately produces no answer (e.g., error during loop)
|
| 931 |
+
return cleaned_answer if cleaned_answer else "AGENT FAILED TO PRODUCE ANSWER"
|
| 932 |
|
| 933 |
except Exception as e:
|
| 934 |
print(f"Error running agent graph: {e}")
|
|
|
|
| 938 |
|
| 939 |
|
| 940 |
# --- (Original Template Code Starts Here - NO CHANGES NEEDED BELOW THIS LINE) ---
|
|
|
|
|
|
|
|
|
|
| 941 |
|
| 942 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 943 |
"""
|
| 944 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 945 |
and displays the results.
|
| 946 |
"""
|
| 947 |
+
space_id = os.getenv("SPACE_ID")
|
|
|
|
| 948 |
if profile:
|
| 949 |
username= f"{profile.username}"
|
| 950 |
print(f"User logged in: {username}")
|
|
|
|
| 956 |
questions_url = f"{api_url}/questions"
|
| 957 |
submit_url = f"{api_url}/submit"
|
| 958 |
|
| 959 |
+
print("Instantiating agent...")
|
|
|
|
| 960 |
try:
|
| 961 |
agent = BasicAgent()
|
|
|
|
| 962 |
if agent.asr_pipeline is None:
|
| 963 |
print("⚠️ ASR Pipeline failed to load during agent init. Audio questions will likely fail.")
|
| 964 |
|
|
|
|
| 967 |
import traceback
|
| 968 |
traceback.print_exc() # Print full traceback for init errors
|
| 969 |
return f"Error initializing agent: {e}", None
|
| 970 |
+
print("Agent instantiated successfully.")
|
| 971 |
|
| 972 |
+
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
|
|
|
| 973 |
print(f"Agent code URL: {agent_code}")
|
| 974 |
|
|
|
|
| 975 |
print(f"Fetching questions from: {questions_url}")
|
| 976 |
try:
|
| 977 |
+
response = requests.get(questions_url, timeout=30)
|
| 978 |
response.raise_for_status()
|
| 979 |
questions_data = response.json()
|
| 980 |
if not questions_data:
|
|
|
|
| 992 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 993 |
return f"An unexpected error occurred fetching questions: {e}", None
|
| 994 |
|
|
|
|
| 995 |
results_log = []
|
| 996 |
answers_payload = []
|
| 997 |
total_questions = len(questions_data)
|
|
|
|
| 1012 |
|
| 1013 |
print(f"\n--- Running Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
|
| 1014 |
try:
|
|
|
|
|
|
|
| 1015 |
file_path = item.get("file_path")
|
| 1016 |
if file_path:
|
| 1017 |
+
# Check existence relative to script dir first, then CWD
|
| 1018 |
+
script_dir = os.path.dirname(os.path.realpath(__file__))
|
| 1019 |
+
potential_script_path = os.path.join(script_dir, file_path)
|
| 1020 |
+
potential_cwd_path = os.path.join(os.getcwd(), file_path) # Check CWD too
|
| 1021 |
+
|
| 1022 |
+
if os.path.exists(potential_script_path):
|
| 1023 |
+
file_context = f"[Attached File (exists): {file_path}]" # Path relative to script is good enough for agent
|
| 1024 |
+
elif os.path.exists(potential_cwd_path):
|
| 1025 |
+
file_context = f"[Attached File (exists in cwd): {file_path}]" # Path relative to cwd
|
| 1026 |
else:
|
| 1027 |
+
file_context = f"[Attached File (path provided): {file_path}]" # Agent needs to handle finding it
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1028 |
|
| 1029 |
question_text_with_context = f"{question_text}\n\n{file_context}"
|
| 1030 |
print(f"Question includes file reference: {file_path}")
|
|
|
|
| 1108 |
|
| 1109 |
# --- Build Gradio Interface using Blocks ---
|
| 1110 |
with gr.Blocks() as demo:
|
| 1111 |
+
gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph + Mistral)") # Updated title
|
| 1112 |
gr.Markdown(
|
| 1113 |
"""
|
| 1114 |
**Instructions:**
|
|
|
|
| 1117 |
---
|
| 1118 |
**Notes:**
|
| 1119 |
* The full evaluation can take **several hours**. Use the logs tab to monitor progress.
|
| 1120 |
+
* This agent uses `mistralai/Mistral-7B-Instruct-v0.2` and multiple tools.
|
| 1121 |
* Make sure your `HUGGINGFACEHUB_API_TOKEN` secret is set correctly in Settings.
|
| 1122 |
"""
|
| 1123 |
)
|
|
|
|
| 1136 |
|
| 1137 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 1138 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 1139 |
+
space_id_startup = os.getenv("SPACE_ID")
|
| 1140 |
|
| 1141 |
if space_host_startup:
|
| 1142 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
|
| 1144 |
else:
|
| 1145 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 1146 |
|
| 1147 |
+
if space_id_startup:
|
| 1148 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 1149 |
+
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 1150 |
+
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 1151 |
else:
|
| 1152 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 1153 |
|
| 1154 |
# Add detailed path info for debugging file access
|
| 1155 |
+
print(f"Script directory (__file__): {os.path.dirname(os.path.realpath(__file__))}")
|
| 1156 |
print(f"Current working directory (os.getcwd()): {os.getcwd()}")
|
| 1157 |
+
# List files only if the directory exists
|
| 1158 |
+
try:
|
| 1159 |
+
print("Files in current working directory:", os.listdir("."))
|
| 1160 |
+
except FileNotFoundError:
|
| 1161 |
+
print("Warning: Could not list current working directory.")
|
| 1162 |
|
| 1163 |
|
| 1164 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 1165 |
print("Launching Gradio Interface for GAIA Agent Evaluation...")
|
| 1166 |
# Set queue=True to handle multiple clicks better, though only one run should happen at a time.
|
| 1167 |
demo.queue().launch(debug=True, share=False)
|
|
|