gabejavitt committed on
Commit
8cd5fe1
·
verified ·
1 Parent(s): 2d2d572

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +536 -76
app.py CHANGED
@@ -340,45 +340,494 @@ You have access to the following tools to gather information and perform actions
340
  6. If more information or steps are needed, use another tool (step 4) or continue reasoning based on the gathered information. Pay close attention to previous tool results.
341
  7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
342
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- # 5. Initialize the LLM (Using Qwen Coder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  print("Initializing LLM Endpoint...")
346
  llm = HuggingFaceEndpoint(
347
- repo_id="mistralai/Mistral-7B-Instruct-v0.2", # Changed model
348
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
349
- max_new_tokens=2048, # Increased token limit for potentially longer reasoning/tool use
350
- temperature=0.01, # Keep temperature low for factual tasks
351
- # stop_sequences=["\nObservation:", "\nTool Result:", "\n```"] # Help prevent hallucinating tool calls/results
352
  )
353
  chat_llm = ChatHuggingFace(llm=llm)
354
  print("✅ LLM Endpoint initialized.")
355
 
356
  # 6. Bind tools to the LLM
357
- # Ensure the LLM knows how to format calls for the tools
358
  self.llm_with_tools = chat_llm.bind_tools(self.tools)
359
  print("✅ Tools bound to LLM.")
360
 
361
- # 7. Define the Agent Node
 
362
  def agent_node(state: AgentState):
363
  print("--- Running Agent Node ---")
364
- messages_with_prompt = state["messages"] # We inject in __call__
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
- ai_message = self.llm_with_tools.invoke(messages_with_prompt)
367
- print(f"AI Message Raw: {ai_message}") # Log raw output for debugging
368
- content_str = ai_message.content if isinstance(ai_message.content, str) else ""
369
- # Check for tool_calls attribute populated by bind_tools
 
 
 
 
370
  if ai_message.tool_calls:
371
- print(f"AI Message contains tool calls: {ai_message.tool_calls}")
372
- elif '"tool":' in content_str and '"tool_input":' in content_str:
373
- # Fallback check if bind_tools didn't populate tool_calls but JSON is present
374
- print(f"AI Message appears to contain raw tool call JSON.")
375
  else:
376
- print(f"AI Message Interpreted Content: {ai_message.pretty_repr()}")
377
 
378
  return {"messages": [ai_message]}
 
379
 
380
  # 8. Define the Tool Node
381
- # This uses the list of tool methods directly
382
  tool_node = ToolNode(self.tools)
383
 
384
  # 9. Create the Graph
@@ -389,13 +838,13 @@ You have access to the following tools to gather information and perform actions
389
  graph_builder.add_edge(START, "agent")
390
  graph_builder.add_conditional_edges(
391
  "agent",
392
- tools_condition, # This checks if the AIMessage contains tool_calls
393
  {
394
- "tools": "tools", # If tool_calls exist, go to tool node
395
- "__end__": "__end__", # Otherwise, end the graph
396
  },
397
  )
398
- graph_builder.add_edge("tools", "agent") # Loop back to agent after tools run
399
 
400
  # 10. Compile the graph and store it
401
  self.graph = graph_builder.compile()
@@ -421,47 +870,65 @@ You have access to the following tools to gather information and perform actions
421
 
422
  # Keep track of the latest AI response that isn't a tool call
423
  if isinstance(last_message, AIMessage):
424
- # Check if it has tool calls. If not, it might be the final answer.
425
- if not last_message.tool_calls and not last_message.invalid_tool_calls:
426
- if isinstance(last_message.content, str):
 
 
427
  print(f"Potential Final AI Response: {last_message.content[:500]}...")
428
  final_answer_content = last_message.content
 
 
 
429
  else:
430
- print(f"Non-string AI message content: {last_message.content}")
431
 
432
  elif isinstance(last_message, ToolMessage):
433
  print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
 
 
434
 
435
  # --- Add the cleaning step ---
436
  cleaned_answer = final_answer_content.strip()
437
 
 
 
438
  prefixes_to_remove = [
439
  "The answer is:", "Here is the answer:", "Based on the information:",
440
  "Final Answer:", "Answer:"
441
  ]
 
 
442
  for prefix in prefixes_to_remove:
443
- # Case-insensitive check
444
  if cleaned_answer.lower().startswith(prefix.lower()):
445
- cleaned_answer = cleaned_answer[len(prefix):].strip()
446
- break
447
-
448
- looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class "]) or cleaned_answer.count('\n') > 3
 
 
 
 
 
 
 
 
 
449
  if not looks_like_code:
450
- if cleaned_answer.startswith("```") and cleaned_answer.endswith("```"):
451
- cleaned_answer = cleaned_answer[3:-3].strip()
452
- if '\n' in cleaned_answer:
453
- first_line, rest = cleaned_answer.split('\n', 1)
454
- if first_line.strip().replace('_','').isalnum() and len(first_line.strip()) < 15:
455
- cleaned_answer = rest.strip()
456
- elif cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
457
  cleaned_answer = cleaned_answer[1:-1].strip()
458
 
459
- print(f"Agent returning final answer (cleaned): {cleaned_answer}")
460
- if not cleaned_answer:
 
461
  print("Warning: Agent produced an empty final answer after cleaning. Falling back to raw answer.")
462
- return final_answer_content.strip() # Fallback
463
 
464
- return cleaned_answer
 
465
 
466
  except Exception as e:
467
  print(f"Error running agent graph: {e}")
@@ -471,17 +938,13 @@ You have access to the following tools to gather information and perform actions
471
 
472
 
473
  # --- (Original Template Code Starts Here - NO CHANGES NEEDED BELOW THIS LINE) ---
474
- # ... (run_and_submit_all function, Gradio interface, __main__ block) ...
475
- # Note: Ensure the 'run_and_submit_all' function correctly instantiates 'BasicAgent()'
476
- # The rest of the template code should remain the same.
477
 
478
  def run_and_submit_all( profile: gr.OAuthProfile | None):
479
  """
480
  Fetches all questions, runs the BasicAgent on them, submits all answers,
481
  and displays the results.
482
  """
483
- # --- Determine HF Space Runtime URL and Repo URL ---
484
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
485
  if profile:
486
  username= f"{profile.username}"
487
  print(f"User logged in: {username}")
@@ -493,11 +956,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
493
  questions_url = f"{api_url}/questions"
494
  submit_url = f"{api_url}/submit"
495
 
496
- # 1. Instantiate Agent
497
- print("Instantiating agent...") # Changed log message slightly
498
  try:
499
  agent = BasicAgent()
500
- # Check for ASR pipeline status after init
501
  if agent.asr_pipeline is None:
502
  print("⚠️ ASR Pipeline failed to load during agent init. Audio questions will likely fail.")
503
 
@@ -506,16 +967,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
506
  import traceback
507
  traceback.print_exc() # Print full traceback for init errors
508
  return f"Error initializing agent: {e}", None
509
- print("Agent instantiated successfully.") # Changed log message slightly
510
 
511
- # Agent code URL
512
- agent_code = f"[https://huggingface.co/spaces/](https://huggingface.co/spaces/){space_id}/tree/main"
513
  print(f"Agent code URL: {agent_code}")
514
 
515
- # 2. Fetch Questions
516
  print(f"Fetching questions from: {questions_url}")
517
  try:
518
- response = requests.get(questions_url, timeout=30) # Increased timeout
519
  response.raise_for_status()
520
  questions_data = response.json()
521
  if not questions_data:
@@ -533,7 +992,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
533
  print(f"An unexpected error occurred fetching questions: {e}")
534
  return f"An unexpected error occurred fetching questions: {e}", None
535
 
536
- # 3. Run your Agent
537
  results_log = []
538
  answers_payload = []
539
  total_questions = len(questions_data)
@@ -554,20 +1012,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
554
 
555
  print(f"\n--- Running Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
556
  try:
557
- # Add file paths to the question context if present
558
- # GAIA often includes files like images, audio, excel
559
  file_path = item.get("file_path")
560
  if file_path:
561
- # Construct a potential path within the space if it's just a filename
562
- potential_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_path)
563
- if os.path.exists(potential_path):
564
- file_context = f"[Attached File (exists): {file_path}]"
 
 
 
 
 
565
  else:
566
- # Check if it exists in the current working directory too
567
- if os.path.exists(file_path):
568
- file_context = f"[Attached File (exists in cwd): {file_path}]"
569
- else:
570
- file_context = f"[Attached File (path provided): {file_path}]" # Agent needs to handle finding it
571
 
572
  question_text_with_context = f"{question_text}\n\n{file_context}"
573
  print(f"Question includes file reference: {file_path}")
@@ -651,7 +1108,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
651
 
652
  # --- Build Gradio Interface using Blocks ---
653
  with gr.Blocks() as demo:
654
- gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph + Qwen)")
655
  gr.Markdown(
656
  """
657
  **Instructions:**
@@ -660,7 +1117,7 @@ with gr.Blocks() as demo:
660
  ---
661
  **Notes:**
662
  * The full evaluation can take **several hours**. Use the logs tab to monitor progress.
663
- * This agent uses `Qwen/Qwen2.5-Coder-32B-Instruct` and multiple tools (search, code, file, audio, youtube, web scrape).
664
  * Make sure your `HUGGINGFACEHUB_API_TOKEN` secret is set correctly in Settings.
665
  """
666
  )
@@ -679,7 +1136,7 @@ if __name__ == "__main__":
679
 
680
  # Check for SPACE_HOST and SPACE_ID at startup for information
681
  space_host_startup = os.getenv("SPACE_HOST")
682
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
683
 
684
  if space_host_startup:
685
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -687,21 +1144,24 @@ if __name__ == "__main__":
687
  else:
688
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
689
 
690
- if space_id_startup: # Print repo URLs if SPACE_ID is found
691
  print(f"✅ SPACE_ID found: {space_id_startup}")
692
- print(f" Repo URL: [https://huggingface.co/spaces/](https://huggingface.co/spaces/){space_id_startup}")
693
- print(f" Repo Tree URL: [https://huggingface.co/spaces/](https://huggingface.co/spaces/){space_id_startup}/tree/main")
694
  else:
695
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
696
 
697
  # Add detailed path info for debugging file access
698
- print(f"Script directory (__file__): {os.path.dirname(os.path.abspath(__file__))}")
699
  print(f"Current working directory (os.getcwd()): {os.getcwd()}")
700
- print("Files in current working directory:", os.listdir("."))
 
 
 
 
701
 
702
 
703
  print("-"*(60 + len(" App Starting ")) + "\n")
704
  print("Launching Gradio Interface for GAIA Agent Evaluation...")
705
  # Set queue=True to handle multiple clicks better, though only one run should happen at a time.
706
  demo.queue().launch(debug=True, share=False)
707
-
 
340
  6. If more information or steps are needed, use another tool (step 4) or continue reasoning based on the gathered information. Pay close attention to previous tool results.
341
  7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
342
  """
343
+ import os
344
+ import gradio as gr
345
+ import requests
346
+ import inspect
347
+ import pandas as pd
348
+ import io
349
+ import contextlib
350
+ from typing import TypedDict, Annotated
351
+ import torch
352
+ import json # For robust tool call parsing/generation if needed
353
+ import re # For finding JSON
354
+ import uuid # For generating tool call IDs
355
+
356
+ # --- Multimodal & Web Tool Imports ---
357
+ from transformers import pipeline
358
+ from youtube_transcript_api import YouTubeTranscriptApi
359
+ import requests
360
+ from bs4 import BeautifulSoup
361
+
362
+ # --- LangChain & LangGraph Imports ---
363
+ from langgraph.graph.message import add_messages
364
+ # Make sure to import ToolCall
365
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall
366
+ from langgraph.prebuilt import ToolNode
367
+ from langgraph.graph import START, StateGraph
368
+ from langgraph.prebuilt import tools_condition
369
+ from langchain_huggingface import ChatHuggingFace
370
+ from langchain_huggingface import HuggingFaceEndpoint
371
+ from langchain_community.tools import DuckDuckGoSearchRun
372
+ from langchain_core.tools import tool, BaseTool
373
+
374
+ # (Keep Constants as is)
375
+ # --- Constants ---
376
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
377
+
378
+ # --- LangGraph Agent State ---
379
+ class AgentState(TypedDict):
380
+ messages: Annotated[list[AnyMessage], add_messages]
381
+
382
+
383
+ # --- Basic Agent Definition ---
384
+ # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
385
+ class BasicAgent:
386
+
387
+ # --- Tool Definitions as Methods ---
388
+ # By making tools methods, they can access self.asr_pipeline
389
+
390
+ @tool
391
+ def search_tool(self, query: str) -> str:
392
+ """Calls DuckDuckGo search and returns the results. Use this for recent information or general web searches."""
393
+ print(f"--- Calling Search Tool with query: {query} ---")
394
+ try:
395
+ search = DuckDuckGoSearchRun()
396
+ return search.run(query)
397
+ except Exception as e:
398
+ return f"Error running search: {e}"
399
+
400
+ @tool
401
+ def code_interpreter(self, code: str) -> str:
402
+ """
403
+ Executes a string of Python code and returns its stdout, stderr, and any error.
404
+ Use this for calculations, data manipulation (including pandas on dataframes read from files), list operations, string manipulations, or any other Python operation.
405
+ The code runs in a sandboxed environment. 'pandas' (as pd) and 'openpyxl' are available.
406
+ Ensure the code is complete and executable. If printing, use print().
407
+ """
408
+ print(f"--- Calling Code Interpreter with code:\n{code}\n---")
409
+ output_stream = io.StringIO()
410
+ error_stream = io.StringIO()
411
+
412
+ try:
413
+ # Use contextlib to redirect stdout and stderr
414
+ with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
415
+ # Execute the code. Provide 'pd' (pandas) in the globals
416
+ exec(code, {"pd": pd}, {})
417
+
418
+ stdout = output_stream.getvalue()
419
+ stderr = error_stream.getvalue()
420
+
421
+ if stderr:
422
+ return f"Error: {stderr}\nStdout: {stdout}"
423
+ if stdout:
424
+ return f"Success:\n{stdout}"
425
+ return "Success: Code executed without error and produced no stdout."
426
+
427
+ except Exception as e:
428
+ # Capture any exception during exec
429
+ return f"Execution failed with error: {str(e)}"
430
+
431
+ @tool
432
+ def read_file(self, path: str) -> str:
433
+ """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
434
+ print(f"--- Calling Read File Tool at path: {path} ---")
435
+ try:
436
+ # Try finding the file relative to the app directory first
437
+ # Use os.path.dirname(os.path.realpath(__file__)) for robustness in different execution contexts
438
+ script_dir = os.path.dirname(os.path.realpath(__file__))
439
+ full_path = os.path.join(script_dir, path)
440
+ print(f"Attempting to read relative path: {full_path}")
441
+ if not os.path.exists(full_path):
442
+ # If not found, try the direct path (might be absolute or relative to cwd)
443
+ full_path = path
444
+ print(f"Attempting to read direct path: {full_path}")
445
+ if not os.path.exists(full_path):
446
+ # Try basename for GAIA questions providing just the filename
447
+ base_path = os.path.basename(path)
448
+ print(f"Attempting to read basename path in cwd: {os.path.join(os.getcwd(), base_path)}")
449
+ if os.path.exists(base_path): # Check relative to CWD
450
+ full_path = base_path
451
+ else:
452
+ # List files in current and script directory for debugging
453
+ try:
454
+ cwd_files = os.listdir(".")
455
+ except Exception:
456
+ cwd_files = ["Error listing CWD"]
457
+ try:
458
+ script_dir_files = os.listdir(script_dir)
459
+ except Exception:
460
+ script_dir_files = ["Error listing script dir"]
461
+ return (f"Error: File not found.\n"
462
+ f"Tried relative path: '{os.path.join(script_dir, path)}'\n"
463
+ f"Tried direct path: '{path}'\n"
464
+ f"Tried basename in CWD: '{base_path}'\n"
465
+ f"Files in current dir (.): {cwd_files}\n"
466
+ f"Files in script dir ({script_dir}): {script_dir_files}")
467
+
468
+ print(f"Reading file: {full_path}")
469
+ with open(full_path, 'r', encoding='utf-8') as f:
470
+ return f.read()
471
+ except Exception as e:
472
+ return f"Error reading file {path}: {str(e)}"
473
+
474
+ @tool
475
+ def write_file(self, path: str, content: str) -> str:
476
+ """Writes the given content to a file at the specified path relative to the app's directory. Creates directories if they don't exist."""
477
+ print(f"--- Calling Write File Tool at path: {path} ---")
478
+ try:
479
+ # Ensure the directory exists
480
+ script_dir = os.path.dirname(os.path.realpath(__file__))
481
+ full_path = os.path.join(script_dir, path) # Write relative to script dir
482
+ print(f"Writing file to: {full_path}")
483
+ os.makedirs(os.path.dirname(full_path), exist_ok=True)
484
+
485
+ with open(full_path, 'w', encoding='utf-8') as f:
486
+ f.write(content)
487
+ return f"Successfully wrote to file {path} (relative to app)."
488
+ except Exception as e:
489
+ return f"Error writing to file {path}: {str(e)}"
490
 
491
+ @tool
492
+ def list_directory(self, path: str = ".") -> str:
493
+ """Lists the contents (files and directories) of a directory at the specified path relative to the app's directory."""
494
+ print(f"--- Calling List Directory Tool at path: {path} ---")
495
+ try:
496
+ script_dir = os.path.dirname(os.path.realpath(__file__))
497
+ full_path = os.path.join(script_dir, path) # List relative to script dir
498
+ print(f"Listing directory: {full_path}")
499
+ if not os.path.isdir(full_path):
500
+ return f"Error: '{path}' is not a valid directory relative to the app."
501
+ files = os.listdir(full_path)
502
+ return "\n".join(files) if files else "Directory is empty."
503
+ except Exception as e:
504
+ return f"Error listing directory {path}: {str(e)}"
505
+
506
+ @tool
507
+ def audio_transcription_tool(self, file_path: str) -> str:
508
+ """
509
+ Transcribes an audio file (like .mp3 or .wav) using Whisper and returns the text content.
510
+ Use this for questions involving audio file analysis.
511
+ """
512
+ print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
513
+ # Access the pipeline via self
514
+ if not self.asr_pipeline:
515
+ return "Error: Audio transcription pipeline is not available."
516
+ try:
517
+ # Try finding the file relative to the app directory first
518
+ script_dir = os.path.dirname(os.path.realpath(__file__))
519
+ full_path = os.path.join(script_dir, file_path)
520
+ print(f"Attempting to transcribe relative path: {full_path}")
521
+ if not os.path.exists(full_path):
522
+ # If not found, try the direct path
523
+ full_path = file_path
524
+ print(f"Attempting to transcribe direct path: {full_path}")
525
+ if not os.path.exists(full_path):
526
+ # Try basename for GAIA questions
527
+ base_path = os.path.basename(file_path)
528
+ print(f"Attempting to transcribe basename path in CWD: {os.path.join(os.getcwd(), base_path)}")
529
+ if os.path.exists(base_path): # Check relative to CWD
530
+ full_path = base_path
531
+ else:
532
+ try:
533
+ cwd_files = os.listdir(".")
534
+ except Exception:
535
+ cwd_files = ["Error listing CWD"]
536
+ try:
537
+ script_dir_files = os.listdir(script_dir)
538
+ except Exception:
539
+ script_dir_files = ["Error listing script dir"]
540
+ return (f"Error: Audio file not found.\n"
541
+ f"Tried relative path: '{os.path.join(script_dir, file_path)}'\n"
542
+ f"Tried direct path: '{file_path}'\n"
543
+ f"Tried basename in CWD: '{base_path}'\n"
544
+ f"Files in current dir (.): {cwd_files}\n"
545
+ f"Files in script dir ({script_dir}): {script_dir_files}")
546
+
547
+ print(f"Transcribing file: {full_path}")
548
+ # Important: Ensure the pipeline can handle the file path directly
549
+ transcription = self.asr_pipeline(full_path)
550
+ print("--- Transcription Complete ---")
551
+ # The output structure might vary slightly based on pipeline version
552
+ return transcription.get("text", "Error: Transcription failed to produce text.")
553
+ except Exception as e:
554
+ import traceback
555
+ print(f"Error during audio transcription: {e}")
556
+ traceback.print_exc()
557
+ return f"Error during audio transcription: {str(e)}"
558
+
559
+ @tool
560
+ def get_youtube_transcript(self, video_url: str) -> str:
561
+ """
562
+ Fetches the transcript for a given YouTube video URL. Use this for questions about YouTube video content.
563
+ """
564
+ print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
565
+ try:
566
+ # Extract video ID from URL more robustly
567
+ video_id = None
568
+ if "watch?v=" in video_url:
569
+ video_id = video_url.split("v=")[1].split("&")[0]
570
+ elif "youtu.be/" in video_url:
571
+ video_id = video_url.split("youtu.be/")[1].split("?")[0]
572
+
573
+ if not video_id:
574
+ return f"Error: Could not extract video ID from URL: {video_url}"
575
+
576
+ transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
577
+
578
+ # Combine all transcript parts into one string
579
+ full_transcript = " ".join([item["text"] for item in transcript_list])
580
+ print("--- Transcript Fetched ---")
581
+ # Return a limited amount to avoid overwhelming the context
582
+ return full_transcript[:8000]
583
+ except Exception as e:
584
+ return f"Error fetching YouTube transcript: {str(e)}"
585
+
586
+ @tool
587
+ def scrape_web_page(self, url: str) -> str:
588
+ """
589
+ Fetches the primary text content of a given web page URL, removing navigation, footer, scripts, and styles.
590
+ Use this when you need the full content of a webpage found via search.
591
+ """
592
+ print(f"--- Calling Web Scraper Tool for URL: {url} ---")
593
+ try:
594
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
595
+ response = requests.get(url, headers=headers, timeout=15) # Increased timeout
596
+ response.raise_for_status() # Raise an error for bad responses (4xx or 5xx)
597
+
598
+ # Check content type to avoid parsing non-HTML
599
+ if 'html' not in response.headers.get('Content-Type', '').lower():
600
+ return f"Error: URL {url} did not return HTML content."
601
+
602
+ soup = BeautifulSoup(response.text, 'html.parser')
603
+
604
+ # Remove common non-content tags
605
+ for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]):
606
+ tag.extract()
607
+
608
+ # Attempt to find the main content area (heuristics, may not always work)
609
+ main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body
610
+ if not main_content:
611
+ main_content = soup # Fallback to the whole soup if no main area found
612
+
613
+ text = main_content.get_text(separator='\n', strip=True)
614
+
615
+ # Clean up excessive whitespace
616
+ lines = (line.strip() for line in text.splitlines())
617
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
618
+ text = '\n'.join(chunk for chunk in chunks if chunk)
619
+
620
+ print("--- Web Page Scraped ---")
621
+ # Limit context size
622
+ return text[:8000]
623
+
624
+ except requests.exceptions.RequestException as e:
625
+ return f"Error fetching web page {url}: {str(e)}"
626
+ except Exception as e:
627
+ return f"Error scraping web page {url}: {str(e)}"
628
+
629
+ # --- End of Tool Definitions ---
630
+
631
+
632
+ def __init__(self):
633
+ print("BasicAgent (LangGraph) initializing...")
634
+
635
+ # 1. Initialize ASR Pipeline *inside* init - DELAYED LOADING
636
+ self.asr_pipeline = None # Initialize as None first
637
+ try:
638
+ print("Loading ASR (Whisper) pipeline...")
639
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
640
+ print(f"Using device: {device} for ASR.")
641
+ self.asr_pipeline = pipeline(
642
+ "automatic-speech-recognition",
643
+ model="openai/whisper-base",
644
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
645
+ device=device
646
+ )
647
+ print("✅ ASR (Whisper) pipeline loaded successfully.")
648
+ except Exception as e:
649
+ print(f"⚠️ Warning: Could not load ASR pipeline. Audio tool will not work. Error: {e}")
650
+ import traceback
651
+ traceback.print_exc() # Print full traceback for ASR load error
652
+ self.asr_pipeline = None
653
+ # ====================================================
654
+
655
+ # 2. Get API Token from Space Secrets
656
+ HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
657
+ if not HUGGINGFACEHUB_API_TOKEN:
658
+ raise ValueError("HUGGINGFACEHUB_API_TOKEN secret is not set! Please add it to your Space secrets.")
659
+
660
+ # 3. Collect Tool Methods
661
+ self.tools = [
662
+ self.search_tool,
663
+ self.code_interpreter,
664
+ self.read_file,
665
+ self.write_file,
666
+ self.list_directory,
667
+ self.audio_transcription_tool,
668
+ self.get_youtube_transcript,
669
+ self.scrape_web_page
670
+ ]
671
+
672
+ # 4. Define the Improved System Prompt with Placeholders
673
+ tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in self.tools])
674
+ self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
675
+ Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question, exactly matching the format required by the benchmark (e.g., a name, a number, a specific string format, a comma-separated list).
676
+
677
+ **CRITICAL INSTRUCTIONS:**
678
+ * **DO NOT** include conversational filler (e.g., "Sure, I can help...", "The answer is...", "Here is the information...").
679
+ * **DO NOT** explain your reasoning or the steps you took unless the question *explicitly* asks for it.
680
+ * **DO NOT** repeat the question in your final answer.
681
+ * **FINAL ANSWER FORMAT:** Your final response must contain *only* the answer itself.
682
+
683
+ You have access to the following tools to gather information and perform actions:
684
+ {tool_descriptions}
685
+
686
+ **TOOL USAGE PROTOCOL:**
687
+ * To use a tool, you MUST respond ONLY with a single JSON object formatted exactly like this:
688
+ ```json
689
+ {{
690
+ "tool": "tool_name",
691
+ "tool_input": {{ "arg_name1": "value1", "arg_name2": "value2", ... }}
692
+ }}
693
+ ```
694
+ * Replace `tool_name` with the exact name of the tool you want to use.
695
+ * Provide the required arguments within the `tool_input` dictionary. Ensure argument names and value types match the tool description precisely.
696
+ * Do not add any text before or after the JSON tool call block.
697
+
698
+ **REASONING PROCESS:**
699
+ 1. Carefully analyze the user's question to understand the specific information required and the expected answer format. Check if any files are attached (mentioned like `[Attached File: filename.ext]`).
700
+ 2. Break down the problem into logical steps.
701
+ 3. Determine if any tools are necessary. Use `read_file` for attached files, `audio_transcription_tool` for audio, `get_youtube_transcript` for YouTube URLs, `search_tool` for web info, `scrape_web_page` to read content from URLs found via search, and `code_interpreter` for calculations or data processing.
702
+ 4. If a tool is needed, call it using the specified JSON format. Wait for the tool's output.
703
+ 5. Analyze the tool's output. If the answer is found, proceed to step 7.
704
+ 6. If more information or steps are needed, use another tool (step 4) or continue reasoning based on the gathered information. Pay close attention to previous tool results.
705
+ 7. Once you have derived the final, definitive answer that meets the question's requirements, output **ONLY** that answer and nothing else. Stop the process.
706
+ """
707
+
708
+ # 5. Initialize the LLM (Using Mistral Instruct)
709
  print("Initializing LLM Endpoint...")
710
  llm = HuggingFaceEndpoint(
711
+ repo_id="mistralai/Mistral-7B-Instruct-v0.2", # Switched model
712
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
713
+ max_new_tokens=2048,
714
+ temperature=0.01,
 
715
  )
716
  chat_llm = ChatHuggingFace(llm=llm)
717
  print("✅ LLM Endpoint initialized.")
718
 
719
  # 6. Bind tools to the LLM
720
+ # We still bind tools, but we'll manually parse if it fails
721
  self.llm_with_tools = chat_llm.bind_tools(self.tools)
722
  print("✅ Tools bound to LLM.")
723
 
724
+ # 7. Define the Agent Node with Manual Tool Parsing
725
+ # ==================== NODE WITH PLACEHOLDER REGEX ====================
726
  def agent_node(state: AgentState):
727
  print("--- Running Agent Node ---")
728
+ messages_with_prompt = state["messages"]
729
+
730
+ # Invoke the LLM (which has tools bound)
731
+ ai_message: AIMessage = self.llm_with_tools.invoke(messages_with_prompt)
732
+ print(f"AI Message Raw Content: {ai_message.content}")
733
+
734
+ # --- Manual Tool Call Parsing Logic ---
735
+ tool_calls = []
736
+ # Check if bind_tools already populated tool_calls (ideal case)
737
+ if ai_message.tool_calls:
738
+ print(f"SUCCESS: bind_tools correctly parsed tool_calls: {ai_message.tool_calls}")
739
+ tool_calls = ai_message.tool_calls
740
+ # Fallback: Check if content contains likely JSON for tool calls
741
+ # Use regex to find JSON possibly wrapped in markdown
742
+ elif isinstance(ai_message.content, str):
743
+ print("Attempting manual JSON parsing from content...")
744
+ # --- THIS IS THE LINE WITH THE FIRST PLACEHOLDER ---
745
+ json_match = re.search(r"...") # Replace this line manually
746
+
747
+ if json_match:
748
+ # Extract the first valid group that contains JSON
749
+ json_str = json_match.group(1) or json_match.group(2)
750
+ if json_str:
751
+ try:
752
+ # Attempt to strip potential leading/trailing non-JSON chars if regex was too broad
753
+ json_str_cleaned = json_str.strip()
754
+ # Basic validation: starts with { or [ ends with } or ]
755
+ if (json_str_cleaned.startswith('{') and json_str_cleaned.endswith('}')) or \
756
+ (json_str_cleaned.startswith('[') and json_str_cleaned.endswith(']')):
757
+ data = json.loads(json_str_cleaned)
758
+ # Check structure for single tool call (dict)
759
+ if isinstance(data, dict) and "tool" in data and "tool_input" in data:
760
+ tool_name = data.get("tool")
761
+ tool_input = data.get("tool_input")
762
+ # Basic validation of tool name and input type
763
+ if isinstance(tool_name, str) and isinstance(tool_input, dict):
764
+ call_id = f"tool_{uuid.uuid4()}" # Generate unique ID
765
+ tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
766
+ print(f"Manually parsed Single Tool Call: ID={call_id}, Name={tool_name}, Args={tool_input}")
767
+ ai_message.content = "" # Clear content after successful parse
768
+ else:
769
+ print("Parsed JSON dict, but incorrect tool name type or tool_input is not a dict.")
770
+ # Check structure for multiple tool calls (if model outputs a list)
771
+ elif isinstance(data, list):
772
+ print("Attempting to parse list as multiple tool calls...")
773
+ parsed_list_ok = True
774
+ temp_tool_calls = []
775
+ for item in data:
776
+ if isinstance(item, dict) and "tool" in item and "tool_input" in item:
777
+ tool_name = item.get("tool")
778
+ tool_input = item.get("tool_input")
779
+ if isinstance(tool_name, str) and isinstance(tool_input, dict):
780
+ call_id = f"tool_{uuid.uuid4()}"
781
+ temp_tool_calls.append(ToolCall(name=tool_name, args=tool_input, id=call_id))
782
+ print(f"Manually parsed Multi-Tool Call item: ID={call_id}, Name={tool_name}, Args={tool_input}")
783
+ else:
784
+ parsed_list_ok = False
785
+ print("Parsed JSON list item, but incorrect tool name type or tool_input is not a dict.")
786
+ break
787
+ else:
788
+ parsed_list_ok = False
789
+ print("Parsed JSON list item, but not a valid tool call structure (missing 'tool' or 'tool_input').")
790
+ break
791
+ if parsed_list_ok and temp_tool_calls:
792
+ tool_calls.extend(temp_tool_calls)
793
+ ai_message.content = "" # Clear content if list successfully parsed
794
+ else:
795
+ print("Parsed JSON, but incorrect structure (neither dict with tool/tool_input nor list of such dicts).")
796
+ else:
797
+ print(f"Skipping manual parse: Cleaned JSON string ('{json_str_cleaned[:50]}...') does not start/end correctly with braces/brackets.")
798
+ except json.JSONDecodeError as e:
799
+ print(f"Manual JSON parsing failed: {e}. String was: '{json_str[:500]}...'") # Log the problematic string
800
+ except Exception as e:
801
+ print(f"Unexpected error during manual parsing: {e}")
802
+ import traceback
803
+ traceback.print_exc()
804
+ else:
805
+ print("Regex matched, but no JSON content found in capture groups.")
806
+ else:
807
+ print("No JSON block found in content for manual parsing.")
808
+ else:
809
+ print("AI Message content is not a string, skipping manual parse.")
810
+ # --- End Manual Parsing ---
811
 
812
+ # Attach manually parsed calls (if any) to the message
813
+ # This allows tools_condition to work correctly
814
+ if tool_calls and not ai_message.tool_calls:
815
+ ai_message.tool_calls = tool_calls
816
+ # Also clear invalid_tool_calls if we manually succeeded
817
+ ai_message.invalid_tool_calls = [] # Use empty list instead of None
818
+
819
+ # Log final interpretation
820
  if ai_message.tool_calls:
821
+ print(f"AI Message contains tool calls (after manual check): {ai_message.tool_calls}")
822
+ elif ai_message.invalid_tool_calls:
823
+ print(f"AI Message contains INVALID tool calls: {ai_message.invalid_tool_calls}")
 
824
  else:
825
+ print(f"AI Message Interpreted Content (no tool calls): {ai_message.pretty_repr()}")
826
 
827
  return {"messages": [ai_message]}
828
+ # =======================================================
829
 
830
  # 8. Define the Tool Node
 
831
  tool_node = ToolNode(self.tools)
832
 
833
  # 9. Create the Graph
 
838
  graph_builder.add_edge(START, "agent")
839
  graph_builder.add_conditional_edges(
840
  "agent",
841
+ tools_condition, # This condition checks ai_message.tool_calls
842
  {
843
+ "tools": "tools",
844
+ "__end__": "__end__",
845
  },
846
  )
847
+ graph_builder.add_edge("tools", "agent")
848
 
849
  # 10. Compile the graph and store it
850
  self.graph = graph_builder.compile()
 
870
 
871
  # Keep track of the latest AI response that isn't a tool call
872
  if isinstance(last_message, AIMessage):
873
+ # Check if it has tool calls or invalid tool calls
874
+ has_calls = bool(last_message.tool_calls or last_message.invalid_tool_calls)
875
+ if not has_calls: # Only consider it final if no calls were attempted
876
+ # Ensure content is a string and not empty before assigning
877
+ if isinstance(last_message.content, str) and last_message.content.strip():
878
  print(f"Potential Final AI Response: {last_message.content[:500]}...")
879
  final_answer_content = last_message.content
880
+ # If content is empty after manual parsing cleared it, don't overwrite a previous potential answer
881
+ elif not isinstance(last_message.content, str) or not last_message.content.strip():
882
+ print("AI Message has no tool calls and empty/non-string content.")
883
  else:
884
+ print(f"Non-string AI message content without tool calls: {last_message.content}")
885
 
886
  elif isinstance(last_message, ToolMessage):
887
  print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
888
+ # After a tool result, the next AI message might be the final one,
889
+ # so don't necessarily clear final_answer_content here. Let the loop find the *last* non-tool-call AI message.
890
 
891
  # --- Add the cleaning step ---
892
  cleaned_answer = final_answer_content.strip()
893
 
894
+ # More aggressive cleaning (optional, use with caution):
895
+ # Try to remove common conversational prefixes if they slipped through
896
  prefixes_to_remove = [
897
  "The answer is:", "Here is the answer:", "Based on the information:",
898
  "Final Answer:", "Answer:"
899
  ]
900
+ # More thorough prefix removal
901
+ original_cleaned = cleaned_answer
902
  for prefix in prefixes_to_remove:
 
903
  if cleaned_answer.lower().startswith(prefix.lower()):
904
+ # Find where the actual answer starts after the prefix
905
+ potential_answer = cleaned_answer[len(prefix):].strip()
906
+ if potential_answer: # Only strip if there's content after the prefix
907
+ cleaned_answer = potential_answer
908
+ break # Stop after removing the first found prefix
909
+ # If nothing was stripped but prefixes exist, log it
910
+ if cleaned_answer == original_cleaned and any(cleaned_answer.lower().startswith(p.lower()) for p in prefixes_to_remove):
911
+ print(f"Warning: Prefix found but not stripped (maybe answer was empty after prefix?): '{original_cleaned[:100]}...'")
912
+
913
+
914
+ # Remove potential markdown code blocks only if the answer isn't expected to be code
915
+ # More robust check for code-like content
916
+ looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class ", "=>", "dict(", "list["]) or cleaned_answer.count('\n') > 3 or (cleaned_answer.startswith('[') and cleaned_answer.endswith(']')) or (cleaned_answer.startswith('{') and cleaned_answer.endswith('}'))
917
  if not looks_like_code:
918
+ # --- THIS IS THE LINE WITH THE SECOND PLACEHOLDER ---
919
+ cleaned_answer = [[[REGEX_PLACEHOLDER_SUB]]] # Replace this line manually
920
+ # Remove single backticks if they surround the whole answer
921
+ if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
 
 
 
922
  cleaned_answer = cleaned_answer[1:-1].strip()
923
 
924
+ print(f"Agent returning final answer (cleaned): '{cleaned_answer}'") # Add quotes for clarity
925
+ if not cleaned_answer and final_answer_content:
926
+ # If cleaning resulted in empty but original wasn't, return original
927
  print("Warning: Agent produced an empty final answer after cleaning. Falling back to raw answer.")
928
+ return final_answer_content.strip() # Fallback if cleaning removed everything
929
 
930
+ # Handle case where agent legitimately produces no answer (e.g., error during loop)
931
+ return cleaned_answer if cleaned_answer else "AGENT FAILED TO PRODUCE ANSWER"
932
 
933
  except Exception as e:
934
  print(f"Error running agent graph: {e}")
 
938
 
939
 
940
  # --- (Original Template Code Starts Here - NO CHANGES NEEDED BELOW THIS LINE) ---
 
 
 
941
 
942
  def run_and_submit_all( profile: gr.OAuthProfile | None):
943
  """
944
  Fetches all questions, runs the BasicAgent on them, submits all answers,
945
  and displays the results.
946
  """
947
+ space_id = os.getenv("SPACE_ID")
 
948
  if profile:
949
  username= f"{profile.username}"
950
  print(f"User logged in: {username}")
 
956
  questions_url = f"{api_url}/questions"
957
  submit_url = f"{api_url}/submit"
958
 
959
+ print("Instantiating agent...")
 
960
  try:
961
  agent = BasicAgent()
 
962
  if agent.asr_pipeline is None:
963
  print("⚠️ ASR Pipeline failed to load during agent init. Audio questions will likely fail.")
964
 
 
967
  import traceback
968
  traceback.print_exc() # Print full traceback for init errors
969
  return f"Error initializing agent: {e}", None
970
+ print("Agent instantiated successfully.")
971
 
972
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
973
  print(f"Agent code URL: {agent_code}")
974
 
 
975
  print(f"Fetching questions from: {questions_url}")
976
  try:
977
+ response = requests.get(questions_url, timeout=30)
978
  response.raise_for_status()
979
  questions_data = response.json()
980
  if not questions_data:
 
992
  print(f"An unexpected error occurred fetching questions: {e}")
993
  return f"An unexpected error occurred fetching questions: {e}", None
994
 
 
995
  results_log = []
996
  answers_payload = []
997
  total_questions = len(questions_data)
 
1012
 
1013
  print(f"\n--- Running Task {i+1}/{len(questions_to_run)} (ID: {task_id}) ---")
1014
  try:
 
 
1015
  file_path = item.get("file_path")
1016
  if file_path:
1017
+ # Check existence relative to script dir first, then CWD
1018
+ script_dir = os.path.dirname(os.path.realpath(__file__))
1019
+ potential_script_path = os.path.join(script_dir, file_path)
1020
+ potential_cwd_path = os.path.join(os.getcwd(), file_path) # Check CWD too
1021
+
1022
+ if os.path.exists(potential_script_path):
1023
+ file_context = f"[Attached File (exists): {file_path}]" # Path relative to script is good enough for agent
1024
+ elif os.path.exists(potential_cwd_path):
1025
+ file_context = f"[Attached File (exists in cwd): {file_path}]" # Path relative to cwd
1026
  else:
1027
+ file_context = f"[Attached File (path provided): {file_path}]" # Agent needs to handle finding it
 
 
 
 
1028
 
1029
  question_text_with_context = f"{question_text}\n\n{file_context}"
1030
  print(f"Question includes file reference: {file_path}")
 
1108
 
1109
  # --- Build Gradio Interface using Blocks ---
1110
  with gr.Blocks() as demo:
1111
+ gr.Markdown("# GAIA Agent Evaluation Runner (LangGraph + Mistral)") # Updated title
1112
  gr.Markdown(
1113
  """
1114
  **Instructions:**
 
1117
  ---
1118
  **Notes:**
1119
  * The full evaluation can take **several hours**. Use the logs tab to monitor progress.
1120
+ * This agent uses `mistralai/Mistral-7B-Instruct-v0.2` and multiple tools.
1121
  * Make sure your `HUGGINGFACEHUB_API_TOKEN` secret is set correctly in Settings.
1122
  """
1123
  )
 
1136
 
1137
  # Check for SPACE_HOST and SPACE_ID at startup for information
1138
  space_host_startup = os.getenv("SPACE_HOST")
1139
+ space_id_startup = os.getenv("SPACE_ID")
1140
 
1141
  if space_host_startup:
1142
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
1144
  else:
1145
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
1146
 
1147
+ if space_id_startup:
1148
  print(f"✅ SPACE_ID found: {space_id_startup}")
1149
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
1150
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
1151
  else:
1152
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
1153
 
1154
  # Add detailed path info for debugging file access
1155
+ print(f"Script directory (__file__): {os.path.dirname(os.path.realpath(__file__))}")
1156
  print(f"Current working directory (os.getcwd()): {os.getcwd()}")
1157
+ # List files only if the directory exists
1158
+ try:
1159
+ print("Files in current working directory:", os.listdir("."))
1160
+ except FileNotFoundError:
1161
+ print("Warning: Could not list current working directory.")
1162
 
1163
 
1164
  print("-"*(60 + len(" App Starting ")) + "\n")
1165
  print("Launching Gradio Interface for GAIA Agent Evaluation...")
1166
  # Set queue=True to handle multiple clicks better, though only one run should happen at a time.
1167
  demo.queue().launch(debug=True, share=False)