Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,8 +22,8 @@ from langgraph.graph.message import add_messages
|
|
| 22 |
# Make sure to import ToolCall
|
| 23 |
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall
|
| 24 |
from langgraph.prebuilt import ToolNode
|
| 25 |
-
from langgraph.graph import START, StateGraph
|
| 26 |
-
|
| 27 |
from langchain_community.tools import DuckDuckGoSearchRun
|
| 28 |
from langchain_core.tools import tool, BaseTool
|
| 29 |
# --- ADD GROQ IMPORT ---
|
|
@@ -34,26 +34,23 @@ from langchain_groq import ChatGroq
|
|
| 34 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This URL is currently not working
|
| 35 |
|
| 36 |
# --- Initialize ASR Pipeline (Moved back to Global Scope) ---
|
| 37 |
-
|
| 38 |
-
asr_pipeline = None # Initialize as None first
|
| 39 |
try:
|
| 40 |
print("Loading ASR (Whisper) pipeline globally...")
|
| 41 |
-
|
| 42 |
-
device = 0 if torch.cuda.is_available() else -1 # device=0 for GPU, -1 for CPU
|
| 43 |
device_name = "cuda:0" if device == 0 else "cpu"
|
| 44 |
print(f"Attempting to use device: {device_name} for ASR.")
|
| 45 |
asr_pipeline = pipeline(
|
| 46 |
"automatic-speech-recognition",
|
| 47 |
model="openai/whisper-base",
|
| 48 |
-
# Use float16 only if CUDA is definitely available and working
|
| 49 |
torch_dtype=torch.float16 if device == 0 else torch.float32,
|
| 50 |
-
device=device
|
| 51 |
)
|
| 52 |
print("✅ ASR (Whisper) pipeline loaded successfully.")
|
| 53 |
except Exception as e:
|
| 54 |
-
print(f"⚠️ Warning: Could not load ASR pipeline globally.
|
| 55 |
import traceback
|
| 56 |
-
traceback.print_exc()
|
| 57 |
asr_pipeline = None
|
| 58 |
# ====================================================
|
| 59 |
|
|
@@ -81,50 +78,33 @@ def code_interpreter(code: str) -> str:
|
|
| 81 |
print(f"--- Calling Code Interpreter with code:\n{code}\n---")
|
| 82 |
output_stream = io.StringIO()
|
| 83 |
error_stream = io.StringIO()
|
| 84 |
-
|
| 85 |
try:
|
| 86 |
-
# Use contextlib to redirect stdout and stderr
|
| 87 |
with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
|
| 88 |
-
# Execute the code. Provide 'pd' (pandas) in the globals
|
| 89 |
exec(code, {"pd": pd}, {})
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
if stderr:
|
| 95 |
-
return f"Error: {stderr}\nStdout: {stdout}"
|
| 96 |
-
if stdout:
|
| 97 |
-
return f"Success:\n{stdout}"
|
| 98 |
return "Success: Code executed without error and produced no stdout."
|
| 99 |
-
|
| 100 |
-
except Exception as e:
|
| 101 |
-
# Capture any exception during exec
|
| 102 |
-
return f"Execution failed with error: {str(e)}"
|
| 103 |
|
| 104 |
@tool
|
| 105 |
def read_file(path: str) -> str:
|
| 106 |
"""Reads the content of a file at the specified path. Use this to examine files provided in the question."""
|
| 107 |
print(f"--- Calling Read File Tool at path: {path} ---")
|
| 108 |
try:
|
| 109 |
-
|
| 110 |
-
script_dir = os.getcwd() # Changed from __file__ for broader compatibility
|
| 111 |
print(f"Base directory for reading: {script_dir}")
|
| 112 |
-
|
| 113 |
full_path = os.path.join(script_dir, path)
|
| 114 |
print(f"Attempting to read relative path: {full_path}")
|
| 115 |
if not os.path.exists(full_path):
|
| 116 |
-
# If not found, try the direct path (might be absolute)
|
| 117 |
full_path = path
|
| 118 |
print(f"Attempting to read direct/absolute path: {full_path}")
|
| 119 |
if not os.path.exists(full_path):
|
| 120 |
-
# Try basename in CWD as last resort (GAIA might just give filename)
|
| 121 |
base_path = os.path.basename(path)
|
| 122 |
cwd_base_path = os.path.join(os.getcwd(), base_path)
|
| 123 |
print(f"Attempting to read basename path in CWD: {cwd_base_path}")
|
| 124 |
-
if os.path.exists(cwd_base_path):
|
| 125 |
-
full_path = cwd_base_path
|
| 126 |
else:
|
| 127 |
-
# List files for debugging
|
| 128 |
try: cwd_files = os.listdir(".")
|
| 129 |
except Exception as list_e: cwd_files = [f"Error listing CWD: {list_e}"]
|
| 130 |
return (f"Error: File not found.\n"
|
|
@@ -132,29 +112,22 @@ def read_file(path: str) -> str:
|
|
| 132 |
f"Tried direct/absolute: '{path}'\n"
|
| 133 |
f"Tried basename in CWD: '{cwd_base_path}'\n"
|
| 134 |
f"Files in CWD (.): {cwd_files}")
|
| 135 |
-
|
| 136 |
print(f"Reading file: {full_path}")
|
| 137 |
-
with open(full_path, 'r', encoding='utf-8') as f:
|
| 138 |
-
|
| 139 |
-
except Exception as e:
|
| 140 |
-
return f"Error reading file {path}: {str(e)}"
|
| 141 |
|
| 142 |
@tool
|
| 143 |
def write_file(path: str, content: str) -> str:
|
| 144 |
"""Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
|
| 145 |
print(f"--- Calling Write File Tool at path: {path} ---")
|
| 146 |
try:
|
| 147 |
-
# Ensure the directory exists relative to CWD
|
| 148 |
base_dir = os.getcwd()
|
| 149 |
full_path = os.path.join(base_dir, path)
|
| 150 |
print(f"Writing file to: {full_path}")
|
| 151 |
os.makedirs(os.path.dirname(full_path), exist_ok=True)
|
| 152 |
-
|
| 153 |
-
with open(full_path, 'w', encoding='utf-8') as f:
|
| 154 |
-
f.write(content)
|
| 155 |
return f"Successfully wrote to file {path} (relative to CWD)."
|
| 156 |
-
except Exception as e:
|
| 157 |
-
return f"Error writing to file {path}: {str(e)}"
|
| 158 |
|
| 159 |
@tool
|
| 160 |
def list_directory(path: str = ".") -> str:
|
|
@@ -164,115 +137,85 @@ def list_directory(path: str = ".") -> str:
|
|
| 164 |
base_dir = os.getcwd()
|
| 165 |
full_path = os.path.join(base_dir, path)
|
| 166 |
print(f"Listing directory: {full_path}")
|
| 167 |
-
if not os.path.isdir(full_path):
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
files = os.listdir(full_path)
|
| 171 |
-
return "\n".join(files) if files else "Directory is empty."
|
| 172 |
-
except Exception as e:
|
| 173 |
-
return f"Error listing directory {path}: {str(e)}"
|
| 174 |
|
| 175 |
@tool
|
| 176 |
def audio_transcription_tool(file_path: str) -> str:
|
| 177 |
-
"""
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
"""
|
| 181 |
-
print(f"--- Calling Audio Transcription Tool at path: {file_path} ---")
|
| 182 |
-
# Access the globally loaded pipeline
|
| 183 |
-
if asr_pipeline is None:
|
| 184 |
-
return "Error: Audio transcription pipeline is not available or failed to load."
|
| 185 |
try:
|
| 186 |
-
|
| 187 |
-
script_dir = os.getcwd() # Base directory
|
| 188 |
full_path = os.path.join(script_dir, file_path)
|
| 189 |
-
print(f"Attempting to transcribe relative path: {full_path}")
|
| 190 |
if not os.path.exists(full_path):
|
| 191 |
-
full_path = file_path
|
| 192 |
-
print(f"Attempting to transcribe direct/absolute path: {full_path}")
|
| 193 |
if not os.path.exists(full_path):
|
| 194 |
base_path = os.path.basename(file_path)
|
| 195 |
cwd_base_path = os.path.join(os.getcwd(), base_path)
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
full_path = cwd_base_path
|
| 199 |
-
else:
|
| 200 |
-
try: cwd_files = os.listdir(".")
|
| 201 |
-
except Exception as list_e: cwd_files = [f"Error listing CWD: {list_e}"]
|
| 202 |
-
return (f"Error: Audio file not found.\n"
|
| 203 |
-
f"Tried relative: '{os.path.join(script_dir, file_path)}'\n"
|
| 204 |
-
f"Tried direct/absolute: '{file_path}'\n"
|
| 205 |
-
f"Tried basename in CWD: '{cwd_base_path}'\n"
|
| 206 |
-
f"Files in CWD (.): {cwd_files}")
|
| 207 |
-
|
| 208 |
-
print(f"Transcribing file: {full_path}")
|
| 209 |
transcription = asr_pipeline(full_path)
|
| 210 |
-
print("--- Transcription Complete ---")
|
| 211 |
return transcription.get("text", "Error: Transcription failed.")
|
| 212 |
-
except Exception as e:
|
| 213 |
-
import traceback; traceback.print_exc()
|
| 214 |
-
return f"Error during audio transcription: {str(e)}"
|
| 215 |
|
| 216 |
@tool
|
| 217 |
def get_youtube_transcript(video_url: str) -> str:
|
| 218 |
-
"""
|
| 219 |
-
|
| 220 |
-
"""
|
| 221 |
-
print(f"--- Calling YouTube Transcript Tool for URL: {video_url} ---")
|
| 222 |
try:
|
| 223 |
video_id = None
|
| 224 |
if "watch?v=" in video_url: video_id = video_url.split("v=")[1].split("&")[0]
|
| 225 |
elif "youtu.be/" in video_url: video_id = video_url.split("youtu.be/")[1].split("?")[0]
|
| 226 |
-
if not video_id: return f"Error:
|
| 227 |
transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
|
| 228 |
full_transcript = " ".join([item["text"] for item in transcript_list])
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
except Exception as e: return f"Error fetching YouTube transcript: {str(e)}"
|
| 232 |
|
| 233 |
@tool
|
| 234 |
def scrape_web_page(url: str) -> str:
|
| 235 |
-
"""
|
| 236 |
-
|
| 237 |
-
Use this when you need the full content of a webpage found via search.
|
| 238 |
-
"""
|
| 239 |
-
print(f"--- Calling Web Scraper Tool for URL: {url} ---")
|
| 240 |
try:
|
| 241 |
headers = {'User-Agent': 'Mozilla/5.0'}
|
| 242 |
-
response = requests.get(url, headers=headers, timeout=15)
|
| 243 |
-
response.
|
| 244 |
-
if 'html' not in response.headers.get('Content-Type', '').lower(): return f"Error: URL {url} did not return HTML."
|
| 245 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 246 |
for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]): tag.extract()
|
| 247 |
main_content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body or soup
|
| 248 |
text = main_content.get_text(separator='\n', strip=True)
|
| 249 |
-
lines = (line.strip() for line in text.splitlines())
|
| 250 |
-
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
| 251 |
text = '\n'.join(chunk for chunk in chunks if chunk)
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
# --- Helper Function for Cleaning Fences ---
|
| 258 |
-
# +++++++++++++++++++ ADDED FUNCTION DEFINITION +++++++++++++++++++
|
| 259 |
def remove_fences_simple(text):
|
| 260 |
"""Removes triple backtick fences and optional language identifiers."""
|
| 261 |
-
original_text = text
|
| 262 |
-
text = text.strip()
|
| 263 |
if text.startswith("```") and text.endswith("```"):
|
| 264 |
-
text = text[3:-3].strip()
|
| 265 |
-
# Attempt to remove language identifier if present
|
| 266 |
if '\n' in text:
|
| 267 |
first_line, rest = text.split('\n', 1)
|
| 268 |
-
# Simple check: is the first line short and likely a language tag?
|
| 269 |
if first_line.strip().replace('_','').isalnum() and len(first_line.strip()) < 15:
|
| 270 |
text = rest.strip()
|
| 271 |
return text
|
| 272 |
-
return original_text
|
| 273 |
-
#
|
| 274 |
-
|
| 275 |
-
# --- End of Tool Definitions ---
|
| 276 |
|
| 277 |
# List of standalone tool functions
|
| 278 |
defined_tools = [
|
|
@@ -283,13 +226,29 @@ defined_tools = [
|
|
| 283 |
list_directory,
|
| 284 |
audio_transcription_tool,
|
| 285 |
get_youtube_transcript,
|
| 286 |
-
scrape_web_page
|
|
|
|
| 287 |
]
|
| 288 |
|
| 289 |
# --- LangGraph Agent State ---
|
| 290 |
class AgentState(TypedDict):
|
| 291 |
messages: Annotated[list[AnyMessage], add_messages]
|
| 292 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
# --- Basic Agent Definition ---
|
| 295 |
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
|
@@ -297,64 +256,73 @@ class BasicAgent:
|
|
| 297 |
|
| 298 |
def __init__(self):
|
| 299 |
print("BasicAgent (LangGraph) initializing...")
|
|
|
|
| 300 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 301 |
if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
|
| 302 |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 303 |
if not HUGGINGFACEHUB_API_TOKEN: print("⚠️ Warning: HUGGINGFACEHUB_API_TOKEN secret not set.")
|
| 304 |
|
| 305 |
self.tools = defined_tools
|
| 306 |
-
tool_descriptions = "\n".join([
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
# ==================== MODIFIED SYSTEM PROMPT ====================
|
| 309 |
self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
|
| 310 |
-
Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question
|
| 311 |
|
| 312 |
**CRITICAL INSTRUCTIONS:**
|
| 313 |
-
* **DO NOT**
|
| 314 |
-
* **
|
| 315 |
-
* **DO NOT**
|
| 316 |
-
* **
|
| 317 |
|
| 318 |
You have access to the following tools:
|
| 319 |
{tool_descriptions}
|
| 320 |
|
| 321 |
**TOOL USAGE PROTOCOL:**
|
| 322 |
-
* To
|
| 323 |
-
|
| 324 |
{{
|
| 325 |
"tool": "tool_name",
|
| 326 |
"tool_input": {{ "arg_name1": "value1", ... }}
|
| 327 |
}}
|
| 328 |
-
|
| 329 |
* Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
|
| 330 |
-
*
|
| 331 |
|
| 332 |
**REASONING PROCESS & STOPPING CONDITION:**
|
| 333 |
-
1.
|
| 334 |
-
2.
|
| 335 |
-
3.
|
| 336 |
-
4.
|
| 337 |
-
5.
|
| 338 |
-
6.
|
| 339 |
-
a. **If the tool output directly contains the final answer
|
| 340 |
-
b. **If more steps or tools are needed:** Continue reasoning,
|
| 341 |
-
|
|
|
|
| 342 |
"""
|
| 343 |
# =============================================================
|
| 344 |
|
| 345 |
-
|
| 346 |
-
print("Initializing Groq LLM...")
|
| 347 |
try:
|
| 348 |
-
chat_llm = ChatGroq(
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
except Exception as e: print(f"Error initializing Groq: {e}"); raise
|
| 351 |
|
| 352 |
self.llm_with_tools = chat_llm.bind_tools(self.tools)
|
| 353 |
-
print("✅ Tools bound to LLM.")
|
| 354 |
|
| 355 |
-
# Define Agent Node
|
| 356 |
def agent_node(state: AgentState):
|
| 357 |
-
# ... (agent_node implementation remains the same) ...
|
| 358 |
print("--- Running Agent Node ---")
|
| 359 |
ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
|
| 360 |
print(f"AI Message Raw Content: {ai_message.content}")
|
|
@@ -363,39 +331,53 @@ You have access to the following tools:
|
|
| 363 |
else: print(f"AI content (no calls): {ai_message.pretty_repr()}")
|
| 364 |
return {"messages": [ai_message]}
|
| 365 |
|
| 366 |
-
|
| 367 |
tool_node = ToolNode(self.tools)
|
| 368 |
|
| 369 |
-
# Create Graph
|
| 370 |
print("Building agent graph...")
|
| 371 |
graph_builder = StateGraph(AgentState)
|
| 372 |
graph_builder.add_node("agent", agent_node)
|
| 373 |
graph_builder.add_node("tools", tool_node)
|
| 374 |
graph_builder.add_edge(START, "agent")
|
| 375 |
-
graph_builder.add_conditional_edges("agent",
|
| 376 |
graph_builder.add_edge("tools", "agent")
|
| 377 |
self.graph = graph_builder.compile()
|
| 378 |
print("✅ Graph compiled.")
|
| 379 |
|
|
|
|
| 380 |
def __call__(self, question: str) -> str:
|
| 381 |
-
|
| 382 |
-
print(f"
|
| 383 |
-
|
| 384 |
-
graph_input = {"messages": [
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
try:
|
| 387 |
for event in self.graph.stream(graph_input, stream_mode="values", config={"recursion_limit": 25}):
|
| 388 |
last_message = event["messages"][-1]
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
|
|
|
|
|
|
| 395 |
elif isinstance(last_message, ToolMessage):
|
| 396 |
print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
-
cleaned_answer = final_answer_content.strip()
|
| 399 |
prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
|
| 400 |
original_cleaned = cleaned_answer
|
| 401 |
for prefix in prefixes_to_remove:
|
|
@@ -406,26 +388,25 @@ You have access to the following tools:
|
|
| 406 |
print(f"Warning: Prefix found but not stripped: '{original_cleaned[:100]}...'")
|
| 407 |
|
| 408 |
looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class ", "=>", "dict(", "list["]) or cleaned_answer.count('\n') > 3 or (cleaned_answer.startswith('[') and cleaned_answer.endswith(']')) or (cleaned_answer.startswith('{') and cleaned_answer.endswith('}'))
|
|
|
|
| 409 |
if not looks_like_code:
|
|
|
|
| 410 |
cleaned_answer = remove_fences_simple(cleaned_answer) # Use the helper function
|
|
|
|
| 411 |
if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
|
| 412 |
cleaned_answer = cleaned_answer[1:-1].strip()
|
| 413 |
|
| 414 |
print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
return final_answer_content.strip()
|
| 418 |
-
return cleaned_answer if cleaned_answer else "AGENT FAILED TO PRODUCE ANSWER"
|
| 419 |
except Exception as e:
|
| 420 |
print(f"Error running agent graph: {e}")
|
| 421 |
import traceback; traceback.print_exc()
|
| 422 |
return f"AGENT GRAPH ERROR: {e}"
|
|
|
|
| 423 |
|
| 424 |
|
| 425 |
# --- (Original Template Code - Mock Questions Version) ---
|
| 426 |
-
# ... (run_and_submit_all function remains the same) ...
|
| 427 |
-
# ... (Gradio UI remains the same) ...
|
| 428 |
-
# ... (__main__ block remains the same) ...
|
| 429 |
|
| 430 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 431 |
""" MOCK RUN: Runs agent on mock Qs, displays results. DOES NOT SUBMIT. """
|
|
@@ -433,100 +414,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 433 |
username = profile.username if profile else "local_test_user"
|
| 434 |
print(f"User: {username}{'' if profile else ' (dummy)'}")
|
| 435 |
|
| 436 |
-
submit_url = f"{DEFAULT_API_URL}/submit"
|
| 437 |
|
| 438 |
print("Instantiating agent...")
|
| 439 |
try:
|
| 440 |
agent = BasicAgent()
|
| 441 |
if asr_pipeline is None: print("⚠️ Global ASR Pipeline failed load.")
|
| 442 |
except Exception as e: print(f"Error instantiating agent: {e}"); import traceback; traceback.print_exc(); return f"Error initializing agent: {e}", None
|
| 443 |
-
print("Agent instantiated.")
|
| 444 |
|
| 445 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
|
| 446 |
print(f"Agent code URL: {agent_code}")
|
| 447 |
|
| 448 |
print("--- USING MOCK QUESTIONS ---")
|
| 449 |
mock_questions_data = [
|
| 450 |
-
|
| 451 |
-
"task_id": "
|
| 452 |
-
"question": r"""Here's a fun riddle that I'd like you to try.\n\nAn adventurer exploring an ancient tomb came across a horde of gold coins, all neatly stacked in columns. As he reached to scoop them into his backpack, a mysterious voice filled the room. \"You have fallen for my trap adventurer,\" the voice began, and suddenly the doorway to the chamber was sealed by a heavy rolling disk of stone. The adventurer tried to move the stone disk but was unable to budge the heavy stone. Trapped, he was startled when the voice again spoke. \n\n\"If you solve my riddle, I will reward you with a portion of my riches, but if you are not clever, you will never leave this treasure chamber. Before you are 200 gold coins. I pose a challenge to you, adventurer. Within these stacks of coins, all but 30 are face-up. You must divide the coins into two piles, one is yours, and one is mine. You may place as many coins as you like in either pile. You may flip any coins over, but you may not balance any coins on their edges. For every face-down coin in your pile, you will be rewarded with two gold coins. But be warned, if both piles do not contain the same number of face-down coins, the door will remain sealed for all eternity!\"\n\nThe adventurer smiled, as this would be an easy task. All he had to do was flip over every coin so it was face down, and he would win the entire treasure! As he moved to the columns of coins, however, the light suddenly faded, and he was left in total darkness. The adventurer reached forward and picked up one of the coins, and was shocked when he realized that both sides felt almost the same. Without the light, he was unable to determine which side of the coin was heads and which side was tails. He carefully replaced the coin in its original orientation and tried to think of a way to solve the puzzle. Finally, out of desperation, the adventurer removed 30 coins to create his pile. 
He then carefully flipped over each coin in his pile, so its orientation was inverted from its original state.\n\n\"I've finished,\" he said, and the lights returned. Looking at the two piles, he noticed that the larger pile contained 14 face-down coins.\n\nWhat was the outcome for the adventurer? If he failed the challenge, please respond with \"The adventurer died.\" Otherwise, please provide the number of coins the adventurer won at the conclusion of the riddle. If the adventurer won any coins, provide your response as the number of coins, with no other text."""
|
| 453 |
-
},
|
| 454 |
-
{
|
| 455 |
-
"task_id": "mock_level1_002",
|
| 456 |
-
"question": r"""If you use some of the letters in the given Letter Bank to spell out the sentence "I am a penguin halfway to the moon", which of the remaining unused letters would have to be changed to spell out, "The moon is made of cheese"? Return a comma-separated alphabetized list.\nLetter Bank: {OAMFETIMPECRFSHTDNIWANEPNOFAAIYOOMGUTNAHHLNEHCME}"""
|
| 457 |
-
},
|
| 458 |
-
{
|
| 459 |
-
"task_id": "mock_level1_003",
|
| 460 |
-
"question": r"""A data annotator stayed up too late creating test questions to check that a system was working properly and submitted several questions with mathematical errors. On nights when they created 15 test questions, they made 1 error. On nights when they created fewer than 15 questions, they also corrected 3 errors. On nights they created 20 questions, they made 0 errors. On nights when they created 25 or more, they made 4 errors. Over the course of five nights, the worker produced a total of 6 errors. When asked how many nights they created 15 questions, they gave three possible numbers as responses. What are the three numbers, presented in the format x, y, z in ascending order?"""
|
| 461 |
-
},
|
| 462 |
-
{
|
| 463 |
-
"task_id": "mock_level1_004",
|
| 464 |
-
"question": r"""Please solve the following crossword:\n\n|1|2|3|4|5|\n|6| | | | |\n|7| | | | |\n|8| | | | |\n|X|9| | | |\n\nI have indicated by numbers where the hints start, so you should replace numbers and spaces by the answers.\nAnd X denotes a black square that isn\u2019t to fill.\n\nACROSS\n- 1 Wooden strips on a bed frame\n- 6 _ Minhaj, Peabody-winning comedian for "Patriot Act"\n- 7 Japanese city of 2.6+ million\n- 8 Stopwatch, e.g.\n- 9 Pain in the neck\n\nDOWN\n- 1 Quick drink of whiskey\n- 2 Eye procedure\n- 3 "Same here," in a three-word phrase\n- 4 Already occupied, as a seat\n- 5 Sarcastically critical commentary. Answer by concatenating the characters you choose to fill the crossword, in row-major order."""
|
| 465 |
-
},
|
| 466 |
-
{
|
| 467 |
-
"task_id": "mock_level1_005",
|
| 468 |
-
"question": r"""I wanted to make another batch of cherry melomel. I remember liking the last recipe I tried, but I can't remember it off the top of my head. It was from the Reddit, r/mead. I remember that the user who made it had a really distinct name, I think it was StormBeforeDawn. Could you please look up the recipe for me? I'm not sure if it has been changed, so please make sure that the recipe you review wasn't updated after July 14, 2022. That's the last time I tried the recipe.\n\nWhat I want to know is how many cherries I'm supposed to use. I'm making a 10-gallon batch in two 5-gallon carboys. Please just respond with the integer number of pounds of whole cherries with pits that are supposed to be used for a 10-gallon batch."""
|
| 469 |
-
},
|
| 470 |
-
{
|
| 471 |
-
"task_id": "mock_level1_006",
|
| 472 |
-
"question": r"""Verify each of the following ISBN 13 numbers:\n\n1. 9783518188156\n2. 9788476540746\n3. 9788415091004\n4. 9788256014590\n5. 9782046407331\n\nIf any are invalid, correct them by changing the final digit. Then, return the list, comma separated, in the same order as in the question."""
|
| 473 |
-
},
|
| 474 |
-
{
|
| 475 |
-
"task_id": "mock_level1_007",
|
| 476 |
-
"question": r"""A porterhouse by any other name is centered around a letter. What does Three Dog Night think about the first natural number that starts with that letter? Give the first line from the lyrics that references it."""
|
| 477 |
-
},
|
| 478 |
-
{
|
| 479 |
-
"task_id": "mock_level1_008",
|
| 480 |
-
"question": r"""Bob has genome type Aa, and Linda has genome type Aa. Assuming that a child of theirs also has a child with someone who also has genome type Aa, what is the probability that Bob and Linda's grandchild will have Genome type Aa? Write the answer as a percentage, rounding to the nearest integer if necessary."""
|
| 481 |
-
},
|
| 482 |
-
{
|
| 483 |
-
"task_id": "mock_level1_009",
|
| 484 |
-
"question": r"""An array of candy is set out to choose from including gumballs, candy corn, gumdrops, banana taffy, chocolate chips, and gummy bears. There is one bag of each type of candy. The gumballs come in red, orange, yellow, green, blue, and brown. The candy corn is yellow, white, and orange. The gumdrops are red, green, purple, yellow, and orange. The banana taffy is yellow. The chocolate chips are brown and white. The gummy bears are red, green, yellow, and orange. Five people pass through and each selects one bag. The first selects one with only primary colors. The second selects one with no primary colors. The third selects one with all the primary colors. The fourth selects one that has neither the most nor the least colors of the remaining bags. The fifth selects the one with their favorite color, green. A second bag of the candy the first person chose is added to the remaining bag of candy. Which two candies are in the remaining bag after the addition? Give me them in a comma separated list, in alphabetical order"""
|
| 485 |
-
},
|
| 486 |
-
{
|
| 487 |
-
"task_id": "mock_level1_010",
|
| 488 |
-
"question": r"""In the year 2020, where were koi fish found in the watershed with the id 02040203? Give only the name of the pond, lake, or stream where the fish were found, and not the name of the city or county."""
|
| 489 |
-
},
|
| 490 |
-
{
|
| 491 |
-
"task_id": "mock_level1_011",
|
| 492 |
-
"question": r"""In Sonia Sanchez\u2019s poem \u201cfather\u2019s voice\u201d, what primary colour is evoked by the imagery in the beginning of the tenth stanza? Answer with a capitalized word."""
|
| 493 |
-
},
|
| 494 |
-
{
|
| 495 |
-
"task_id": "mock_level1_012",
|
| 496 |
-
"question": r"""According to Papers with Code, what was the name of the first model to go beyond 70% of accuracy on ImageNet ?"""
|
| 497 |
-
},
|
| 498 |
-
{
|
| 499 |
-
"task_id": "mock_level1_013",
|
| 500 |
-
"question": r"""What is the dimension of the boundary of the tame twindragon rounded to two decimal places?"""
|
| 501 |
-
},
|
| 502 |
-
{
|
| 503 |
-
"task_id": "mock_level1_014",
|
| 504 |
-
"question": r"""In what year was the home village of the subject of British Museum item #Bb,11.118 founded?"""
|
| 505 |
-
},
|
| 506 |
-
{
|
| 507 |
-
"task_id": "mock_level1_015",
|
| 508 |
-
"question": r"""What is the ISSN of the journal that included G. Scott's potato article that mentioned both a fast food restaurant and a Chinese politician in the title in a 2012 issue?"""
|
| 509 |
-
},
|
| 510 |
-
{
|
| 511 |
-
"task_id": "mock_level1_016",
|
| 512 |
-
"question": r"""VNV Nation has a song that shares its title with the nickname of Louis XV. What album was it released with?"""
|
| 513 |
-
},
|
| 514 |
-
{
|
| 515 |
-
"task_id": "mock_level1_017",
|
| 516 |
-
"question": r"""If I combine a Beatle's first name and a type of beer, in what category and year of Nobel Prize do I have a winner? Answer using the format CATEGORY, YEAR."""
|
| 517 |
-
},
|
| 518 |
-
{
|
| 519 |
-
"task_id": "mock_level1_018",
|
| 520 |
-
"question": r"""In the version of NumPy where the numpy.msort function was deprecated, which attribute was added to the numpy.polynomial package's polynomial classes?"""
|
| 521 |
-
},
|
| 522 |
-
{
|
| 523 |
-
"task_id": "mock_level1_019",
|
| 524 |
-
"question": r"""A word meaning dramatic or theatrical forms a species of duck when appended with two letters and then duplicated. What is that word?"""
|
| 525 |
-
},
|
| 526 |
-
{
|
| 527 |
-
"task_id": "mock_level1_020",
|
| 528 |
-
"question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
|
| 529 |
-
},
|
| 530 |
# {"task_id": "mock_audio_001", "question": "Transcribe 'sample.mp3'", "file_path": "sample.mp3"}, # Needs sample.mp3
|
| 531 |
]
|
| 532 |
questions_data = mock_questions_data
|
|
@@ -572,6 +475,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 572 |
results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
|
| 573 |
return final_status, results_df
|
| 574 |
|
|
|
|
| 575 |
# --- Build Gradio Interface ---
|
| 576 |
with gr.Blocks() as demo:
|
| 577 |
gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
|
|
@@ -587,17 +491,18 @@ with gr.Blocks() as demo:
|
|
| 587 |
|
| 588 |
if __name__ == "__main__":
|
| 589 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
if space_host: print(f"✅ SPACE_HOST: {space_host}")
|
| 593 |
else: print("ℹ️ No SPACE_HOST (local?).")
|
| 594 |
-
if
|
| 595 |
else: print("ℹ️ No SPACE_ID (local?).")
|
|
|
|
|
|
|
|
|
|
| 596 |
print(f"CWD: {os.getcwd()}")
|
| 597 |
try: print("Files in CWD:", os.listdir("."))
|
| 598 |
-
except
|
| 599 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 600 |
print("Launching Gradio Interface...")
|
| 601 |
demo.queue().launch(debug=True, share=False)
|
| 602 |
|
| 603 |
-
|
|
|
|
| 22 |
# Make sure to import ToolCall
|
| 23 |
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage, ToolCall
|
| 24 |
from langgraph.prebuilt import ToolNode
|
| 25 |
+
from langgraph.graph import START, END, StateGraph
|
| 26 |
+
# Removed tools_condition, we'll use a custom one
|
| 27 |
from langchain_community.tools import DuckDuckGoSearchRun
|
| 28 |
from langchain_core.tools import tool, BaseTool
|
| 29 |
# --- ADD GROQ IMPORT ---
|
|
|
|
| 34 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # This URL is currently not working
|
| 35 |
|
| 36 |
# --- Initialize ASR Pipeline (Moved back to Global Scope) ---
|
| 37 |
+
asr_pipeline = None
|
|
|
|
| 38 |
try:
|
| 39 |
print("Loading ASR (Whisper) pipeline globally...")
|
| 40 |
+
device = 0 if torch.cuda.is_available() else -1
|
|
|
|
| 41 |
device_name = "cuda:0" if device == 0 else "cpu"
|
| 42 |
print(f"Attempting to use device: {device_name} for ASR.")
|
| 43 |
asr_pipeline = pipeline(
|
| 44 |
"automatic-speech-recognition",
|
| 45 |
model="openai/whisper-base",
|
|
|
|
| 46 |
torch_dtype=torch.float16 if device == 0 else torch.float32,
|
| 47 |
+
device=device
|
| 48 |
)
|
| 49 |
print("✅ ASR (Whisper) pipeline loaded successfully.")
|
| 50 |
except Exception as e:
|
| 51 |
+
print(f"⚠️ Warning: Could not load ASR pipeline globally. Error: {e}")
|
| 52 |
import traceback
|
| 53 |
+
traceback.print_exc()
|
| 54 |
asr_pipeline = None
|
| 55 |
# ====================================================
|
| 56 |
|
|
|
|
| 78 |
print(f"--- Calling Code Interpreter with code:\n{code}\n---")
|
| 79 |
output_stream = io.StringIO()
|
| 80 |
error_stream = io.StringIO()
|
|
|
|
| 81 |
try:
|
|
|
|
| 82 |
with contextlib.redirect_stdout(output_stream), contextlib.redirect_stderr(error_stream):
|
|
|
|
| 83 |
exec(code, {"pd": pd}, {})
|
| 84 |
+
stdout = output_stream.getvalue(); stderr = error_stream.getvalue()
|
| 85 |
+
if stderr: return f"Error: {stderr}\nStdout: {stdout}"
|
| 86 |
+
if stdout: return f"Success:\n{stdout}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
return "Success: Code executed without error and produced no stdout."
|
| 88 |
+
except Exception as e: return f"Execution failed with error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
@tool
def read_file(path: str) -> str:
    """Reads the content of a file at the specified path. Use this to examine files provided in the question."""
    print(f"--- Calling Read File Tool at path: {path} ---")
    try:
        # Resolution order: CWD-relative -> path as given -> bare basename in CWD.
        cwd = os.getcwd()
        print(f"Base directory for reading: {cwd}")
        candidate = os.path.join(cwd, path)
        print(f"Attempting to read relative path: {candidate}")
        if not os.path.exists(candidate):
            candidate = path
            print(f"Attempting to read direct/absolute path: {candidate}")
            if not os.path.exists(candidate):
                fallback = os.path.join(os.getcwd(), os.path.basename(path))
                print(f"Attempting to read basename path in CWD: {fallback}")
                if os.path.exists(fallback):
                    candidate = fallback
                else:
                    # Nothing matched: report every attempted location plus a
                    # CWD listing so the agent can self-correct its path.
                    try:
                        cwd_files = os.listdir(".")
                    except Exception as list_e:
                        cwd_files = [f"Error listing CWD: {list_e}"]
                    return (f"Error: File not found.\n"
                            f"Tried direct/absolute: '{path}'\n"
                            f"Tried basename in CWD: '{fallback}'\n"
                            f"Files in CWD (.): {cwd_files}")
        print(f"Reading file: {candidate}")
        with open(candidate, 'r', encoding='utf-8') as handle:
            return handle.read()
    except Exception as e:
        # Errors are returned as strings so the agent can react to them.
        return f"Error reading file {path}: {str(e)}"
|
|
|
|
|
|
|
| 118 |
|
| 119 |
@tool
def write_file(path: str, content: str) -> str:
    """Writes the given content to a file at the specified path relative to the app's current directory. Creates directories if they don't exist."""
    print(f"--- Calling Write File Tool at path: {path} ---")
    try:
        # Always resolve relative to the CWD so the agent's files live together.
        destination = os.path.join(os.getcwd(), path)
        print(f"Writing file to: {destination}")
        # Create any missing parent directories before opening the file.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        with open(destination, 'w', encoding='utf-8') as handle:
            handle.write(content)
        return f"Successfully wrote to file {path} (relative to CWD)."
    except Exception as e:
        # Errors are returned as strings so the agent can react to them.
        return f"Error writing to file {path}: {str(e)}"
|
|
|
|
| 131 |
|
| 132 |
@tool
def list_directory(path: str = ".") -> str:
    """Lists the files and subdirectories inside the given directory path (defaults to the current directory)."""
    # NOTE(review): the docstring and the try-block opener were missing in this
    # block as received ('except' without 'try' is a syntax error, and @tool
    # requires a docstring for the tool description); both are restored here —
    # confirm against the original source.
    try:
        base_dir = os.getcwd()
        # Paths are interpreted relative to the CWD, like the other file tools.
        full_path = os.path.join(base_dir, path)
        print(f"Listing directory: {full_path}")
        if not os.path.isdir(full_path):
            return f"Error: '{path}' is not a valid directory."
        files = os.listdir(full_path)
        return "\n".join(files) if files else "Directory is empty."
    except Exception as e:
        # Errors are returned as strings so the agent can react to them.
        return f"Error listing directory {path}: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
@tool
def audio_transcription_tool(file_path: str) -> str:
    """Transcribes an audio file (like .mp3 or .wav) and returns the text content."""
    print(f"--- Calling Audio Transcription: {file_path} ---")
    # Whisper is loaded once at module import; bail out if that load failed.
    if asr_pipeline is None:
        return "Error: ASR pipeline unavailable."
    try:
        # Same resolution order as read_file: CWD-relative, as given, basename in CWD.
        resolved = os.path.join(os.getcwd(), file_path)
        if not os.path.exists(resolved):
            resolved = file_path
            if not os.path.exists(resolved):
                in_cwd = os.path.join(os.getcwd(), os.path.basename(file_path))
                if os.path.exists(in_cwd):
                    resolved = in_cwd
                else:
                    return f"Error: Audio file not found."
        result = asr_pipeline(resolved)
        # The ASR pipeline returns a dict whose "text" key holds the transcript.
        return result.get("text", "Error: Transcription failed.")
    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error transcribing: {e}"
|
|
|
|
|
|
|
| 162 |
|
| 163 |
@tool
def get_youtube_transcript(video_url: str) -> str:
    """Fetches YouTube transcript."""
    print(f"--- Calling YouTube Transcript: {video_url} ---")
    try:
        # Pull the video id out of either the long or the short URL form.
        video_id = None
        if "watch?v=" in video_url:
            video_id = video_url.split("v=")[1].split("&")[0]
        elif "youtu.be/" in video_url:
            video_id = video_url.split("youtu.be/")[1].split("?")[0]
        if not video_id:
            return f"Error: Invalid YouTube URL."
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = " ".join(segment["text"] for segment in segments)
        # Cap the payload so it fits comfortably in the LLM context window.
        return transcript[:8000]
    except Exception as e:
        return f"Error getting transcript: {e}"
|
|
|
|
| 176 |
|
| 177 |
@tool
def scrape_web_page(url: str) -> str:
    """Fetches primary text content of a webpage."""
    print(f"--- Calling Web Scraper: {url} ---")
    try:
        # A browser-like UA avoids the most naive bot blocks.
        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=15)
        response.raise_for_status()
        if 'html' not in response.headers.get('Content-Type', '').lower():
            return f"Error: Not HTML."
        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop chrome/boilerplate elements before extracting text.
        for tag in soup(["script", "style", "nav", "footer", "aside", "header", "form", "button", "input"]):
            tag.extract()
        # Prefer semantic containers; fall back to the body, then the whole doc.
        content = soup.find('main') or soup.find('article') or soup.find('div', role='main') or soup.body or soup
        raw_text = content.get_text(separator='\n', strip=True)
        # Collapse whitespace: split each line on double spaces, keep non-empty phrases.
        stripped_lines = (ln.strip() for ln in raw_text.splitlines())
        phrases = (piece.strip() for ln in stripped_lines for piece in ln.split("  "))
        cleaned = '\n'.join(piece for piece in phrases if piece)
        # Cap the payload for the LLM context window.
        return cleaned[:8000]
    except Exception as e:
        return f"Error scraping {url}: {e}"
|
| 193 |
+
|
| 194 |
+
# +++++++++++++++++++ NEW FINAL ANSWER TOOL +++++++++++++++++++
# Sentinel tool: calling it is the agent's only sanctioned way to finish a run.
@tool
def final_answer_tool(answer: str) -> str:
    """
    Call this tool *only* when you have the final, definitive answer to the user's question.
    The 'answer' argument should be the single, concise, factual answer, formatted exactly as requested by the user's prompt.
    """
    # The graph's conditional edge watches for this tool name to stop the run,
    # and BasicAgent.__call__ reads the final answer out of the tool-call args.
    print(f"--- AGENT CALLING FINAL ANSWER TOOL ---")
    return answer
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
| 204 |
|
| 205 |
# --- Helper Function for Cleaning Fences ---
|
|
|
|
| 206 |
def remove_fences_simple(text: str) -> str:
    """Removes triple backtick fences and optional language identifiers.

    If *text* (after stripping outer whitespace) is wrapped in ``` fences, the
    fences are removed; a short alphanumeric first line (e.g. "python", "json")
    is treated as a fence language tag and dropped too. Text without fences is
    returned completely unchanged (including its original whitespace).
    """
    original_text = text
    text = text.strip()
    # BUGFIX: require len >= 6 so degenerate inputs such as "```" or "`````",
    # where startswith/endswith both match on overlapping characters, are not
    # destroyed to "" by the slice below.
    if len(text) >= 6 and text.startswith("```") and text.endswith("```"):
        text = text[3:-3].strip()
        if '\n' in text:
            first_line, rest = text.split('\n', 1)
            # Heuristic: a fence language tag is one short alphanumeric word.
            if first_line.strip().replace('_', '').isalnum() and len(first_line.strip()) < 15:
                text = rest.strip()
        return text
    return original_text
|
| 218 |
+
# --- End Helper ---
|
|
|
|
|
|
|
| 219 |
|
| 220 |
# List of standalone tool functions
|
| 221 |
defined_tools = [
|
|
|
|
| 226 |
list_directory,
|
| 227 |
audio_transcription_tool,
|
| 228 |
get_youtube_transcript,
|
| 229 |
+
scrape_web_page,
|
| 230 |
+
final_answer_tool # Add the new tool to the list
|
| 231 |
]
|
| 232 |
|
| 233 |
# --- LangGraph Agent State ---
class AgentState(TypedDict):
    """Graph state: the running conversation for one agent invocation."""
    # add_messages is a LangGraph reducer — new messages are appended/merged
    # into the list rather than replacing it on each node update.
    messages: Annotated[list[AnyMessage], add_messages]
|
| 236 |
|
| 237 |
+
# --- Custom Conditional Edge ---
def should_continue(state: AgentState):
    """Decide the next graph hop after the agent node.

    Returns END when the agent called final_answer_tool (or produced no tool
    call at all), and "tools" when it requested any other tool.
    """
    last_message = state['messages'][-1]
    if isinstance(last_message, AIMessage) and last_message.tool_calls:
        # BUGFIX: inspect every tool call, not just tool_calls[0] — models can
        # emit several calls in one turn, and final_answer_tool must end the
        # run wherever it appears (matching the scan in BasicAgent.__call__).
        if any(call.get("name") == "final_answer_tool" for call in last_message.tool_calls):
            print("--- Condition: Saw final_answer_tool, ending graph. ---")
            return END
        print("--- Condition: Saw other tools, calling tools node. ---")
        return "tools"
    # This path should ideally not be taken if the prompt is followed
    print("--- Condition: No tool call detected, ending graph (forcing agent to use final_answer_tool). ---")
    return END
|
| 252 |
|
| 253 |
# --- Basic Agent Definition ---
|
| 254 |
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
|
|
|
| 256 |
|
| 257 |
def __init__(self):
    """Build the LangGraph agent: validate secrets, compose the system prompt,
    initialize the Groq LLM, bind tools, and compile the agent/tools graph."""
    print("BasicAgent (LangGraph) initializing...")

    # Fail fast if the required LLM key is absent; the HF token is optional.
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY secret is not set!")
    HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not HUGGINGFACEHUB_API_TOKEN: print("⚠️ Warning: HUGGINGFACEHUB_API_TOKEN secret not set.")

    self.tools = defined_tools
    # code_interpreter gets a hand-written description; every other tool uses
    # its own docstring as the description.
    tool_descriptions = "\n".join([
        f"- {tool.name}: {tool.description}" if tool.name != 'code_interpreter' else
        (f"- {tool.name}: Executes Python code. Use for calculations, data manipulation, or logic puzzles. "
         "**When solving logic puzzles, write out your reasoning steps as comments in the code.** "
         "'pandas' (as pd) is available.")
        for tool in self.tools
    ])

    # ==================== MODIFIED SYSTEM PROMPT ====================
    self.system_prompt = f"""You are a highly intelligent and meticulous AI assistant built to answer questions from the GAIA benchmark.
Your primary goal is to provide **only the concise, factual, and direct answer** to the user's question.

**CRITICAL INSTRUCTIONS:**
* **DO NOT** provide the final answer as plain text.
* **THE ONLY WAY** to provide a final answer is by calling the `final_answer_tool`.
* **DO NOT** include conversational filler (e.g., "The answer is...").
* **DO NOT** explain your reasoning unless it's inside a `code_interpreter` comment.

You have access to the following tools:
{tool_descriptions}

**TOOL USAGE PROTOCOL:**
* To call a tool, respond ONLY with a single JSON object formatted exactly like this:
[[[JSON_MARKER_START]]]
{{
"tool": "tool_name",
"tool_input": {{ "arg_name1": "value1", ... }}
}}
[[[JSON_MARKER_END]]]
* Replace `tool_name` with the tool's name. Provide arguments in `tool_input`. Match names/types precisely.
* Do not add any text before or after the JSON block.

**REASONING PROCESS & STOPPING CONDITION:**
1. Analyze the question.
2. Break down problem into steps.
3. Determine if tools are needed.
4. If needed, call tool using JSON format. Wait for output.
5. Analyze tool output.
6. **DECISION POINT:**
a. **If the tool output directly contains the final answer (or allows trivial calculation):** Your *next* response MUST be a call to `final_answer_tool` with only that answer. **DO NOT** call any more tools.
b. **If more steps or tools are needed:** Continue reasoning, go back to step 4.
c. **If a tool call results in an error:** **Do not** try the exact same tool call again. Re-evaluate, try different arguments, or a different tool. If stuck, call `final_answer_tool` with an error message.
7. **FINAL OUTPUT:** The graph will stop *only* when you call `final_answer_tool`.
"""
    # =============================================================

    print("Initializing Groq LLM Endpoint...")
    try:
        # Near-zero temperature keeps tool-call JSON deterministic.
        chat_llm = ChatGroq(
            temperature=0.01,
            groq_api_key=GROQ_API_KEY,
            model_name="llama-3.1-8b-instant"  # Use Llama 3.1 8B Instant
        )
        print("✅ Groq LLM Endpoint initialized with llama-3.1-8b-instant.")
    except Exception as e:
        print(f"Error initializing Groq: {e}")
        raise

    self.llm_with_tools = chat_llm.bind_tools(self.tools)
    print("✅ Tools bound to LLM (using bind_tools).")

    def agent_node(state: AgentState):
        # One LLM turn: feed the whole message history, append the reply.
        print("--- Running Agent Node ---")
        ai_message: AIMessage = self.llm_with_tools.invoke(state["messages"])
        print(f"AI Message Raw Content: {ai_message.content}")
        # NOTE(review): the 'if' branch below was elided in the source as
        # received (leaving a dangling 'else:', a syntax error); this log-only
        # branch is reconstructed — confirm its exact text against the original.
        if ai_message.tool_calls:
            print(f"AI Tool Calls: {ai_message.tool_calls}")
        else:
            print(f"AI content (no calls): {ai_message.pretty_repr()}")
        return {"messages": [ai_message]}

    tool_node = ToolNode(self.tools)

    print("Building agent graph...")
    # agent -> (conditional) -> tools -> agent loop; should_continue decides
    # whether to run tools again or end on final_answer_tool.
    graph_builder = StateGraph(AgentState)
    graph_builder.add_node("agent", agent_node)
    graph_builder.add_node("tools", tool_node)
    graph_builder.add_edge(START, "agent")
    graph_builder.add_conditional_edges("agent", should_continue, {"tools": "tools", END: END})
    graph_builder.add_edge("tools", "agent")
    self.graph = graph_builder.compile()
    print("✅ Graph compiled.")
|
| 345 |
|
| 346 |
+
# ++++++++++++++++++++ __call__ METHOD ++++++++++++++++++++
|
| 347 |
def __call__(self, question: str) -> str:
|
| 348 |
+
print(f"\n--- Starting Agent Run for Question ---")
|
| 349 |
+
print(f"Agent received question (first 100 chars): {question[:100]}...")
|
| 350 |
+
|
| 351 |
+
graph_input = {"messages": [
|
| 352 |
+
SystemMessage(content=self.system_prompt),
|
| 353 |
+
HumanMessage(content=question)
|
| 354 |
+
]}
|
| 355 |
+
|
| 356 |
+
final_answer = "AGENT FAILED TO PRODUCE ANSWER" # Default answer
|
| 357 |
+
|
| 358 |
try:
|
| 359 |
for event in self.graph.stream(graph_input, stream_mode="values", config={"recursion_limit": 25}):
|
| 360 |
last_message = event["messages"][-1]
|
| 361 |
+
|
| 362 |
+
if isinstance(last_message, AIMessage) and last_message.tool_calls:
|
| 363 |
+
for tool_call in last_message.tool_calls:
|
| 364 |
+
if tool_call.get("name") == "final_answer_tool":
|
| 365 |
+
final_answer = tool_call['args'].get('answer', "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER")
|
| 366 |
+
print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
|
| 367 |
+
break
|
| 368 |
+
|
| 369 |
elif isinstance(last_message, ToolMessage):
|
| 370 |
print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
|
| 371 |
+
elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
|
| 372 |
+
# This might be an error or the agent failing to call final_answer_tool
|
| 373 |
+
print(f"AI Message (no tool call): {last_message.content[:500]}...")
|
| 374 |
+
# We store this in case the graph ends here, but it's not the ideal path
|
| 375 |
+
if isinstance(last_message.content, str) and last_message.content.strip():
|
| 376 |
+
final_answer = last_message.content # Fallback
|
| 377 |
+
|
| 378 |
+
# --- Cleaning step (for the final answer, wherever it came from) ---
|
| 379 |
+
cleaned_answer = str(final_answer).strip() # Ensure it's a string
|
| 380 |
|
|
|
|
| 381 |
prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
|
| 382 |
original_cleaned = cleaned_answer
|
| 383 |
for prefix in prefixes_to_remove:
|
|
|
|
| 388 |
print(f"Warning: Prefix found but not stripped: '{original_cleaned[:100]}...'")
|
| 389 |
|
| 390 |
looks_like_code = any(kw in cleaned_answer for kw in ["def ", "import ", "print(", "for ", "while ", "if ", "class ", "=>", "dict(", "list["]) or cleaned_answer.count('\n') > 3 or (cleaned_answer.startswith('[') and cleaned_answer.endswith(']')) or (cleaned_answer.startswith('{') and cleaned_answer.endswith('}'))
|
| 391 |
+
|
| 392 |
if not looks_like_code:
|
| 393 |
+
# ++++++++++++++++ USING remove_fences_simple ++++++++++++++++
|
| 394 |
cleaned_answer = remove_fences_simple(cleaned_answer) # Use the helper function
|
| 395 |
+
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
| 396 |
if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
|
| 397 |
cleaned_answer = cleaned_answer[1:-1].strip()
|
| 398 |
|
| 399 |
print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
|
| 400 |
+
return cleaned_answer # Return the cleaned answer
|
| 401 |
+
|
|
|
|
|
|
|
| 402 |
except Exception as e:
|
| 403 |
print(f"Error running agent graph: {e}")
|
| 404 |
import traceback; traceback.print_exc()
|
| 405 |
return f"AGENT GRAPH ERROR: {e}"
|
| 406 |
+
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
| 407 |
|
| 408 |
|
| 409 |
# --- (Original Template Code - Mock Questions Version) ---
|
|
|
|
|
|
|
|
|
|
| 410 |
|
| 411 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 412 |
""" MOCK RUN: Runs agent on mock Qs, displays results. DOES NOT SUBMIT. """
|
|
|
|
| 414 |
username = profile.username if profile else "local_test_user"
|
| 415 |
print(f"User: {username}{'' if profile else ' (dummy)'}")
|
| 416 |
|
| 417 |
+
submit_url = f"{DEFAULT_API_URL}/submit"
|
| 418 |
|
| 419 |
print("Instantiating agent...")
|
| 420 |
try:
|
| 421 |
agent = BasicAgent()
|
| 422 |
if asr_pipeline is None: print("⚠️ Global ASR Pipeline failed load.")
|
| 423 |
except Exception as e: print(f"Error instantiating agent: {e}"); import traceback; traceback.print_exc(); return f"Error initializing agent: {e}", None
|
| 424 |
+
print("Agent instantiated successfully.")
|
| 425 |
|
| 426 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
|
| 427 |
print(f"Agent code URL: {agent_code}")
|
| 428 |
|
| 429 |
print("--- USING MOCK QUESTIONS ---")
|
| 430 |
mock_questions_data = [
|
| 431 |
+
{"task_id": "mock_search_001", "question": "What is the capital of France?"},
|
| 432 |
+
{"task_id": "mock_code_001", "question": "Calculate 15 factorial using python. Only output the final number."},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
# {"task_id": "mock_audio_001", "question": "Transcribe 'sample.mp3'", "file_path": "sample.mp3"}, # Needs sample.mp3
|
| 434 |
]
|
| 435 |
questions_data = mock_questions_data
|
|
|
|
| 475 |
results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
|
| 476 |
return final_status, results_df
|
| 477 |
|
| 478 |
+
|
| 479 |
# --- Build Gradio Interface ---
|
| 480 |
with gr.Blocks() as demo:
|
| 481 |
gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
|
|
|
|
| 491 |
|
| 492 |
if __name__ == "__main__":
    # Print environment diagnostics, then launch the Gradio UI.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Hugging Face Spaces inject these env vars; locally they are usually absent.
    host = os.getenv("SPACE_HOST")
    repo = os.getenv("SPACE_ID")
    if host:
        print(f"✅ SPACE_HOST: {host}\n Runtime URL: https://{host}.hf.space")
    else:
        print("ℹ️ No SPACE_HOST (local?).")
    if repo:
        print(f"✅ SPACE_ID: {repo}\n Repo URL: https://huggingface.co/spaces/{repo}\n Tree URL: https://huggingface.co/spaces/{repo}/tree/main")
    else:
        print("ℹ️ No SPACE_ID (local?).")
    # __file__ is undefined in some embedded interpreters; fall back to the CWD.
    try:
        script_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        script_dir = os.getcwd()
    print(f"Script directory: {script_dir}")
    print(f"CWD: {os.getcwd()}")
    try:
        print("Files in CWD:", os.listdir("."))
    except FileNotFoundError:
        print("Warning: CWD listing failed.")
    print("-"*(60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface...")
    demo.queue().launch(debug=True, share=False)
|
| 508 |
|
|
|