gabejavitt commited on
Commit
9f84911
·
verified ·
1 Parent(s): 7e7c48d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -338
app.py CHANGED
@@ -12,7 +12,7 @@ import json
12
  import re
13
  import uuid
14
  import time
15
- import ast
16
 
17
  # --- Pydantic Import ---
18
  from pydantic import BaseModel, Field
@@ -63,7 +63,7 @@ except Exception as e:
63
  agent = None
64
 
65
  # ====================================================
66
- # --- Tool Definitions ---
67
 
68
  class SearchInput(BaseModel):
69
  query: str = Field(description="The search query.")
@@ -465,89 +465,65 @@ def remove_fences_simple(text):
465
  return text
466
  return original_text
467
 
 
 
468
  def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
469
  """
470
  Parses malformed tool call strings (dribbled) from an LLM response.
471
-
472
- Tries two strategies:
473
- 1. <function(tool_name)>{json}</function> format
474
- 2. Bare JSON with tool name inference
475
-
476
- Args:
477
- content: Raw text string from LLM response
478
- tools: List of valid tool definitions for validation
479
-
480
- Returns:
481
- List containing a ToolCall object if parsing succeeded, empty list otherwise
482
  """
483
-
484
- def extract_json_with_balanced_braces(text: str) -> str:
485
- """Extract first complete JSON object using balanced brace counting."""
486
- start_idx = text.find('{')
487
- if start_idx == -1:
488
- return ""
489
-
490
- brace_count = 0
491
- in_string = False
492
- escape_next = False
493
-
494
- for i in range(start_idx, len(text)):
495
- char = text[i]
496
-
497
- if escape_next:
498
- escape_next = False
499
- continue
500
-
501
- if char == '\\':
502
- escape_next = True
503
- continue
504
-
505
- if char == '"':
506
- in_string = not in_string
507
- continue
508
-
509
- if not in_string:
510
- if char == '{':
511
- brace_count += 1
512
- elif char == '}':
513
- brace_count -= 1
514
- if brace_count == 0:
515
- return text[start_idx:i+1]
516
-
517
- return ""
518
-
519
  tool_name = None
520
  tool_input = None
521
-
522
- print(f"Original LLM content for fallback parsing:\n---\n{content[:500]}...\n---")
523
-
524
- # ========================================================================
525
- # STRATEGY 1: Try to parse <function(tool_name)>{json}</function> format
526
- # ========================================================================
527
- REGEX_STRING_FOR_FUNCTION = r"<function\(([^)]+)\)>"
528
-
529
- func_match = re.search(REGEX_STRING_FOR_FUNCTION, content, re.IGNORECASE)
530
 
531
  if func_match:
532
  try:
533
- tool_name = func_match.group(1).strip()
534
-
535
- # Extract JSON starting after the function tag
536
- json_start = func_match.end()
537
- remaining_content = content[json_start:]
538
- json_str = extract_json_with_balanced_braces(remaining_content)
539
 
540
- if json_str:
541
- tool_input = json.loads(json_str)
542
- print(f"🔧 Fallback (Format 1): Parsed tool call for '{tool_name}'")
 
 
 
 
 
 
 
543
  else:
544
- print(f"⚠️ Fallback (Format 1): Found <function> but no valid JSON")
545
  tool_name = None
546
 
547
  except json.JSONDecodeError as e:
548
- print(f"⚠️ Fallback (Format 1): Failed to parse JSON: {e}")
549
- tool_name = None
550
- tool_input = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
 
552
  # ========================================================================
553
  # STRATEGY 2: Try to parse bare JSON (if Strategy 1 failed)
@@ -672,32 +648,52 @@ defined_tools = [
672
  ]
673
 
674
 
675
- # --- LangGraph Agent State ---
676
  class AgentState(TypedDict):
677
  messages: Annotated[List[AnyMessage], add_messages]
678
- plan: List[str] # A list of steps to execute
679
  turn: int
680
 
681
 
682
- # --- Conditional Edge Function ---
683
- def route_from_planner(state: AgentState):
684
  """
685
- Routes to the executor if a plan exists, or ends the graph if the plan is complete.
686
  """
687
- plan = state.get('plan', [])
688
- if plan:
689
- print("--- Condition: Plan has steps. Routing to executor. ---")
690
- return "executor"
691
- else:
692
- print("--- Condition: Plan is empty. Ending. ---")
 
 
 
 
 
 
 
693
  return END
694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
 
696
  # ====================================================
697
- # --- Basic Agent Class ---
698
  class BasicAgent:
699
  def __init__(self):
700
- print("BasicAgent (Planner-Executor) initializing...")
701
 
702
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
703
  if not GROQ_API_KEY:
@@ -725,7 +721,6 @@ class BasicAgent:
725
  # Build tool descriptions
726
  tool_desc_list = []
727
  for tool in self.tools:
728
- # Use Pydantic schema if available for richer descriptions
729
  if tool.args_schema:
730
  schema = tool.args_schema.model_json_schema()
731
  args_desc = []
@@ -738,20 +733,19 @@ class BasicAgent:
738
  desc = f"- {tool.name}: {tool.description}"
739
  tool_desc_list.append(desc)
740
  tool_descriptions = "\n".join(tool_desc_list)
741
- tool_names_list = [t.name for t in self.tools]
742
- tool_names_str = ", ".join(tool_names_list)
743
-
744
  # ==================== SYSTEM PROMPT V7 (Simplified) ====================
 
745
  self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
746
  Your goal: Provide the EXACT answer in the EXACT format requested.
747
 
748
  **PROTOCOL:**
749
 
750
- 1. **ANALYZE:** Read the question. What info is needed? What is the answer format?
751
- 2. **ACT:** Call ONE tool to get information.
752
  3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
753
- - **If NO:** Go back to Step 2.
754
- - **If YES:** Call final_answer_tool immediately.
755
 
756
  **CRITICAL RULES:**
757
 
@@ -766,319 +760,165 @@ Your goal: Provide the EXACT answer in the EXACT format requested.
766
  **EXAMPLE: FINAL ANSWER**
767
  {{ "name": "final_answer_tool", "arguments": {{"answer": "28"}} }}
768
 
769
- **EXAMPLE: RAG SCRAPER**
770
- {{ "name": "scrape_and_retrieve", "arguments": {{"url": "https://example.com", "query": "what is X?"}} }}
771
-
772
- **CRITICAL TOOL CALLING FORMAT:**
773
- When calling tools, you MUST use this EXACT JSON format:
774
- {{"name": "tool_name", "arguments": {{"param": "value"}}}}
775
-
776
- NEVER use XML format like <function(...)>.
777
- NEVER include tool name in arguments.
778
-
779
- **AVAILABLE TOOLS:**
780
  {tool_descriptions}
781
 
782
-
783
- **REMEMBER:** Use tools. Format JSON correctly.
784
  """
785
 
786
- print("Initializing Groq LLMs...")
787
  try:
788
- # LLM 1: The Executor (binds to tools)
789
- self.executor_llm = ChatGroq(
790
  temperature=0,
791
  groq_api_key=GROQ_API_KEY,
792
- model_name="openai/gpt-oss-120b",
793
  max_tokens=4096,
794
- timeout=60,
795
- #model_kwargs={"response_format": {"type": "json_object"}} # Force JSON
796
  ).bind_tools(self.tools)
797
- print("✅ Executor LLM (with tools) initialized.")
798
 
799
- # LLM 2: The Planner (no tools, just reasoning)
800
- self.planner_llm = ChatGroq(
801
- temperature=0,
802
- groq_api_key=GROQ_API_KEY,
803
- model_name="openai/gpt-oss-120b",
804
- max_tokens=4096,
805
- timeout=60
806
- ).bind(tool_choice="none")
807
- print("✅ Planner LLM (no tools) initialized.")
808
  except Exception as e:
809
  print(f"❌ Error initializing Groq: {e}")
810
  raise
811
-
812
- # --- Define Planner Prompt ---
813
- self.planner_prompt = """You are a planning assistant. Your ONLY job is to output a Python list.
814
-
815
- AVAILABLE TOOLS:
816
- {tool_names}
817
-
818
- Original Question: {{original_question}}
819
-
820
- Recent History:
821
- {{history}}
822
-
823
- INSTRUCTIONS:
824
- 1. Check if the task is complete (look for final_answer_tool in history)
825
- - If YES: Output []
826
- - If NO: Create 1-2 next steps
827
-
828
- 2. Each step MUST use one of these EXACT tool names:
829
- - search_tool (for web searches)
830
- - code_interpreter (for calculations, data processing)
831
- - scrape_and_retrieve (for specific webpage content)
832
- - read_file (to read uploaded files)
833
- - final_answer_tool (when you have the final answer)
834
-
835
- 3. Format: "Use [exact_tool_name] to [specific action]"
836
-
837
- EXAMPLES:
838
- ["Use search_tool to find information about porterhouse steak"]
839
- ["Use code_interpreter to calculate 15 factorial"]
840
- ["Use scrape_and_retrieve to extract recipe from Reddit"]
841
- ["Use final_answer_tool to submit the answer"]
842
- []
843
-
844
- CRITICAL: Use ONLY the tools listed above. Output ONLY the list.
845
-
846
- Your response:"""
847
-
848
- # Store tool names in the prompt
849
- self.planner_prompt = self.planner_prompt.format(
850
- tool_names=tool_names_str)
851
 
852
- # --- Node 1: The Planner ---
853
- def planner_node(state: AgentState):
854
  current_turn = state.get('turn', 0) + 1
855
  print(f"\n{'='*60}")
856
- print(f"PLANNER TURN {current_turn}/{MAX_TURNS}")
857
  print('='*60)
858
-
859
- if current_turn > MAX_TURNS:
860
- print("--- Max turns reached. Ending. ---")
861
- return {"plan": [], "turn": current_turn}
862
-
863
- # DON'T pass along existing plan - always replan!
864
-
865
- # Get last 10 messages for context
866
- recent_messages = state['messages'][-10:]
867
- history_str = "\n".join([
868
- f"{msg.__class__.__name__}: {str(msg.content)[:200]}..."
869
- for msg in recent_messages
870
- ])
871
-
872
- # Extract original question
873
- original_question = next(
874
- (msg.content for msg in state['messages'] if isinstance(msg, HumanMessage)),
875
- "Unknown question"
876
- )
877
-
878
- # Check if final_answer_tool was called
879
- for msg in reversed(state['messages']):
880
- if isinstance(msg, AIMessage) and msg.tool_calls:
881
- if any(tc.get('name') == 'final_answer_tool' for tc in msg.tool_calls):
882
- print("✅ Final answer detected. Ending.")
883
- return {"plan": [], "turn": current_turn}
884
-
885
- # Format prompt
886
- prompt = self.planner_prompt.format(
887
- original_question=original_question,
888
- history=history_str
889
- )
890
-
891
- # Call planner LLM
892
- try:
893
- response = self.planner_llm.invoke(prompt)
894
- plan_str = response.content
895
- print(f"Raw planner output: {plan_str[:300]}...")
896
- except Exception as e:
897
- print(f"⚠️ Planner LLM failed: {e}")
898
- return {"plan": [], "turn": current_turn}
899
-
900
- # Parse plan with multiple strategies
901
- plan_list = []
902
-
903
- # Strategy 1: Try to find a list in the output
904
- match = re.search(r'\[([^\]]*)\]', plan_str, re.DOTALL)
905
- if match:
906
- try:
907
- list_str = '[' + match.group(1) + ']'
908
- # Clean up common issues
909
- list_str = list_str.replace('\n', ' ')
910
- list_str = re.sub(r'\s+', ' ', list_str) # Normalize whitespace
911
-
912
- parsed = json.loads(list_str)
913
- if isinstance(parsed, list) and all(isinstance(x, str) for x in parsed):
914
- plan_list = parsed
915
- print(f"✅ Parsed plan: {plan_list}")
916
- except json.JSONDecodeError:
917
- print(f"⚠️ Failed to parse as JSON")
918
-
919
- # Strategy 2: Look for quoted strings if JSON parsing failed
920
- if not plan_list:
921
- quoted_strings = re.findall(r'"([^"]+)"', plan_str)
922
- if quoted_strings and len(quoted_strings) <= 5:
923
- # Check if they look like tool steps
924
- valid_steps = []
925
- for s in quoted_strings:
926
- if any(tool.name in s.lower() for tool in self.tools):
927
- valid_steps.append(s)
928
- if valid_steps:
929
- plan_list = valid_steps
930
- print(f"✅ Extracted steps from quotes: {plan_list}")
931
-
932
- # Validate plan
933
- if plan_list:
934
- # Remove any non-descriptive or invalid steps
935
- validated_plan = []
936
- for step in plan_list:
937
- step_lower = step.lower().strip()
938
-
939
- # Check if step mentions ANY tool
940
- mentioned_tool = None
941
- for tool in self.tools:
942
- if tool.name.lower() in step_lower:
943
- mentioned_tool = tool.name
944
- break
945
-
946
- if mentioned_tool:
947
- # Valid step - has a real tool name
948
- validated_plan.append(step)
949
- print(f"✅ Accepted step: '{step}' (uses {mentioned_tool})")
950
- else:
951
- # Invalid - no real tool mentioned
952
- print(f"❌ Rejected step: '{step}' (no valid tool name found)")
953
-
954
- plan_list = validated_plan
955
-
956
- if not plan_list:
957
- print("⚠️ No valid plan generated. Ending.")
958
 
959
- print(f"📋 Final Plan: {plan_list}")
960
- return {"plan": plan_list, "turn": current_turn}
 
961
 
962
- # --- Node 2: The Executor ---
963
- def executor_node(state: AgentState):
964
- print(f"\n--- EXECUTOR ---")
965
-
966
- plan = state.get('plan', [])
967
- if not plan:
968
- print("⚠️ No plan to execute!")
969
- return {"messages": [], "plan": []}
970
-
971
- current_step = plan[0]
972
- print(f"Executing Step: {current_step}")
973
-
974
- # Build executor message
975
- executor_messages = state['messages'] + [
976
- HumanMessage(content=f"""Execute: {current_step}
977
-
978
- Available tools: search_tool, code_interpreter, scrape_and_retrieve, final_answer_tool
979
-
980
- Call ONE tool in JSON format: {{"name": "tool_name", "arguments": {{...}}}}""")
981
- ]
982
-
983
- # Try to call LLM
984
  max_retries = 3
985
  ai_message = None
986
  for attempt in range(max_retries):
987
  try:
988
- ai_message = self.executor_llm.invoke(executor_messages)
 
989
  break
990
  except Exception as e:
991
- print(f"⚠️ Executor LLM attempt {attempt+1}/{max_retries} failed: {e}")
992
  if attempt == max_retries - 1:
993
- ai_message = AIMessage(content=f"Error: Executor LLM failed: {e}")
 
 
994
  time.sleep(2 ** attempt)
995
 
996
- # Fallback parsing
997
  if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
998
  parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
999
  if parsed_tool_calls:
1000
- print("🔧 Fallback SUCCESS: Rebuilt tool call")
1001
  ai_message.tool_calls = parsed_tool_calls
1002
- ai_message.content = ""
1003
  else:
1004
- print(f"⚠️ Fallback FAILED")
1005
 
1006
  if ai_message.tool_calls:
1007
- print(f"🔧 Tool Call: {ai_message.tool_calls[0]['name']}")
1008
  else:
1009
- print("⚠️ No tool call generated")
1010
 
1011
- # IMPORTANT: Clear the plan so planner creates a new one
1012
- return {"messages": [ai_message], "plan": []}
1013
 
1014
  # --- Tool Node ---
1015
  tool_node = ToolNode(self.tools)
1016
 
1017
  # --- Build Graph ---
1018
- print("Building Planner-Executor graph...")
1019
  graph_builder = StateGraph(AgentState)
1020
 
1021
- graph_builder.add_node("planner", planner_node)
1022
- graph_builder.add_node("executor", executor_node)
1023
  graph_builder.add_node("tools", tool_node)
1024
 
1025
- graph_builder.add_edge(START, "planner")
1026
 
1027
  graph_builder.add_conditional_edges(
1028
- "planner",
1029
- route_from_planner,
1030
  {
1031
- "executor": "executor",
 
1032
  END: END
1033
  }
1034
  )
1035
 
1036
- graph_builder.add_edge("executor", "tools")
1037
- graph_builder.add_edge("tools", "planner") # Loop back to planner
1038
 
1039
  self.graph = graph_builder.compile()
1040
- print("✅ Planner-Executor graph compiled successfully.")
1041
 
1042
  def __call__(self, question: str) -> str:
1043
  print(f"\n--- Starting Agent Run for Question ---")
1044
- print(f"Question: {question[:100]}...")
1045
 
 
1046
  graph_input = {
1047
  "messages": [
1048
  SystemMessage(content=self.system_prompt),
1049
  HumanMessage(content=question)
1050
  ],
1051
- "plan": [],
1052
  "turn": 0
1053
  }
1054
 
1055
  final_answer = "AGENT FAILED TO PRODUCE ANSWER"
1056
  try:
1057
- config = {"recursion_limit": 50} # Increased from 25
1058
-
1059
  for event in self.graph.stream(graph_input, stream_mode="values", config=config):
1060
- last_message = event["messages"][-1]
 
 
 
 
1061
 
1062
- # Check for final answer
1063
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
1064
- for tc in last_message.tool_calls:
1065
- if tc.get("name") == "final_answer_tool":
1066
- final_answer = tc['args'].get('answer', "ERROR")
1067
- print(f"✅ Final Answer: '{final_answer}'")
1068
- break
1069
-
1070
- if final_answer != "AGENT FAILED TO PRODUCE ANSWER":
1071
- break
1072
-
1073
- # Clean answer
 
 
 
 
 
 
 
 
 
 
1074
  cleaned_answer = str(final_answer).strip()
1075
- print(f"Returning: '{cleaned_answer}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
1076
  return cleaned_answer
1077
-
1078
  except Exception as e:
1079
- print(f"Error: {e}")
1080
- traceback.print_exc()
1081
- return f"AGENT ERROR: {e}"
 
1082
 
1083
 
1084
  # ====================================================
@@ -1095,7 +935,7 @@ except Exception as e:
1095
 
1096
  # ====================================================
1097
  # --- (Original Template Code - Mock Questions Version) ---
1098
- def run_and_submit_all( profile: gr.OAuthProfile | None):
1099
  """
1100
  Fetches MOCK questions, runs the BasicAgent on them, simulates submission prep,
1101
  and displays the results. DOES NOT SUBMIT.
@@ -1109,7 +949,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1109
  return "FATAL ERROR: Global agent failed to initialize. Check logs.", None
1110
 
1111
  print("Using globally instantiated agent.")
1112
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
1113
  print(f"Agent code URL: {agent_code}")
1114
  print("--- USING MOCK QUESTIONS ---")
1115
 
@@ -1198,9 +1038,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1198
  "task_id": "mock_level1_020",
1199
  "question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
1200
  }
1201
- #
1202
- # ^^^ PASTE YOUR FULL LIST OF 20 MOCK QUESTIONS HERE ^^^
1203
- #
1204
  ]
1205
 
1206
  questions_data = mock_questions_data
@@ -1237,7 +1074,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1237
 
1238
  status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
1239
  print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
1240
- final_status = "--- MOK RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED."
1241
  results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
1242
  return final_status, results_df
1243
 
@@ -1247,7 +1084,7 @@ with gr.Blocks() as demo:
1247
  gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
1248
  gr.Markdown("""
1249
  **Instructions:** Click 'Run Mock Evaluation'.
1250
- **Notes:** Uses Groq (Llama 3.1 8B). Ensure `GROQ_API_KEY` secret/env var exists. **DOES NOT** fetch official Qs or submit. Check logs for details.
1251
  """)
1252
  gr.LoginButton()
1253
  run_button = gr.Button("Run Mock Evaluation")
@@ -1257,7 +1094,7 @@ with gr.Blocks() as demo:
1257
 
1258
  if __name__ == "__main__":
1259
  print("\n" + "-"*30 + " App Starting " + "-"*30)
1260
- space_host_startup = os.getenv("SPACE_ID"); space_id_startup = os.getenv("SPACE_ID")
1261
  if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
1262
  else: print("ℹ️ No SPACE_HOST (local?).")
1263
  if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -1272,3 +1109,4 @@ if __name__ == "__main__":
1272
  print("Launching Gradio Interface...")
1273
  demo.queue().launch(debug=True, share=False)
1274
 
 
 
12
  import re
13
  import uuid
14
  import time
15
+ import ast # <-- Import ast module
16
 
17
  # --- Pydantic Import ---
18
  from pydantic import BaseModel, Field
 
63
  agent = None
64
 
65
  # ====================================================
66
+ # --- Tool Definitions (Unchanged) ---
67
 
68
  class SearchInput(BaseModel):
69
  query: str = Field(description="The search query.")
 
465
  return text
466
  return original_text
467
 
468
+
469
+ # --- *** ROBUST FALLBACK PARSER *** ---
470
  def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
471
  """
472
  Parses malformed tool call strings (dribbled) from an LLM response.
 
 
 
 
 
 
 
 
 
 
 
473
  """
474
+ print(f"Original LLM content for fallback parsing:\n---\n{content}\n---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
  tool_name = None
476
  tool_input = None
477
+ cleaned_str = None # For storing cleaned string before parsing
478
+
479
+ # STRATEGY 1: Try to parse <function(tool_name)>...{json_string}...
480
+ # This also handles <function=tool_name>...{json_string}...
481
+ func_match = re.search(
482
+ r"<function[(=]\s*([^)]+)\s*[)>](.*)", # <-- More robust regex
483
+ content,
484
+ re.DOTALL | re.IGNORECASE
485
+ )
486
 
487
  if func_match:
488
  try:
489
+ tool_name = func_match.group(1).strip().replace("'", "").replace('"', '') # Clean tool name
490
+ remaining_content = func_match.group(2)
 
 
 
 
491
 
492
+ json_start_index = remaining_content.find('{')
493
+ if json_start_index != -1:
494
+ json_str = remaining_content[json_start_index:]
495
+ # --- Aggressive Cleaning ---
496
+ cleaned_str = json_str.strip()
497
+ cleaned_str = ''.join(c for c in cleaned_str if c.isprintable() or c in '\n\r\t')
498
+ cleaned_str = cleaned_str.strip().rstrip(',')
499
+
500
+ tool_input = json.loads(cleaned_str)
501
+ print(f"🔧 Fallback (Format 1 - json.loads): Parsed tool call for '{tool_name}'")
502
  else:
503
+ print(f"⚠️ Fallback (Format 1): Found <function> but no JSON blob.")
504
  tool_name = None
505
 
506
  except json.JSONDecodeError as e:
507
+ print(f"⚠️ Fallback (Format 1): json.loads failed after cleaning: {e}. Trying ast.literal_eval.")
508
+ try:
509
+ # Secondary attempt with ast.literal_eval
510
+ if cleaned_str:
511
+ potential_input = ast.literal_eval(cleaned_str)
512
+ if isinstance(potential_input, dict):
513
+ tool_input = potential_input
514
+ print(f"🔧 Fallback (Format 1 - ast.literal_eval): Parsed tool call for '{tool_name}'")
515
+ else:
516
+ print(f"⚠️ Fallback (Format 1): ast.literal_eval did not produce a dict.")
517
+ tool_name = None
518
+ else:
519
+ tool_name = None
520
+
521
+ except (SyntaxError, ValueError) as ast_e:
522
+ print(f"⚠️ Fallback (Format 1): ast.literal_eval also failed: {ast_e}")
523
+ tool_name = None
524
+ except Exception as e_inner:
525
+ print(f"⚠️ Fallback (Format 1): Unexpected error during ast.literal_eval: {e_inner}")
526
+ tool_name = None
527
 
528
  # ========================================================================
529
  # STRATEGY 2: Try to parse bare JSON (if Strategy 1 failed)
 
648
  ]
649
 
650
 
651
+ # --- *** NEW: Reverted AgentState *** ---
652
  class AgentState(TypedDict):
653
  messages: Annotated[List[AnyMessage], add_messages]
 
654
  turn: int
655
 
656
 
657
+ # --- *** NEW: Reverted Conditional Edge Function *** ---
658
+ def should_continue(state: AgentState):
659
  """
660
+ Decide whether to continue, call tools, or end.
661
  """
662
+ last_message = state['messages'][-1]
663
+ current_turn = state.get('turn', 0)
664
+
665
+ # 1. Check for final_answer_tool
666
+ if isinstance(last_message, AIMessage) and last_message.tool_calls:
667
+ for tool_call in last_message.tool_calls:
668
+ if tool_call.get("name") == "final_answer_tool":
669
+ print("--- Condition: final_answer_tool called, ending. ---")
670
+ return END
671
+
672
+ # 2. Check turn limit
673
+ if current_turn >= MAX_TURNS:
674
+ print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
675
  return END
676
 
677
+ # 3. Route to tools if tool calls exist
678
+ if isinstance(last_message, AIMessage) and last_message.tool_calls:
679
+ print("--- Condition: Tools called, routing to tools node. ---")
680
+ return "tools"
681
+
682
+ # 4. Loop prevention
683
+ if len(state['messages']) > 2 and isinstance(last_message, AIMessage) and isinstance(state['messages'][-2], AIMessage):
684
+ print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
685
+ return END
686
+
687
+ # 5. Loop back to agent (reasoning/planning step)
688
+ print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
689
+ return "agent"
690
+
691
 
692
  # ====================================================
693
+ # --- *** NEW: Reverted Basic Agent Class *** ---
694
  class BasicAgent:
695
  def __init__(self):
696
+ print("BasicAgent (Single LLM) initializing...")
697
 
698
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
699
  if not GROQ_API_KEY:
 
721
  # Build tool descriptions
722
  tool_desc_list = []
723
  for tool in self.tools:
 
724
  if tool.args_schema:
725
  schema = tool.args_schema.model_json_schema()
726
  args_desc = []
 
733
  desc = f"- {tool.name}: {tool.description}"
734
  tool_desc_list.append(desc)
735
  tool_descriptions = "\n".join(tool_desc_list)
736
+
 
 
737
  # ==================== SYSTEM PROMPT V7 (Simplified) ====================
738
+ # This prompt is for a single, powerful agent
739
  self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
740
  Your goal: Provide the EXACT answer in the EXACT format requested.
741
 
742
  **PROTOCOL:**
743
 
744
+ 1. **ANALYZE:** Read the question and history. What is the next logical step?
745
+ 2. **ACT:** Call ONE tool to get information or perform a calculation.
746
  3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
747
+ - **If NO:** Go back to Step 1 and decide the *next* step.
748
+ - **If YES:** Call final_answer_tool immediately with the answer.
749
 
750
  **CRITICAL RULES:**
751
 
 
760
  **EXAMPLE: FINAL ANSWER**
761
  {{ "name": "final_answer_tool", "arguments": {{"answer": "28"}} }}
762
 
763
+ **TOOLS:**
 
 
 
 
 
 
 
 
 
 
764
  {tool_descriptions}
765
 
766
+ **REMEMBER:** One step at a time. Use tools. Format JSON correctly.
 
767
  """
768
 
769
+ print("Initializing Groq LLM...")
770
  try:
771
+ # --- Initialize ONE Powerful LLM for all tasks ---
772
+ self.llm_with_tools = ChatGroq(
773
  temperature=0,
774
  groq_api_key=GROQ_API_KEY,
775
+ model_name="llama-3.3-70b-versatile", # <-- Use the powerful model
776
  max_tokens=4096,
777
+ timeout=60
 
778
  ).bind_tools(self.tools)
779
+ print("✅ Main LLM (llama-3.3-70b-versatile with tools) initialized.")
780
 
 
 
 
 
 
 
 
 
 
781
  except Exception as e:
782
  print(f"❌ Error initializing Groq: {e}")
783
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
 
785
+ # --- Node 1: The Agent ---
786
+ def agent_node(state: AgentState):
787
  current_turn = state.get('turn', 0) + 1
788
  print(f"\n{'='*60}")
789
+ print(f"AGENT TURN {current_turn}/{MAX_TURNS}")
790
  print('='*60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
 
792
+ # Note: Max turns is also checked in should_continue, but good to have here
793
+ if current_turn > MAX_TURNS:
794
+ return {"messages": [SystemMessage(content="Max turns reached.")]}
795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
  max_retries = 3
797
  ai_message = None
798
  for attempt in range(max_retries):
799
  try:
800
+ # Call the single, powerful LLM
801
+ ai_message = self.llm_with_tools.invoke(state["messages"])
802
  break
803
  except Exception as e:
804
+ print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {e}")
805
  if attempt == max_retries - 1:
806
+ ai_message = AIMessage(
807
+ content=f"Error: LLM failed after {max_retries} attempts: {e}"
808
+ )
809
  time.sleep(2 ** attempt)
810
 
811
+ # --- Fallback Parsing Logic ---
812
  if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
813
  parsed_tool_calls = parse_tool_call_from_string(ai_message.content, self.tools)
814
  if parsed_tool_calls:
815
+ print("🔧 Fallback SUCCESS: Rebuilding tool call(s).")
816
  ai_message.tool_calls = parsed_tool_calls
817
+ ai_message.content = "" # Clear the text content
818
  else:
819
+ print(f"⚠️ Fallback FAILED: Could not parse any tool call from content:\n{ai_message.content[:200]}...")
820
 
821
  if ai_message.tool_calls:
822
+ print(f"🔧 Agent Tool Call: {ai_message.tool_calls[0]['name']}")
823
  else:
824
+ print(f"💭 Agent Reasoning: {ai_message.content[:200]}...")
825
 
826
+ return {"messages": [ai_message], "turn": current_turn}
 
827
 
828
  # --- Tool Node ---
829
  tool_node = ToolNode(self.tools)
830
 
831
  # --- Build Graph ---
832
+ print("Building Single-Agent graph...")
833
  graph_builder = StateGraph(AgentState)
834
 
835
+ graph_builder.add_node("agent", agent_node)
 
836
  graph_builder.add_node("tools", tool_node)
837
 
838
+ graph_builder.add_edge(START, "agent")
839
 
840
  graph_builder.add_conditional_edges(
841
+ "agent",
842
+ should_continue, # Use the reverted conditional function
843
  {
844
+ "tools": "tools",
845
+ "agent": "agent", # For loop prevention
846
  END: END
847
  }
848
  )
849
 
850
+ graph_builder.add_edge("tools", "agent") # Loop back to agent
 
851
 
852
  self.graph = graph_builder.compile()
853
+ print("✅ Single-Agent graph compiled successfully.")
854
 
855
  def __call__(self, question: str) -> str:
856
  print(f"\n--- Starting Agent Run for Question ---")
857
+ print(f"Agent received question (first 100 chars): {question[:100]}...")
858
 
859
+ # --- Initialize Reverted AgentState (no plan) ---
860
  graph_input = {
861
  "messages": [
862
  SystemMessage(content=self.system_prompt),
863
  HumanMessage(content=question)
864
  ],
 
865
  "turn": 0
866
  }
867
 
868
  final_answer = "AGENT FAILED TO PRODUCE ANSWER"
869
  try:
870
+ config = {"recursion_limit": MAX_TURNS + 5}
 
871
  for event in self.graph.stream(graph_input, stream_mode="values", config=config):
872
+
873
+ if event.get('messages'): # Ensure messages exist
874
+ last_message = event["messages"][-1]
875
+ else:
876
+ continue # Skip if no messages yet
877
 
878
+ # Check for final answer extraction
879
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
880
+ if last_message.tool_calls[0].get("name") == "final_answer_tool":
881
+ final_answer_args = last_message.tool_calls[0].get('args', {})
882
+ if 'answer' in final_answer_args:
883
+ final_answer = final_answer_args['answer']
884
+ print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
885
+ break
886
+ else:
887
+ print(f"⚠️ Final Answer tool called without 'answer' argument: {final_answer_args}")
888
+ final_answer = "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER"
889
+ break
890
+
891
+ elif isinstance(last_message, ToolMessage):
892
+ print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
893
+ elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
894
+ print(f"AI Message (Reasoning): {last_message.content[:500]}...")
895
+ elif isinstance(last_message, SystemMessage):
896
+ print(f"System Message: {last_message.content[:500]}...")
897
+
898
+
899
+ # --- Final Answer Cleaning ---
900
  cleaned_answer = str(final_answer).strip()
901
+ prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
902
+ original_cleaned = cleaned_answer
903
+ for prefix in prefixes_to_remove:
904
+ if cleaned_answer.lower().startswith(prefix.lower()):
905
+ potential_answer = cleaned_answer[len(prefix):].strip()
906
+ if potential_answer:
907
+ cleaned_answer = potential_answer
908
+ break
909
+
910
+ cleaned_answer = remove_fences_simple(cleaned_answer)
911
+ if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
912
+ cleaned_answer = cleaned_answer[1:-1].strip()
913
+
914
+ print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
915
  return cleaned_answer
916
+
917
  except Exception as e:
918
+ print(f"Error running agent graph: {e}")
919
+ tb_str = traceback.format_exc()
920
+ print(tb_str)
921
+ return f"AGENT GRAPH ERROR: {e}"
922
 
923
 
924
  # ====================================================
 
935
 
936
  # ====================================================
937
  # --- (Original Template Code - Mock Questions Version) ---
938
+ def run_and_submit_all( profile: gr.OAuthProfile | None): # Corrected type hint
939
  """
940
  Fetches MOCK questions, runs the BasicAgent on them, simulates submission prep,
941
  and displays the results. DOES NOT SUBMIT.
 
949
  return "FATAL ERROR: Global agent failed to initialize. Check logs.", None
950
 
951
  print("Using globally instantiated agent.")
952
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run" # Corrected URL
953
  print(f"Agent code URL: {agent_code}")
954
  print("--- USING MOCK QUESTIONS ---")
955
 
 
1038
  "task_id": "mock_level1_020",
1039
  "question": r"""As of August 2023, how many in-text citations on the West African Vodun Wikipedia page reference a source that was cited using Scopus?"""
1040
  }
 
 
 
1041
  ]
1042
 
1043
  questions_data = mock_questions_data
 
1074
 
1075
  status_update = f"Finished mock run. Processed {len(answers_payload)} answers for '{username}'."
1076
  print(status_update); print("--- MOCK RUN - SUBMISSION SKIPPED ---")
1077
+ final_status = "--- Mock RUN COMPLETE ---\n" + status_update + "\nSubmission SKIPPED." # Corrected typo
1078
  results_df = pd.DataFrame(results_log); results_df['Correct'] = 'N/A (Mock)'
1079
  return final_status, results_df
1080
 
 
1084
  gr.Markdown("# GAIA Agent - MOCK TEST (Groq Llama3.1)")
1085
  gr.Markdown("""
1086
  **Instructions:** Click 'Run Mock Evaluation'.
1087
+ **Notes:** Uses Groq (Llama-3.3-70b Executor). Ensure `GROQ_API_KEY` secret/env var exists. **DOES NOT** fetch official Qs or submit. Check logs for details.
1088
  """)
1089
  gr.LoginButton()
1090
  run_button = gr.Button("Run Mock Evaluation")
 
1094
 
1095
  if __name__ == "__main__":
1096
  print("\n" + "-"*30 + " App Starting " + "-"*30)
1097
+ space_host_startup = os.getenv("SPACE_HOST"); space_id_startup = os.getenv("SPACE_ID") # Corrected variable name
1098
  if space_host_startup: print(f"✅ SPACE_HOST: {space_host_startup}\n Runtime URL: https://{space_host_startup}.hf.space")
1099
  else: print("ℹ️ No SPACE_HOST (local?).")
1100
  if space_id_startup: print(f"✅ SPACE_ID: {space_id_startup}\n Repo URL: https://huggingface.co/spaces/{space_id_startup}\n Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
1109
  print("Launching Gradio Interface...")
1110
  demo.queue().launch(debug=True, share=False)
1111
 
1112
+