gabejavitt commited on
Commit
6428172
·
verified ·
1 Parent(s): f58c066

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -354
app.py CHANGED
@@ -27,10 +27,15 @@ from langchain_community.tools import DuckDuckGoSearchRun
27
  from langchain_core.tools import tool
28
  from langchain_groq import ChatGroq
29
 
 
 
 
 
 
30
  # --- Constants ---
31
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
32
- MAX_TURNS = 20 # Increased from 15 for complex questions
33
- MAX_MESSAGE_LENGTH = 8000 # Truncate long outputs
34
 
35
  # --- Initialize ASR Pipeline ---
36
  asr_pipeline = None
@@ -50,6 +55,9 @@ except Exception as e:
50
  print(f"⚠️ Warning: Could not load ASR pipeline globally. Error: {e}")
51
  asr_pipeline = None
52
 
 
 
 
53
  # ====================================================
54
  # --- Tool Definitions ---
55
 
@@ -63,13 +71,10 @@ def search_tool(query: str) -> str:
63
  try:
64
  search = DuckDuckGoSearchRun()
65
  result = search.run(query)
66
- # Truncate if too long
67
  if len(result) > MAX_MESSAGE_LENGTH:
68
  result = result[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(result)} total chars]"
69
  return result
70
  except Exception as e:
71
- tb_str = traceback.format_exc()
72
- print(f"--- Search Tool FAILED ---\n{tb_str}\n---")
73
  return f"Error running search for '{query}': {str(e)}"
74
 
75
 
@@ -94,7 +99,6 @@ def code_interpreter(code: str) -> str:
94
  if pattern in code_lower:
95
  return f"Error: Potentially dangerous operation '{pattern}' is not allowed."
96
 
97
- # Check for file writing in code
98
  if 'open(' in code_lower and any(mode in code for mode in ["'w'", '"w"', "'a'", '"a"', "'wb'", '"wb"']):
99
  return "Error: Writing files is not allowed in code_interpreter. Use write_file tool instead."
100
 
@@ -117,7 +121,6 @@ def code_interpreter(code: str) -> str:
117
  return f"Error in execution:\n{stderr}\n\nStdout (if any):\n{stdout}"
118
 
119
  if stdout:
120
- # Truncate if too long
121
  if len(stdout) > MAX_MESSAGE_LENGTH:
122
  stdout = stdout[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(stdout)} total chars]"
123
  return f"Success:\n{stdout}"
@@ -126,9 +129,7 @@ def code_interpreter(code: str) -> str:
126
 
127
  except Exception as e:
128
  tb_str = traceback.format_exc()
129
- print(f"--- Code Interpreter FAILED ---\n{tb_str}\n---")
130
- error_msg = f"Execution failed:\n{tb_str}\n\n💡 Hints:\n- Check your syntax\n- Ensure you're using print() for output\n- Verify variable names and types"
131
- return error_msg
132
 
133
 
134
  @tool
@@ -143,11 +144,10 @@ def read_file(path: str) -> str:
143
  script_dir = os.getcwd()
144
  safe_path = os.path.normpath(path)
145
 
146
- # Try multiple path strategies
147
  paths_to_try = [
148
- os.path.join(script_dir, safe_path), # Relative to CWD
149
- safe_path, # Direct/absolute
150
- os.path.join(os.getcwd(), os.path.basename(safe_path)) # Basename in CWD
151
  ]
152
 
153
  full_path = None
@@ -157,49 +157,32 @@ def read_file(path: str) -> str:
157
  break
158
 
159
  if not full_path:
160
- try:
161
- cwd_files = os.listdir(".")
162
- except Exception:
163
- cwd_files = ["(could not list)"]
164
  return (f"Error: File not found: '{path}'\n"
165
- f"Tried paths:\n" + "\n".join(f" - {p}" for p in paths_to_try) +
166
- f"\n\nFiles in current directory: {cwd_files}")
167
 
168
  print(f"Reading file: {full_path}")
169
-
170
- # Try to detect file type
171
  _, ext = os.path.splitext(full_path)
172
 
173
  try:
174
  with open(full_path, 'r', encoding='utf-8') as f:
175
  content = f.read()
176
-
177
- # Truncate if too long
178
  if len(content) > MAX_MESSAGE_LENGTH:
179
  content = content[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(content)} total chars]"
180
-
181
  return content
182
 
183
  except UnicodeDecodeError:
184
- # Try binary read for non-text files
185
  try:
186
  with open(full_path, 'rb') as f:
187
  binary_content = f.read()
188
  return f"File appears to be binary ({len(binary_content)} bytes). Cannot display as text.\nFile type: {ext}\nConsider using audio_transcription_tool for audio files."
189
  except Exception as bin_e:
190
  return f"Error: Could not read file as text or binary: {str(bin_e)}"
191
-
192
- except PermissionError:
193
- return f"Error: Permission denied reading '{full_path}'."
194
- except IsADirectoryError:
195
- return f"Error: '{full_path}' is a directory, not a file. Use list_directory to see its contents."
196
  except Exception as read_e:
197
- tb_str = traceback.format_exc()
198
- return f"Error reading file: {str(read_e)}\n{tb_str}"
199
 
200
  except Exception as e:
201
- tb_str = traceback.format_exc()
202
- print(f"--- Read File Tool FAILED ---\n{tb_str}\n---")
203
  return f"Unexpected error accessing file '{path}': {str(e)}"
204
 
205
 
@@ -217,7 +200,6 @@ def write_file(path: str, content: str) -> str:
217
  base_dir = os.getcwd()
218
  full_path = os.path.join(base_dir, path)
219
 
220
- # Create directories if needed
221
  dir_path = os.path.dirname(full_path)
222
  if dir_path:
223
  os.makedirs(dir_path, exist_ok=True)
@@ -227,11 +209,8 @@ def write_file(path: str, content: str) -> str:
227
 
228
  return f"Successfully wrote {len(content)} characters to '{path}'."
229
 
230
- except PermissionError:
231
- return f"Error: Permission denied writing to '{path}'."
232
  except Exception as e:
233
- tb_str = traceback.format_exc()
234
- return f"Error writing file '{path}': {str(e)}\n{tb_str}"
235
 
236
 
237
  @tool
@@ -254,10 +233,7 @@ def list_directory(path: str = ".") -> str:
254
  if not items:
255
  return f"Directory '{path}' is empty."
256
 
257
- # Separate files and directories
258
- files = []
259
- directories = []
260
-
261
  for item in sorted(items):
262
  item_path = os.path.join(full_path, item)
263
  if os.path.isdir(item_path):
@@ -274,11 +250,8 @@ def list_directory(path: str = ".") -> str:
274
 
275
  return result
276
 
277
- except PermissionError:
278
- return f"Error: Permission denied listing directory '{path}'."
279
  except Exception as e:
280
- tb_str = traceback.format_exc()
281
- return f"Error listing directory '{path}': {str(e)}\n{tb_str}"
282
 
283
 
284
  @tool
@@ -293,7 +266,6 @@ def audio_transcription_tool(file_path: str) -> str:
293
  return "Error: ASR pipeline is not available. Audio transcription cannot be performed."
294
 
295
  try:
296
- # Find file using same strategy as read_file
297
  script_dir = os.getcwd()
298
  safe_path = os.path.normpath(file_path)
299
 
@@ -317,17 +289,15 @@ def audio_transcription_tool(file_path: str) -> str:
317
  result_text = transcription.get("text", "")
318
 
319
  if not result_text:
320
- return "Error: Transcription produced no text. The audio file may be empty or corrupted."
321
 
322
- # Truncate if too long
323
  if len(result_text) > MAX_MESSAGE_LENGTH:
324
- result_text = result_text[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, original length unknown]"
325
 
326
  return f"Transcription:\n{result_text}"
327
 
328
  except Exception as e:
329
- tb_str = traceback.format_exc()
330
- return f"Error transcribing '{file_path}': {str(e)}\n{tb_str}"
331
 
332
 
333
  @tool
@@ -339,100 +309,96 @@ def get_youtube_transcript(video_url: str) -> str:
339
  print(f"--- Calling YouTube Transcript: {video_url} ---")
340
 
341
  try:
342
- # Extract video ID
343
  video_id = None
344
  if "watch?v=" in video_url:
345
  video_id = video_url.split("v=")[1].split("&")[0]
346
  elif "youtu.be/" in video_url:
347
  video_id = video_url.split("youtu.be/")[1].split("?")[0]
348
- elif len(video_url) == 11 and video_url.isalnum(): # Direct video ID
349
- video_id = video_url
350
 
351
  if not video_id:
352
- return f"Error: Could not extract YouTube video ID from '{video_url}'. Provide a valid YouTube URL."
353
-
354
- print(f"Fetching transcript for video ID: {video_id}")
355
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
356
 
357
  if not transcript_list:
358
- return "Error: No transcript found for this video. It may not have captions available."
359
-
360
  full_transcript = " ".join([item["text"] for item in transcript_list])
361
 
362
- # Truncate if too long
363
  if len(full_transcript) > MAX_MESSAGE_LENGTH:
364
- full_transcript = full_transcript[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(full_transcript)} total chars]"
365
 
366
  return f"YouTube Transcript:\n{full_transcript}"
367
 
368
  except Exception as e:
369
- tb_str = traceback.format_exc()
370
- return f"Error getting transcript for '{video_url}': {str(e)}\nThis video may not have transcripts available.\n{tb_str}"
371
 
372
 
 
373
  @tool
374
- def scrape_web_page(url: str) -> str:
375
- """Fetches and extracts the main text content from a webpage."""
376
- if not isinstance(url, str) or not url.strip():
377
- return "Error: Invalid input. 'url' must be a non-empty string."
 
378
 
379
- if not url.lower().startswith(('http://', 'https://')):
 
 
 
 
380
  return f"Error: Invalid URL. Must start with http:// or https://. Got: '{url}'"
381
-
382
- print(f"--- Calling Web Scraper: {url} ---")
 
 
 
 
383
 
384
  try:
 
385
  headers = {
386
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
387
  }
388
-
389
  response = requests.get(url, headers=headers, timeout=20)
390
  response.raise_for_status()
391
 
392
- content_type = response.headers.get('Content-Type', '').lower()
393
- if 'html' not in content_type:
394
- return f"Error: URL returned '{content_type}', not HTML. Cannot scrape non-HTML content."
395
-
396
  soup = BeautifulSoup(response.text, 'html.parser')
397
-
398
- # Remove unwanted elements
399
- for tag in soup(["script", "style", "nav", "footer", "aside", "header",
400
- "form", "button", "input", "img", "link", "meta"]):
401
  tag.extract()
402
 
403
- # Try to find main content area
404
- main_content = (soup.find('main') or
405
- soup.find('article') or
406
- soup.find('div', role='main') or
407
- soup.find('div', class_=lambda x: x and 'content' in x.lower()) or
408
- soup.body)
409
-
410
  if not main_content:
411
- return "Error: Could not find main content area on the page."
412
 
413
  text = main_content.get_text(separator='\n', strip=True)
414
-
415
- # Clean up whitespace
416
- lines = (line.strip() for line in text.splitlines())
417
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
418
- text = '\n'.join(chunk for chunk in chunks if chunk)
419
 
420
  if not text:
421
- return "Error: Scraped content was empty after cleaning."
422
 
423
- # Truncate if too long
424
- if len(text) > MAX_MESSAGE_LENGTH:
425
- text = text[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(text)} total chars]"
 
 
 
 
426
 
427
- return f"Content from {url}:\n\n{text}"
 
 
428
 
429
- except requests.exceptions.Timeout:
430
- return f"Error: Request to {url} timed out after 20 seconds."
431
- except requests.exceptions.RequestException as req_e:
432
- return f"Error fetching URL {url}: {str(req_e)}"
 
 
 
433
  except Exception as e:
434
  tb_str = traceback.format_exc()
435
- return f"Error scraping {url}: {str(e)}\n{tb_str}"
436
 
437
 
438
  @tool
@@ -440,10 +406,6 @@ def final_answer_tool(answer: str) -> str:
440
  """
441
  Call this tool ONLY when you have the final, definitive answer.
442
  The 'answer' must be EXACTLY what was asked for, with no extra text.
443
- Examples:
444
- - If asked for a number: "42" (not "The answer is 42")
445
- - If asked for a list: "apple, banana, cherry"
446
- - If asked for a name: "John Smith"
447
  """
448
  if not isinstance(answer, str):
449
  try:
@@ -461,16 +423,13 @@ def remove_fences_simple(text):
461
  """Remove code fences from text."""
462
  original_text = text
463
  text = text.strip()
464
-
465
  if text.startswith("```") and text.endswith("```"):
466
  text = text[3:-3].strip()
467
  if '\n' in text:
468
  first_line, rest = text.split('\n', 1)
469
- # Remove language identifier
470
  if first_line.strip().replace('_','').isalnum() and len(first_line.strip()) < 15:
471
  text = rest.strip()
472
  return text
473
-
474
  return original_text
475
 
476
 
@@ -483,7 +442,7 @@ defined_tools = [
483
  list_directory,
484
  audio_transcription_tool,
485
  get_youtube_transcript,
486
- scrape_web_page,
487
  final_answer_tool
488
  ]
489
 
@@ -491,59 +450,29 @@ defined_tools = [
491
  # --- LangGraph Agent State ---
492
  class AgentState(TypedDict):
493
  messages: Annotated[List[AnyMessage], add_messages]
 
494
  turn: int
495
 
496
 
497
  # --- Conditional Edge Function ---
498
- def should_continue(state: AgentState):
499
- """Decide whether to continue, call tools, or end."""
500
- last_message = state['messages'][-1]
501
- current_turn = state.get('turn', 0)
502
-
503
- # 1. Check for final_answer_tool
504
- if isinstance(last_message, AIMessage) and last_message.tool_calls:
505
- for tool_call in last_message.tool_calls:
506
- if tool_call.get("name") == "final_answer_tool":
507
- print("--- Condition: final_answer_tool called, ending. ---")
508
- return END
509
-
510
- # 2. Check turn limit
511
- if current_turn >= MAX_TURNS:
512
- print(f"--- Condition: Max turns ({MAX_TURNS}) reached. Ending. ---")
513
- state['messages'].append(
514
- SystemMessage(content=f"SYSTEM: Maximum turn limit ({MAX_TURNS}) reached. Ending execution.")
515
- )
516
  return END
517
 
518
- # 3. Route to tools if tool calls exist
519
- if isinstance(last_message, AIMessage) and last_message.tool_calls:
520
- print("--- Condition: Tools called, routing to tools node. ---")
521
- return "tools"
522
-
523
- # 4. NEW LOOP PREVENTION:
524
- # Check for consecutive AI messages without tool calls.
525
- # This catches "thinking" loops or raw answer dribbling (like "58").
526
- if isinstance(last_message, AIMessage) and not last_message.tool_calls:
527
- # Check if the message *before* this one was ALSO an AIMessage.
528
- # We need at least 3 messages total (System, Human, AI-Turn1-Plan)
529
- # for this check to be valid, so we check len > 2.
530
- if len(state['messages']) > 2 and isinstance(state['messages'][-2], AIMessage):
531
- print(f"--- Condition: Detected 2+ consecutive AI messages (Turn {current_turn}). Ending to prevent loop. ---")
532
- state['messages'].append(
533
- SystemMessage(content=f"SYSTEM: Agent stuck in a loop (consecutive non-tool-call AI messages). Ending execution.")
534
- )
535
- return END
536
-
537
- # 5. Default: Loop back to agent (e.g., after Turn 1 plan)
538
- print(f"--- Condition: No tool call (Turn {current_turn}). Continuing to agent. ---")
539
- return "agent"
540
-
541
 
542
  # ====================================================
543
  # --- Basic Agent Class ---
544
  class BasicAgent:
545
  def __init__(self):
546
- print("BasicAgent (LangGraph) initializing...")
547
 
548
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
549
  if not GROQ_API_KEY:
@@ -551,292 +480,283 @@ class BasicAgent:
551
 
552
  self.tools = defined_tools
553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  # Build tool descriptions
555
  tool_desc_list = []
556
  for tool in self.tools:
557
- if tool.name == 'code_interpreter':
558
- desc = (
559
- f"- {tool.name}: Executes Python code. Use for calculations, data analysis, logic puzzles.\n"
560
- f" **CRITICAL RULES:**\n"
561
- f" 1. ALWAYS use print() to output results\n"
562
- f" 2. Write simple, focused code (one task per execution)\n"
563
- f" 3. Add comments (#) to explain your logic\n"
564
- f" Available: pandas as pd"
565
- )
566
- else:
567
- desc = f"- {tool.name}: {tool.description}"
568
  tool_desc_list.append(desc)
569
-
570
  tool_descriptions = "\n".join(tool_desc_list)
571
 
572
- # ==================== SYSTEM PROMPT V5 ====================
573
  self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
574
  Your goal: Provide the EXACT answer in the EXACT format requested.
575
 
576
  **PROTOCOL:**
577
 
578
- 1. **ANALYZE QUESTION:**
579
- - What information is needed?
580
- - What format should the answer be? (number, list, yes/no, name, etc.)
581
- - Are there any files attached?
582
-
583
- 2. **FIRST TURN - MAKE A PLAN:**
584
- Your FIRST response MUST be a brief plan (2-3 sentences):
585
- - What tools you'll use
586
- - What order you'll use them
587
- - What format the final answer should be
588
- DO NOT call tools on your first turn!
589
-
590
- 3. **EXECUTE:**
591
- - Call ONE tool per turn
592
- - Wait for the result before planning your next step
593
- - For ANY calculation or logic: use code_interpreter with print()
594
-
595
- 4. **VERIFY RESULTS:**
596
- - Check if tool output contains errors
597
- - If error: plan a different approach
598
- - If success: decide if you need more info or have the answer
599
-
600
- 5. **FINISH:**
601
- When you have the answer from a tool output:
602
- - Call final_answer_tool immediately
603
- - Provide ONLY the exact answer (no explanations!)
604
 
605
  **CRITICAL RULES:**
606
 
607
- NEVER guess or use training data for the final answer
608
- NEVER call multiple tools in one turn
609
- NEVER add explanations to final_answer_tool
610
- ALWAYS use code_interpreter for calculations/logic
611
- ✅ ALWAYS match the requested answer format exactly
612
- ALWAYS base your answer on tool outputs, not memory
 
 
 
 
613
 
614
- **ANSWER FORMAT EXAMPLES:**
615
- - "What is 5+5?" final_answer("10")
616
- - "List the colors" → final_answer("red, blue, green")
617
- - "Is it true?" → final_answer("Yes") or final_answer("No")
618
- - "What's the name?" → final_answer("John Smith")
619
 
620
  **TOOLS:**
621
  {tool_descriptions}
622
 
623
- **REMEMBER:** One tool per turn. Base everything on tool outputs. Match the format exactly.
624
  """
625
 
626
- print("Initializing Groq LLM...")
627
  try:
628
- chat_llm = ChatGroq(
629
- temperature=0, # Maximum determinism
 
630
  groq_api_key=GROQ_API_KEY,
631
- model_name="openai/gpt-oss-120b", # Best reasoning model
 
 
 
 
 
 
 
 
 
 
632
  max_tokens=4096,
633
  timeout=60
634
  )
635
- print("✅ Groq LLM initialized with llama-3.3-70b-versatile")
636
  except Exception as e:
637
  print(f"❌ Error initializing Groq: {e}")
638
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
 
640
- self.llm_with_tools = chat_llm.bind_tools(self.tools)
641
- print("✅ Tools bound to LLM")
642
-
643
- # --- Agent Node ---
644
- # --- Agent Node (v3 - Simplified) ---
645
- def agent_node(state: AgentState):
646
  current_turn = state.get('turn', 0) + 1
647
  print(f"\n{'='*60}")
648
- print(f"AGENT TURN {current_turn}/{MAX_TURNS}")
649
  print('='*60)
 
 
 
 
 
 
 
 
650
 
651
- messages_to_send = state["messages"]
 
652
 
653
- # Retry logic with exponential backoff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  max_retries = 3
655
  ai_message = None
656
-
657
  for attempt in range(max_retries):
658
  try:
659
- ai_message = self.llm_with_tools.invoke(messages_to_send)
 
660
  break
661
  except Exception as e:
662
- print(f"⚠️ LLM attempt {attempt+1}/{max_retries} failed: {e}")
663
  if attempt == max_retries - 1:
664
- error_msg = AIMessage(
665
- content=f"Error: LLM failed after {max_retries} attempts: {str(e)}"
666
  )
667
- return {"messages": [error_msg], "turn": current_turn}
668
- time.sleep(2 ** attempt) # Exponential backoff
669
-
670
-
671
- # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
672
- # --- ROBUST FALLBACK PARSING BLOCK ---
673
- # (We still need this to catch malformed tool calls)
674
-
675
- if not ai_message.tool_calls and isinstance(ai_message.content, str) and ai_message.content.strip():
676
- content = ai_message.content
677
- tool_name = None
678
- tool_input = None
679
-
680
- # 1. Try to parse <function(tool_name)>{json}</function>
681
- func_match = re.search(
682
- r"<function\(([^)]+)\)>(\{.*?\})(?:</function>)?",
683
- content,
684
- re.DOTALL | re.IGNORECASE
685
- )
686
-
687
- if func_match:
688
- try:
689
- tool_name = func_match.group(1).strip()
690
- json_str = func_match.group(2)
691
- tool_input = json.loads(json_str)
692
- print(f"🔧 Fallback (Format 1): Parsed tool call for '{tool_name}'")
693
- except json.JSONDecodeError as e:
694
- print(f"⚠️ Fallback (Format 1): Failed to parse JSON: {e}")
695
- tool_name = None
696
-
697
- # 2. If Format 1 failed, try to parse bare JSON
698
- if not tool_name:
699
- json_match = re.search(
700
- r"```(?:json)?\s*(\{.*?\})\s*```|(\{.*?\})",
701
- content,
702
- re.DOTALL | re.IGNORECASE
703
- )
704
- if json_match:
705
- json_str = json_match.group(1) or json_match.group(2)
706
- try:
707
- parsed_json = json.loads(json_str)
708
- if isinstance(parsed_json, dict):
709
- if "tool" in parsed_json and "tool_input" in parsed_json:
710
- tool_name = parsed_json.get("tool")
711
- tool_input = parsed_json.get("tool_input", {})
712
- elif "code" in parsed_json:
713
- tool_name = "code_interpreter"
714
- tool_input = parsed_json
715
- elif "answer" in parsed_json:
716
- tool_name = "final_answer_tool"
717
- tool_input = parsed_json
718
-
719
- if tool_name:
720
- print(f"🔧 Fallback (Format 2): Parsed tool call for '{tool_name}'")
721
- except json.JSONDecodeError as e:
722
- print(f"⚠️ Fallback (Format 2): Failed to parse JSON: {e}")
723
-
724
- # --- If any fallback parser succeeded, build the tool call ---
725
- if tool_name and tool_input is not None and any(t.name == tool_name for t in self.tools):
726
- print(f"🔧 Fallback SUCCESS: Rebuilding tool call for '{tool_name}'")
727
- tool_call = ToolCall(
728
- name=tool_name,
729
- args=tool_input,
730
- id=str(uuid.uuid4())
731
- )
732
- ai_message.tool_calls = [tool_call]
733
- ai_message.content = ""
734
-
735
- elif not tool_name:
736
- # We still want to log if it's just dribbling text
737
- print(f"⚠️ Fallback FAILED: Could not parse any tool call from content:\n{content[:200]}...")
738
- # --- END OF REPLACEMENT BLOCK ---
739
- # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
740
-
741
 
742
- # --- Logging ---
743
  if ai_message.tool_calls:
744
- for tc in ai_message.tool_calls:
745
- print(f"🔧 Tool Call: {tc.get('name')}")
746
- print(f" Args: {tc.get('args', {})}")
747
- elif ai_message.content:
748
- content_preview = ai_message.content[:300]
749
- if len(ai_message.content) > 300:
750
- content_preview += "..."
751
- print(f"💭 Agent Reasoning:\n{content_preview}")
752
 
753
- return {"messages": [ai_message], "turn": current_turn}
754
- # --- Tool Node ---
 
755
  tool_node = ToolNode(self.tools)
756
-
757
- # --- Build Graph ---
758
- print("Building agent graph...")
759
  graph_builder = StateGraph(AgentState)
760
- graph_builder.add_node("agent", agent_node)
 
 
761
  graph_builder.add_node("tools", tool_node)
762
 
763
- graph_builder.add_edge(START, "agent")
764
- graph_builder.add_edge("tools", "agent")
765
 
766
  graph_builder.add_conditional_edges(
767
- "agent",
768
- should_continue,
769
  {
770
- "tools": "tools",
771
- "agent": "agent",
772
  END: END
773
  }
774
  )
775
 
 
 
 
776
  self.graph = graph_builder.compile()
777
- print("✅ Graph compiled successfully")
 
778
  def __call__(self, question: str) -> str:
779
  print(f"\n--- Starting Agent Run for Question ---")
780
  print(f"Agent received question (first 100 chars): {question[:100]}...")
781
 
782
- # Initialize graph input with turn counter
783
  graph_input = {
784
  "messages": [
785
  SystemMessage(content=self.system_prompt),
786
  HumanMessage(content=question)
787
  ],
 
788
  "turn": 0
789
  }
790
 
791
  final_answer = "AGENT FAILED TO PRODUCE ANSWER"
792
  try:
793
- # Add config for recursion limit (LangGraph default is 25, but our turn limit is softer)
794
- config = {"recursion_limit": MAX_TURNS + 5} # Allow slightly more graph steps than turns
795
  for event in self.graph.stream(graph_input, stream_mode="values", config=config):
796
  last_message = event["messages"][-1]
797
 
798
- # Check for final answer extraction
799
  if isinstance(last_message, AIMessage) and last_message.tool_calls:
800
  if last_message.tool_calls[0].get("name") == "final_answer_tool":
801
  final_answer = last_message.tool_calls[0]['args'].get('answer', "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER")
802
  print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
803
- # We can break here since the graph condition should lead to END anyway
804
  break
805
 
806
- # Log other message types (optional but helpful)
807
  elif isinstance(last_message, ToolMessage):
808
  print(f"Tool Result ({last_message.tool_call_id}): {last_message.content[:500]}...")
809
  elif isinstance(last_message, AIMessage) and not last_message.tool_calls:
810
- # This is now expected (the "plan" or "think" step)
811
- print(f"AI Message (Plan/Thought): {last_message.content[:500]}...")
812
- # Don't set final_answer here anymore, only final_answer_tool counts
813
 
814
- # --- Cleaning step (Keep as is) ---
815
  cleaned_answer = str(final_answer).strip()
816
- # ... (keep existing prefix removal and fence removal logic) ...
817
  prefixes_to_remove = ["The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"]
818
  original_cleaned = cleaned_answer
819
  for prefix in prefixes_to_remove:
820
  if cleaned_answer.lower().startswith(prefix.lower()):
821
  potential_answer = cleaned_answer[len(prefix):].strip()
822
  if potential_answer: cleaned_answer = potential_answer; break
823
- if cleaned_answer == original_cleaned and any(cleaned_answer.lower().startswith(p.lower()) for p in prefixes_to_remove):
824
- print(f"Warning: Prefix found but not stripped: '{original_cleaned[:100]}...'")
825
- # Simple fence removal
826
  cleaned_answer = remove_fences_simple(cleaned_answer)
827
  if cleaned_answer.startswith("`") and cleaned_answer.endswith("`"):
828
- cleaned_answer = cleaned_answer[1:-1].strip()
 
829
  print(f"Agent returning final answer (cleaned): '{cleaned_answer}'")
830
  return cleaned_answer
 
831
  except Exception as e:
832
  print(f"Error running agent graph: {e}")
833
  tb_str = traceback.format_exc()
834
  print(tb_str)
835
- # Check if it was specifically our turn limit message
836
- if isinstance(e, SystemMessage) and f"maximum turn limit ({MAX_TURNS})" in str(e.content):
837
- return f"AGENT STOPPED: Reached maximum turn limit ({MAX_TURNS})."
838
  return f"AGENT GRAPH ERROR: {e}"
839
- # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
 
 
 
 
 
 
 
 
 
 
 
 
840
  # --- (Original Template Code - Mock Questions Version) ---
841
  def run_and_submit_all( profile: gr.OAuthProfile | None):
842
  """
@@ -846,13 +766,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
846
  space_id = os.getenv("SPACE_ID")
847
  username = profile.username if profile else "local_test_user"
848
  print(f"User: {username}{'' if profile else ' (dummy)'}")
849
- submit_url = f"{DEFAULT_API_URL}/submit"
850
- print("Instantiating agent...")
851
- try:
852
- agent = BasicAgent()
853
- if asr_pipeline is None: print("⚠️ Global ASR Pipeline failed load.")
854
- except Exception as e: print(f"Error instantiating agent: {e}"); import traceback; traceback.print_exc(); return f"Error initializing agent: {e}", None
855
- print("Agent instantiated successfully.")
856
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
857
  print(f"Agent code URL: {agent_code}")
858
  print("--- USING MOCK QUESTIONS ---")
@@ -953,12 +872,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
953
  file_path = item.get("file_path")
954
  question_text_with_context = question_text
955
  if file_path:
956
- base_dir = os.getcwd()
957
- potential_path = os.path.join(base_dir, file_path)
958
- file_context = f"[Attached File (provided): {file_path}]"
959
- if os.path.exists(potential_path): file_context = f"[Attached File (exists): {file_path}]"
960
- else: file_context = f"[Attached File (NOT FOUND): {file_path}]"
961
- question_text_with_context = f"{question_text}\n\n{file_context}"
962
  print(f"Q includes file: {file_path}")
963
 
964
  submitted_answer = agent(question_text_with_context)
@@ -1010,4 +924,4 @@ if __name__ == "__main__":
1010
  except FileNotFoundError: print("Warning: CWD listing failed.")
1011
  print("-"*(60 + len(" App Starting ")) + "\n")
1012
  print("Launching Gradio Interface...")
1013
- demo.queue().launch(debug=True, share=False)
 
27
  from langchain_core.tools import tool
28
  from langchain_groq import ChatGroq
29
 
30
+ # --- RAG Imports ---
31
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
32
+ from langchain_community.vectorstores import FAISS
33
+ from langchain_community.embeddings import HuggingFaceEmbeddings
34
+
35
  # --- Constants ---
36
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
37
+ MAX_TURNS = 20
38
+ MAX_MESSAGE_LENGTH = 8000
39
 
40
  # --- Initialize ASR Pipeline ---
41
  asr_pipeline = None
 
55
  print(f"⚠️ Warning: Could not load ASR pipeline globally. Error: {e}")
56
  asr_pipeline = None
57
 
58
+ # Global agent declaration for RAG tool access
59
+ agent = None
60
+
61
  # ====================================================
62
  # --- Tool Definitions ---
63
 
 
71
  try:
72
  search = DuckDuckGoSearchRun()
73
  result = search.run(query)
 
74
  if len(result) > MAX_MESSAGE_LENGTH:
75
  result = result[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(result)} total chars]"
76
  return result
77
  except Exception as e:
 
 
78
  return f"Error running search for '{query}': {str(e)}"
79
 
80
 
 
99
  if pattern in code_lower:
100
  return f"Error: Potentially dangerous operation '{pattern}' is not allowed."
101
 
 
102
  if 'open(' in code_lower and any(mode in code for mode in ["'w'", '"w"', "'a'", '"a"', "'wb'", '"wb"']):
103
  return "Error: Writing files is not allowed in code_interpreter. Use write_file tool instead."
104
 
 
121
  return f"Error in execution:\n{stderr}\n\nStdout (if any):\n{stdout}"
122
 
123
  if stdout:
 
124
  if len(stdout) > MAX_MESSAGE_LENGTH:
125
  stdout = stdout[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(stdout)} total chars]"
126
  return f"Success:\n{stdout}"
 
129
 
130
  except Exception as e:
131
  tb_str = traceback.format_exc()
132
+ return f"Execution failed:\n{tb_str}"
 
 
133
 
134
 
135
  @tool
 
144
  script_dir = os.getcwd()
145
  safe_path = os.path.normpath(path)
146
 
 
147
  paths_to_try = [
148
+ os.path.join(script_dir, safe_path),
149
+ safe_path,
150
+ os.path.join(os.getcwd(), os.path.basename(safe_path))
151
  ]
152
 
153
  full_path = None
 
157
  break
158
 
159
  if not full_path:
160
+ cwd_files = os.listdir(".")
 
 
 
161
  return (f"Error: File not found: '{path}'\n"
162
+ f"Tried paths:\n" + "\n".join(f" - {p}" for p in paths_to_try) +
163
+ f"\n\nFiles in current directory: {cwd_files}")
164
 
165
  print(f"Reading file: {full_path}")
 
 
166
  _, ext = os.path.splitext(full_path)
167
 
168
  try:
169
  with open(full_path, 'r', encoding='utf-8') as f:
170
  content = f.read()
 
 
171
  if len(content) > MAX_MESSAGE_LENGTH:
172
  content = content[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(content)} total chars]"
 
173
  return content
174
 
175
  except UnicodeDecodeError:
 
176
  try:
177
  with open(full_path, 'rb') as f:
178
  binary_content = f.read()
179
  return f"File appears to be binary ({len(binary_content)} bytes). Cannot display as text.\nFile type: {ext}\nConsider using audio_transcription_tool for audio files."
180
  except Exception as bin_e:
181
  return f"Error: Could not read file as text or binary: {str(bin_e)}"
 
 
 
 
 
182
  except Exception as read_e:
183
+ return f"Error reading file: {str(read_e)}"
 
184
 
185
  except Exception as e:
 
 
186
  return f"Unexpected error accessing file '{path}': {str(e)}"
187
 
188
 
 
200
  base_dir = os.getcwd()
201
  full_path = os.path.join(base_dir, path)
202
 
 
203
  dir_path = os.path.dirname(full_path)
204
  if dir_path:
205
  os.makedirs(dir_path, exist_ok=True)
 
209
 
210
  return f"Successfully wrote {len(content)} characters to '{path}'."
211
 
 
 
212
  except Exception as e:
213
+ return f"Error writing file '{path}': {str(e)}"
 
214
 
215
 
216
  @tool
 
233
  if not items:
234
  return f"Directory '{path}' is empty."
235
 
236
+ files, directories = [], []
 
 
 
237
  for item in sorted(items):
238
  item_path = os.path.join(full_path, item)
239
  if os.path.isdir(item_path):
 
250
 
251
  return result
252
 
 
 
253
  except Exception as e:
254
+ return f"Error listing directory '{path}': {str(e)}"
 
255
 
256
 
257
  @tool
 
266
  return "Error: ASR pipeline is not available. Audio transcription cannot be performed."
267
 
268
  try:
 
269
  script_dir = os.getcwd()
270
  safe_path = os.path.normpath(file_path)
271
 
 
289
  result_text = transcription.get("text", "")
290
 
291
  if not result_text:
292
+ return "Error: Transcription produced no text."
293
 
 
294
  if len(result_text) > MAX_MESSAGE_LENGTH:
295
+ result_text = result_text[:MAX_MESSAGE_LENGTH] + f"\n...[truncated]"
296
 
297
  return f"Transcription:\n{result_text}"
298
 
299
  except Exception as e:
300
+ return f"Error transcribing '{file_path}': {str(e)}"
 
301
 
302
 
303
  @tool
 
309
  print(f"--- Calling YouTube Transcript: {video_url} ---")
310
 
311
  try:
 
312
  video_id = None
313
  if "watch?v=" in video_url:
314
  video_id = video_url.split("v=")[1].split("&")[0]
315
  elif "youtu.be/" in video_url:
316
  video_id = video_url.split("youtu.be/")[1].split("?")[0]
 
 
317
 
318
  if not video_id:
319
+ return f"Error: Could not extract YouTube video ID from '{video_url}'."
320
+
 
321
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
322
 
323
  if not transcript_list:
324
+ return "Error: No transcript found for this video."
325
+
326
  full_transcript = " ".join([item["text"] for item in transcript_list])
327
 
 
328
  if len(full_transcript) > MAX_MESSAGE_LENGTH:
329
+ full_transcript = full_transcript[:MAX_MESSAGE_LENGTH] + f"\n...[truncated]"
330
 
331
  return f"YouTube Transcript:\n{full_transcript}"
332
 
333
  except Exception as e:
334
+ return f"Error getting transcript for '{video_url}': {str(e)}"
 
335
 
336
 
337
# --- NEW RAG-BASED SCRAPER TOOL ---
@tool
def scrape_and_retrieve(url: str, query: str) -> str:
    """
    Scrapes a webpage, chunks its content, and performs a RAG (Retrieval-Augmented Generation)
    search to find the most relevant information related to a query.
    Use this to "ask a question" of a webpage.

    Args:
        url (str): The URL to scrape (must start with http:// or https://).
        query (str): The specific question to answer or information to find on the page.
    """
    if not url.lower().startswith(('http://', 'https://')):
        return f"Error: Invalid URL. Must start with http:// or https://. Got: '{url}'"
    if not query:
        return "Error: A query is required to search the page content."
    # The global agent owns the shared embeddings model and text splitter.
    if not agent or not agent.embeddings or not agent.text_splitter:
        return "Error: RAG components are not initialized. Cannot perform retrieval."

    print(f"--- Calling RAG Scraper: {url} for query: {query} ---")

    try:
        # 1. Scrape (browser-like User-Agent avoids some trivial bot blocks).
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=20)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # Strip boilerplate elements that would pollute retrieval.
        for tag in soup(["script", "style", "nav", "footer", "aside", "header"]):
            tag.extract()

        main_content = soup.find('main') or soup.find('article') or soup.body
        if not main_content:
            return "Error: Could not find main content on the page."

        text = main_content.get_text(separator='\n', strip=True)
        text = '\n'.join(chunk for chunk in (line.strip() for line in text.splitlines()) if chunk)

        if not text:
            return "Error: Scraped content was empty."

        # 2. Split into overlapping chunks.
        docs = agent.text_splitter.create_documents([text])
        if not docs:
            return "Error: Text could not be split into documents."

        # 3. Embed & create an in-memory vector store.
        db = FAISS.from_documents(docs, agent.embeddings)

        # 4. Retrieve the most relevant chunks.
        retriever = db.as_retriever(search_kwargs={"k": 5})  # Get top 5 chunks
        retrieved_docs = retriever.invoke(query)

        if not retrieved_docs:
            return "Error: No relevant information found on the page for that query."

        # 5. Format and return. Truncate like every other tool so a huge page
        #    cannot blow up the LLM context window (fix: was previously unbounded).
        context = "\n\n---\n\n".join(doc.page_content for doc in retrieved_docs)
        if len(context) > MAX_MESSAGE_LENGTH:
            context = context[:MAX_MESSAGE_LENGTH] + f"\n...[truncated, {len(context)} total chars]"
        return f"Relevant Context from {url} for query '{query}':\n\n{context}"

    except Exception as e:
        tb_str = traceback.format_exc()
        return f"Error scraping or retrieving from {url}: {str(e)}\n{tb_str}"
402
 
403
 
404
  @tool
 
406
  """
407
  Call this tool ONLY when you have the final, definitive answer.
408
  The 'answer' must be EXACTLY what was asked for, with no extra text.
 
 
 
 
409
  """
410
  if not isinstance(answer, str):
411
  try:
 
423
def remove_fences_simple(text: str) -> str:
    """Strip surrounding triple-backtick fences (and a leading language tag) from text.

    Returns the original string untouched when it is not fenced.
    """
    untouched = text
    stripped = text.strip()
    if not (stripped.startswith("```") and stripped.endswith("```")):
        return untouched
    inner = stripped[3:-3].strip()
    if '\n' in inner:
        first_line, remainder = inner.split('\n', 1)
        tag = first_line.strip()
        # A short alphanumeric first line is treated as a language tag (e.g. "python").
        if tag.replace('_', '').isalnum() and len(tag) < 15:
            inner = remainder.strip()
    return inner
434
 
435
 
 
442
  list_directory,
443
  audio_transcription_tool,
444
  get_youtube_transcript,
445
+ scrape_and_retrieve, # Replaced scrape_web_page
446
  final_answer_tool
447
  ]
448
 
 
450
# --- LangGraph Agent State ---
class AgentState(TypedDict):
    """State threaded through the planner-executor graph."""
    messages: Annotated[List[AnyMessage], add_messages]  # accumulated chat history
    plan: List[str]  # remaining plan steps to execute
    turn: int  # planner iterations completed so far
455
 
456
 
457
# --- Conditional Edge Function ---
def route_from_planner(state: AgentState):
    """Route to the executor while plan steps remain; otherwise end the graph."""
    remaining_steps = state.get('plan', [])
    if not remaining_steps:
        print("--- Condition: Plan is empty. Ending. ---")
        return END
    print("--- Condition: Plan has steps. Routing to executor. ---")
    return "executor"
469
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
 
471
  # ====================================================
472
  # --- Basic Agent Class ---
473
  class BasicAgent:
474
  def __init__(self):
475
+ print("BasicAgent (Planner-Executor) initializing...")
476
 
477
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
478
  if not GROQ_API_KEY:
 
480
 
481
  self.tools = defined_tools
482
 
483
+ # --- Initialize RAG Components ---
484
+ print("Initializing RAG components...")
485
+ try:
486
+ self.embeddings = HuggingFaceEmbeddings(
487
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
488
+ model_kwargs={'device': 'cpu'}
489
+ )
490
+ self.text_splitter = RecursiveCharacterTextSplitter(
491
+ chunk_size=1000,
492
+ chunk_overlap=200
493
+ )
494
+ print("✅ RAG components initialized.")
495
+ except Exception as e:
496
+ print(f"⚠️ Warning: Could not initialize RAG components. Error: {e}")
497
+ self.embeddings = None
498
+ self.text_splitter = None
499
+
500
  # Build tool descriptions
501
  tool_desc_list = []
502
  for tool in self.tools:
503
+ desc = f"- {tool.name}: {tool.description}"
 
 
 
 
 
 
 
 
 
 
504
  tool_desc_list.append(desc)
 
505
  tool_descriptions = "\n".join(tool_desc_list)
506
 
507
+ # ==================== SYSTEM PROMPT V7 (Simplified) ====================
508
  self.system_prompt = f"""You are a highly intelligent AI assistant for the GAIA benchmark.
509
  Your goal: Provide the EXACT answer in the EXACT format requested.
510
 
511
  **PROTOCOL:**
512
 
513
+ 1. **ANALYZE:** Read the question. What info is needed? What is the answer format?
514
+ 2. **ACT:** Call ONE tool to get information.
515
+ 3. **EVALUATE:** Look at the tool's output. Do you have the final answer?
516
+ - **If NO:** Go back to Step 2.
517
+ - **If YES:** Call final_answer_tool immediately.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
 
519
  **CRITICAL RULES:**
520
 
521
+ - **TOOL USE:** You MUST use tools to find the answer. Do NOT use your own knowledge.
522
+ - **FINAL ANSWER:** When you have the answer, use final_answer_tool. The 'answer' argument must be the answer ONLY (e.g., "42", "red, blue, green").
523
+ - **JSON FORMAT:** All tool calls MUST be in this exact JSON format:
524
+ {{ "name": "tool_name", "arguments": {{"key": "value"}} }}
525
+
526
+ **EXAMPLE: CODE INTERPRETER**
527
+ {{ "name": "code_interpreter", "arguments": {{"code": "print(1 + 1)"}} }}
528
+
529
+ **EXAMPLE: FINAL ANSWER**
530
+ {{ "name": "final_answer_tool", "arguments": {{"answer": "28"}} }}
531
 
532
+ **EXAMPLE: RAG SCRAPER**
533
+ {{ "name": "scrape_and_retrieve", "arguments": {{"url": "https://example.com", "query": "what is X?"}} }}
 
 
 
534
 
535
  **TOOLS:**
536
  {tool_descriptions}
537
 
538
+ **REMEMBER:** Use tools. Format JSON correctly.
539
  """
540
 
541
+ print("Initializing Groq LLMs...")
542
  try:
543
+ # LLM 1: The Executor (binds to tools)
544
+ self.executor_llm = ChatGroq(
545
+ temperature=0,
546
  groq_api_key=GROQ_API_KEY,
547
+ model_name="llama-3.3-70b-versatile",
548
+ max_tokens=4096,
549
+ timeout=60
550
+ ).bind_tools(self.tools)
551
+ print("✅ Executor LLM (with tools) initialized.")
552
+
553
+ # LLM 2: The Planner (no tools, just reasoning)
554
+ self.planner_llm = ChatGroq(
555
+ temperature=0,
556
+ groq_api_key=GROQ_API_KEY,
557
+ model_name="llama-3.3-70b-versatile",
558
  max_tokens=4096,
559
  timeout=60
560
  )
561
+ print("✅ Planner LLM (no tools) initialized.")
562
  except Exception as e:
563
  print(f"❌ Error initializing Groq: {e}")
564
  raise
565
+
566
+ # --- Define Planner Prompt ---
567
+ self.planner_prompt = f"""You are a master planner. Your job is to create a step-by-step plan
568
+ to solve the user's request. You will be given the user's question and a history of
569
+ all executed steps and their results.
570
+
571
+ Your system prompt (which you must obey) is:
572
+ {self.system_prompt}
573
+
574
+ Review the chat history.
575
+ - If the last message was a tool result, analyze it.
576
+ - If the original goal is not yet met, create an updated, numbered list of the *next* steps.
577
+ - If the goal IS met, or if the last tool call (like final_answer_tool)
578
+ achieved the goal, you must respond with an empty plan list: []
579
+
580
+ **CRITICAL:**
581
+ - Your plan should be a Python list of strings: ["Step 1", "Step 2"]
582
+ - If the user's request is simple (e.g., "What is 2+2?"), your plan might be a single step.
583
+ - If the goal is complete, return an empty list: []
584
+
585
+ Current Chat History:
586
+ [HISTORY]
587
+ """
588
 
589
# --- Node 1: The Planner ---
def planner_node(state: AgentState):
    """Generate/refresh the step plan from chat history.

    Returns an updated state fragment: the new plan list and the incremented
    turn counter. An empty plan makes route_from_planner end the graph.
    """
    current_turn = state.get('turn', 0) + 1
    print(f"\n{'='*60}")
    print(f"PLANNER TURN {current_turn}/{MAX_TURNS}")
    print('='*60)

    if current_turn > MAX_TURNS:
        print("--- Condition: Max turns reached. Ending. ---")
        return {"plan": []}

    # Format history for the prompt
    history_str = "\n".join(msg.pretty_repr() for msg in state['messages'])
    prompt = self.planner_prompt.replace("[HISTORY]", history_str)

    # Planner just generates text (the plan)
    plan_str = self.planner_llm.invoke(prompt).content

    # Try to parse the plan string into a list
    plan_list = []
    try:
        # Greedy match (fix: non-greedy stopped at the first ']', truncating
        # any plan whose step text contains a bracket or a nested list).
        match = re.search(r"(\[.*\])", plan_str, re.DOTALL)
        if match:
            raw_list = match.group(1)
            try:
                plan_list = json.loads(raw_list)
            except json.JSONDecodeError:
                # LLMs frequently emit Python-style single-quoted lists that
                # json.loads rejects; fall back to a safe literal parse so the
                # run is not ended prematurely with an empty plan.
                import ast
                plan_list = ast.literal_eval(raw_list)

        if not isinstance(plan_list, list):
            plan_list = []

    except Exception as e:
        print(f"⚠️ Planner Error: Could not parse plan. Defaulting to empty plan. Error: {e}")
        print(f"Raw plan string: {plan_str}")
        plan_list = []

    print(f"📋 Plan Generated: {plan_list}")
    return {"plan": plan_list, "turn": current_turn}
625
+
626
# --- Node 2: The Executor ---
def executor_node(state: AgentState):
    """Execute the first plan step by asking the tool-bound LLM for ONE tool call.

    Pops the head of the plan, invokes the executor LLM (with retries), and
    returns the AI message plus the remaining plan.
    """
    print(f"\n--- EXECUTOR ---")

    plan = state['plan']
    current_step = plan[0]
    remaining_plan = plan[1:]

    print(f"Executing Step: {current_step}")

    executor_messages = state['messages'] + [
        HumanMessage(
            content=f"My current task is to: {current_step}\n\n"
                    "Based on this task and the chat history, "
                    "call the ONE most appropriate tool."
        )
    ]

    max_retries = 3
    ai_message = None
    for attempt in range(max_retries):
        try:
            # Executor calls the tool-bound LLM
            ai_message = self.executor_llm.invoke(executor_messages)
            break
        except Exception as e:
            print(f"⚠️ Executor LLM attempt {attempt+1}/{max_retries} failed: {e}")
            if attempt == max_retries - 1:
                # Out of retries: surface the failure to the planner instead of crashing.
                ai_message = AIMessage(
                    content=f"Error: Executor LLM failed: {e}"
                )
            else:
                # Exponential backoff — only when another attempt remains
                # (fix: previously also slept 4s after the final failure).
                time.sleep(2 ** attempt)

    if ai_message.tool_calls:
        print(f"🔧 Executor Tool Call: {ai_message.tool_calls[0]['name']}")
    else:
        print("⚠️ Executor: No tool call. Passing reasoning to planner.")

    return {"messages": [ai_message], "plan": remaining_plan}
665
+
666
+ # --- Tool Node ---
667
  tool_node = ToolNode(self.tools)
668
+
669
+ # --- Build Graph ---
670
+ print("Building Planner-Executor graph...")
671
  graph_builder = StateGraph(AgentState)
672
+
673
+ graph_builder.add_node("planner", planner_node)
674
+ graph_builder.add_node("executor", executor_node)
675
  graph_builder.add_node("tools", tool_node)
676
 
677
+ graph_builder.add_edge(START, "planner")
 
678
 
679
  graph_builder.add_conditional_edges(
680
+ "planner",
681
+ route_from_planner,
682
  {
683
+ "executor": "executor",
 
684
  END: END
685
  }
686
  )
687
 
688
+ graph_builder.add_edge("executor", "tools")
689
+ graph_builder.add_edge("tools", "planner") # Loop back to planner
690
+
691
  self.graph = graph_builder.compile()
692
+ print("✅ Planner-Executor graph compiled successfully.")
693
+
694
def __call__(self, question: str) -> str:
    """Run the planner-executor graph on a question and return the cleaned final answer."""
    print(f"\n--- Starting Agent Run for Question ---")
    print(f"Agent received question (first 100 chars): {question[:100]}...")

    # Seed the graph with the system prompt, the question, and an empty plan.
    initial_state = {
        "messages": [
            SystemMessage(content=self.system_prompt),
            HumanMessage(content=question)
        ],
        "plan": [],
        "turn": 0
    }

    final_answer = "AGENT FAILED TO PRODUCE ANSWER"
    try:
        run_config = {"recursion_limit": MAX_TURNS + 5}
        for event in self.graph.stream(initial_state, stream_mode="values", config=run_config):
            latest = event["messages"][-1]
            is_ai = isinstance(latest, AIMessage)

            if is_ai and latest.tool_calls:
                first_call = latest.tool_calls[0]
                if first_call.get("name") == "final_answer_tool":
                    final_answer = first_call['args'].get('answer', "ERROR: FINAL_ANSWER_TOOL CALLED WITHOUT ANSWER")
                    print(f"--- Final Answer Captured from tool call: '{final_answer}' ---")
                    break
            elif isinstance(latest, ToolMessage):
                print(f"Tool Result ({latest.tool_call_id}): {latest.content[:500]}...")
            elif is_ai:
                # AI message without any tool call — plain reasoning text.
                print(f"AI Message (Executor): {latest.content[:500]}...")

        answer_text = str(final_answer).strip()

        # Strip boilerplate lead-ins the LLM sometimes prepends.
        for prefix in ("The answer is:", "Here is the answer:", "Based on the information:", "Final Answer:", "Answer:"):
            if answer_text.lower().startswith(prefix.lower()):
                candidate = answer_text[len(prefix):].strip()
                if candidate:
                    answer_text = candidate
                    break

        # Remove code fences and single-backtick wrapping.
        answer_text = remove_fences_simple(answer_text)
        if answer_text.startswith("`") and answer_text.endswith("`"):
            answer_text = answer_text[1:-1].strip()

        print(f"Agent returning final answer (cleaned): '{answer_text}'")
        return answer_text

    except Exception as e:
        print(f"Error running agent graph: {e}")
        tb_str = traceback.format_exc()
        print(tb_str)
        return f"AGENT GRAPH ERROR: {e}"
745
+
746
+
747
# ====================================================
# --- Global Agent Instantiation ---
# The agent is created once at import time so tools (scrape_and_retrieve)
# can reach its shared RAG components via the module-level `agent` global.

try:
    agent = BasicAgent()
    # Fix: success message previously contained mojibake ("���") from a
    # broken encoding; restored the intended checkmark.
    print("✅ Global BasicAgent instantiated successfully.")
    if asr_pipeline is None:
        print("⚠️ Global ASR Pipeline failed load.")
except Exception as e:
    print(f"❌ FATAL: Could not instantiate global agent: {e}")
    traceback.print_exc()
    agent = None
758
+
759
+ # ====================================================
760
  # --- (Original Template Code - Mock Questions Version) ---
761
  def run_and_submit_all( profile: gr.OAuthProfile | None):
762
  """
 
766
  space_id = os.getenv("SPACE_ID")
767
  username = profile.username if profile else "local_test_user"
768
  print(f"User: {username}{'' if profile else ' (dummy)'}")
769
+
770
+ # Check if global agent initialized
771
+ if not agent:
772
+ return "FATAL ERROR: Global agent failed to initialize. Check logs.", None
773
+
774
+ print("Using globally instantiated agent.")
 
775
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run"
776
  print(f"Agent code URL: {agent_code}")
777
  print("--- USING MOCK QUESTIONS ---")
 
872
  file_path = item.get("file_path")
873
  question_text_with_context = question_text
874
  if file_path:
875
+ question_text_with_context = f"{question_text}\n\n[Attached File: {file_path}]"
 
 
 
 
 
876
  print(f"Q includes file: {file_path}")
877
 
878
  submitted_answer = agent(question_text_with_context)
 
924
  except FileNotFoundError: print("Warning: CWD listing failed.")
925
  print("-"*(60 + len(" App Starting ")) + "\n")
926
  print("Launching Gradio Interface...")
927
+ demo.queue().launch(debug=True, share=False)