Final_Assignment_Template

Runtime error

App Files Files Community

rqueraud commited on Sep 30, 2025

Commit

4d5f444

1 Parent(s): 81917a3

Before refactoring tools

Browse files

Files changed (13) hide show

.gitignore +10 -0
AGENTS.md +6 -0
README.md +13 -1
app.py +7 -17
app_raw.py +196 -0
poetry.lock +0 -0
pyproject.toml +39 -0
requirements.txt +0 -2
src/__init__.py +1 -0
src/flexible_agent.py +586 -0
src/gaio.py +85 -0
src/gaio_chat_model.py +319 -0
src/tools.py +272 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,10 @@

+.cursor/
+env/
+.env
+gemini-sa-key.json
+questions/
+questions_eval/
+.envrc
+.vscode/
+.cursor/
+__pycache__/

AGENTS.md ADDED Viewed

	@@ -0,0 +1,6 @@

+# Project Instructions
+This project aims to implement a custom agent that answers a list of questions.
+A raw template file, app_raw.py, has been provided. You should create an app.py that extends app_raw.py with a working agent.
+All Python commands you run should be prefixed with `direnv exec . poetry run`.

README.md CHANGED Viewed

@@ -12,4 +12,16 @@ hf_oauth: true
 hf_oauth_expiration_minutes: 480
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 hf_oauth_expiration_minutes: 480
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Getting started
+```bash
+python3 -m venv env
+source env/bin/activate
+pip install -r requirements.txt
+python app.py
+```
+TODO :
+* If a URL is provided, it is not an attachment; the agent needs to classify whether it points to a web page (HTML) or, for example, a YouTube video.

app.py CHANGED Viewed

@@ -1,24 +1,13 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -39,8 +28,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     submit_url = f"{api_url}/submit"
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -80,7 +70,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -142,19 +132,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )

 import os
 import gradio as gr
 import requests
 import pandas as pd
+from src.flexible_agent import FlexibleAgent
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     submit_url = f"{api_url}/submit"
     # 1. Instantiate Agent ( modify this part to create your agent)
+    # Note: Langfuse tracing is automatically configured in FlexibleAgent when environment variables are set
     try:
+        agent = FlexibleAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text, task_id=task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Flexible Tool-Based Agent")
     gr.Markdown(
         """
         **Instructions:**
+        1.  This flexible agent intelligently chooses from multiple tools to answer complex questions
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This flexible agent uses LLM-powered tool selection for optimal question handling.
         """
     )

app_raw.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import os
+import gradio as gr
+import requests
+import inspect
+import pandas as pd
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Basic Agent Definition ---
+# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+class BasicAgent:
+    def __init__(self):
+        print("BasicAgent initialized.")
+    def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        fixed_answer = "This is a default answer."
+        print(f"Agent returning fixed answer: {fixed_answer}")
+        return fixed_answer
+def run_and_submit_all( profile: gr.OAuthProfile | None):
+    """
+    Fetches all questions, runs the BasicAgent on them, submits all answers,
+    and displays the results.
+    """
+    # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    if profile:
+        username= f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent ( modify this part to create your agent)
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
+    except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run your Agent
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
+        try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+        except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+    # 5. Submit
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
+        print("Submission successful.")
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except Exception as e:
+        status_message = f"An unexpected error occurred during submission: {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+# --- Build Gradio Interface using Blocks ---
+# Build the Gradio UI; `demo` is the Blocks app launched from the __main__ guard.
+with gr.Blocks() as demo:
+    # Page title followed by the usage instructions (both rendered as Markdown).
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        ---
+        **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+        """
+    )
+    # Hugging Face login button; run_and_submit_all refuses to run without a logged-in profile.
+    gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    # Read-only text box receiving the first element of run_and_submit_all's return tuple.
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Removed max_rows=10 from DataFrame constructor
+    # Table receiving the second element of the return tuple (the per-question results).
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # No `inputs` are wired: the `profile` argument is presumably injected by Gradio from the
+    # OAuth session based on its type annotation -- confirm against the Gradio documentation.
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
+if __name__ == "__main__":
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup: # Print repo URLs if SPACE_ID is found
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,39 @@

+[tool.poetry]
+name = "final-assignment-template"
+version = "0.1.0"
+description = ""
+authors = ["Your Name <you@example.com>"]
+readme = "README.md"
+package-mode = false
+[tool.poetry.dependencies]
+python = ">=3.11,<3.12"
+gradio = "*"
+requests = "*"
+langgraph = "*"
+langchain_openai = "*"
+langchain_huggingface = "*"
+langchain_community = "*"
+langchain_google_genai = "*"
+wikipedia = "*"
+youtube-search-python = "*"
+pillow = "*"
+langchain_experimental = "*"
+langchain-tavily = ">=0.2.11,<0.3.0"
+langchain-anthropic = ">=0.3.20,<0.4.0"
+openai-whisper = "*"
+speechrecognition = "*"
+pydub = "*"
+python-magic = "*"
+librosa = "*"
+rizaio = "*"
+langfuse = "*"
+langchain = "*"
+tesseract = ">=0.1.3,<0.2.0"
+unstructured = {extras = ["all-docs"], version = "*"}
+langchain-google-community = "^2.0.10"
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"

requirements.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- gradio
2	- requests

src/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Core modules for the Final Assignment Template."""

src/flexible_agent.py ADDED Viewed

	@@ -0,0 +1,586 @@

+import os
+import shutil
+from typing import TypedDict, Optional, List, Annotated
+from datetime import datetime
+from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
+from langgraph.graph import START, END, StateGraph
+from langgraph.graph.message import add_messages
+from langgraph.prebuilt import ToolNode, tools_condition
+from langfuse.langchain import CallbackHandler
+try:
+    # Try relative imports first (when used as package)
+    from .tools import (
+        wikipedia_search, youtube_search, decode_text,
+        download_and_process_file, web_search
+    )
+except ImportError:
+    # Fall back to absolute imports (when run directly)
+    from tools import (
+        wikipedia_search, youtube_search, decode_text,
+        download_and_process_file, web_search
+    )
+from langchain_google_genai import ChatGoogleGenerativeAI
+# --- Agent State following LangGraph pattern ---
+class AgentState(TypedDict):
+    """State dictionary handed to each node method of FlexibleAgent.
+    Node methods read these keys and return a dict containing only the keys they update.
+    """
+    # The original question from the user
+    question: str
+    # Task ID for file downloads
+    task_id: Optional[str]
+    # File classification results (written by classify_file_requirement, read by route_after_classification)
+    requires_file: Optional[bool]
+    # File content if downloaded and processed (download_file_content stores an error string here on failure)
+    file_content: Optional[str]
+    # Search attempt counter to prevent infinite loops (incremented by answer_with_tools, capped at 5 in provide_final_answer)
+    search_attempts: int
+    # Final answer
+    final_answer: Optional[str]
+    # Messages for LLM interactions (for logging)
+    # Annotated with langgraph's `add_messages`; presumably this merges returned messages into the existing list -- confirm against the LangGraph docs.
+    messages: Annotated[List[BaseMessage], add_messages]
+# --- Flexible Tool-Based Agent ---
+class FlexibleAgent:
+    def __init__(self):
+        """Set up the chat model, the tool-bound model, optional Langfuse tracing and the log directory.
+        Side effects: deletes any existing ``questions`` directory (relative to the current working
+        directory) and recreates it empty, then builds the graph through ``self._build_graph()``.
+        """
+        # Initialize Gemini chat model for LangChain integration
+        self.chat = ChatGoogleGenerativeAI(
+            # google_api_key=os.getenv("GEMINI_API_KEY"),
+            # model="gemini-2.0-flash-lite",
+            model="gemini-2.5-flash-lite",
+            temperature=0.0,
+            max_tokens=None
+        )
+        # Define available tools (excluding file detection - now handled by graph nodes)
+        self.tools = [
+            wikipedia_search, youtube_search, decode_text, web_search
+        ]
+        # Bind tools to the LLM
+        # `self.chat` stays tool-free for classification, planning and forced final answers.
+        self.chat_with_tools = self.chat.bind_tools(self.tools)
+        # Initialize Langfuse CallbackHandler for tracing
+        # Tracing is best-effort: any failure here leaves the handler as None instead of aborting startup.
+        try:
+            self.langfuse_handler = CallbackHandler()
+            print("✅ Langfuse CallbackHandler initialized successfully")
+        except Exception as e:
+            print(f"⚠️  Warning: Could not initialize Langfuse CallbackHandler: {e}")
+            print("   Continuing without Langfuse tracing...")
+            self.langfuse_handler = None
+        # Create questions directory for logging
+        self.questions_dir = "questions"
+        # Clear previous question files
+        # NOTE: this removes the whole directory tree, so logs from earlier runs are lost.
+        if os.path.exists(self.questions_dir):
+            shutil.rmtree(self.questions_dir)
+        os.makedirs(self.questions_dir, exist_ok=True)
+        # Used by log_full_conversation to number the per-question log files.
+        self.question_counter = 0
+        # Build the graph following LangGraph pattern
+        self._build_graph()
+        print("FlexibleAgent initialized with Gemini LLM and LangGraph workflow.")
+    def log_full_conversation(self, question: str, final_state: dict, answer: str):
+        """Log the complete conversation including all tool calls and LLM interactions"""
+        self.question_counter += 1
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"question_{self.question_counter:03d}_{timestamp}.txt"
+        filepath = os.path.join(self.questions_dir, filename)
+        with open(filepath, 'w', encoding='utf-8') as f:
+            f.write(f"Question #{self.question_counter}\n")
+            f.write(f"Timestamp: {datetime.now().isoformat()}\n")
+            f.write(f"Question: {question}\n")
+            f.write("="*60 + "\n")
+            f.write("FULL CONVERSATION TRACE:\n")
+            f.write("="*60 + "\n\n")
+            # Log all messages in the conversation
+            messages = final_state.get("messages", [])
+            for i, message in enumerate(messages):
+                f.write(f"--- Message {i+1}: {type(message).__name__} ---\n")
+                f.write(f"Content: {message.content}\n")
+                # If it's an AI message with tool calls, log the tool calls
+                if hasattr(message, 'tool_calls') and message.tool_calls:
+                    f.write(f"Tool Calls: {len(message.tool_calls)}\n")
+                    for j, tool_call in enumerate(message.tool_calls):
+                        # Handle both dict and object formats
+                        if hasattr(tool_call, 'name'):
+                            f.write(f"  Tool {j+1}: {tool_call.name}\n")
+                            f.write(f"  Arguments: {tool_call.args}\n")
+                            f.write(f"  ID: {tool_call.id}\n")
+                        elif isinstance(tool_call, dict):
+                            f.write(f"  Tool {j+1}: {tool_call.get('name', 'unknown')}\n")
+                            f.write(f"  Arguments: {tool_call.get('args', {})}\n")
+                            f.write(f"  ID: {tool_call.get('id', 'unknown')}\n")
+                        else:
+                            f.write(f"  Tool {j+1}: {str(tool_call)}\n")
+                # If it's a tool message, show which tool it came from
+                if hasattr(message, 'tool_call_id'):
+                    f.write(f"Tool Call ID: {message.tool_call_id}\n")
+                f.write("\n")
+            f.write("="*60 + "\n")
+            f.write(f"FINAL ANSWER: {answer}\n")
+            f.write("="*60 + "\n")
+        print(f"Logged full conversation to: {filename}")
+    def classify_file_requirement(self, state: AgentState):
+        """LLM-based classification of whether the question requires a file attachment"""
+        question = state["question"]
+        # For the first message, include the question
+        if not state.get("messages"):
+            # Initial message with question
+            first_message = HumanMessage(content=question)
+            # Classification prompt - no need to repeat the question
+            classification_prompt = """
+            Analyze the question above and determine if it requires accessing an attached file.
+            Determine if the question mentions attached files (like "I've attached", "attached as", "see attached", etc.)
+            If the question requires a file, answer "yes". If not, answer "no".
+            If a url is provided, answer "no".
+            """
+            # Call the LLM with both messages
+            messages = [first_message, HumanMessage(content=classification_prompt)]
+            response = self.chat.invoke(messages)
+            # Update messages for tracking
+            new_messages = [first_message, HumanMessage(content=classification_prompt), response]
+        else:
+            # Subsequent call - messages already exist
+            classification_prompt = """
+            Analyze the question and determine if it requires accessing an attached file.
+            If the question requires a file, answer "yes". If not, answer "no".
+            If a url is provided, answer "no".
+            """
+            # Call the LLM
+            messages = state["messages"] + [HumanMessage(content=classification_prompt)]
+            response = self.chat.invoke(messages)
+            # Update messages for tracking
+            new_messages = state.get("messages", []) + [
+                HumanMessage(content=classification_prompt),
+                response
+            ]
+        # Parse the response to determine if file is required
+        response_text = response.content.lower()
+        requires_file = response_text == "yes"
+        # Return state updates
+        return {
+            "requires_file": requires_file,
+            "messages": new_messages
+        }
+    def download_file_content(self, state: AgentState):
+        """Download and process the file content"""
+        task_id = state["task_id"]
+        if not task_id:
+            error_msg = "Error: No task_id provided for file download"
+            # Add error message to conversation
+            new_messages = state.get("messages", []) + [
+                HumanMessage(content=error_msg)
+            ]
+            return {
+                "file_content": error_msg,
+                "messages": new_messages
+            }
+        try:
+            # Use the download tool (but call it directly instead of as a tool)
+            file_result = download_and_process_file(task_id)
+            # Add file content to conversation without repeating the question
+            file_message = f"File Content:\n{file_result}"
+            new_messages = state.get("messages", []) + [
+                HumanMessage(content=file_message)
+            ]
+            return {
+                "file_content": file_result,
+                "messages": new_messages
+            }
+        except Exception as e:
+            error_msg = f"Error downloading file: {str(e)}"
+            new_messages = state.get("messages", []) + [
+                HumanMessage(content=error_msg)
+            ]
+            return {
+                "file_content": error_msg,
+                "messages": new_messages
+            }
+    def answer_with_tools(self, state: AgentState):
+        """Use tools to answer the question (with or without file content)"""
+        # Increment search attempts
+        search_attempts = state.get("search_attempts", 0) + 1
+        # Create system prompt for tool usage - question is already in conversation
+        system_prompt = f"""
+Use your tools to answer the question above.
+"""
+        # Use existing conversation context
+        messages = state.get("messages", []) + [HumanMessage(content=system_prompt)]
+        # Let the LLM decide what tools to use
+        response = self.chat_with_tools.invoke(messages)
+        # Update messages for tracking
+        new_messages = state.get("messages", []) + [
+            HumanMessage(content=system_prompt),
+            response
+        ]
+        return {"messages": new_messages, "search_attempts": search_attempts}
+    def plan_approach(self, state: AgentState):
+        """Decide whether to use tools or answer directly"""
+        # Create system prompt for decision making - no need to repeat the question
+        planning_prompt = """Now you need to decide how to answer the question above.
+Should you use tools to answer this question? Respond with ONLY "tools" or "direct":
+- ALWAYS use "tools" if:
+  * The user explicitly mentions "search", "Wikipedia", "YouTube", or any tool name
+  * The question asks about factual information that would benefit from Wikipedia search
+  * The question mentions YouTube videos or asks about video content
+  * The question provides image URLs to analyze
+  * The question involves encoded/backwards text
+  * The user specifically requests using external sources
+- Use "direct" if:
+  * It's a simple math calculation AND no search is requested
+  * It's a general knowledge question you can answer confidently AND no search is requested
+  * It's asking for an opinion or creative content
+  * No tools would significantly improve the answer AND no search is requested
+"""
+        # Get LLM decision using existing conversation context
+        messages = state.get("messages", []) + [HumanMessage(content=planning_prompt)]
+        response = self.chat.invoke(messages)
+        # Update messages for tracking
+        new_messages = state.get("messages", []) + [
+            HumanMessage(content=planning_prompt),
+            response
+        ]
+        return {"messages": new_messages}
+    def answer_directly(self, state: AgentState):
+        """Answer the question directly without tools"""
+        # Create system prompt - question is already in conversation
+        system_prompt = "You are a helpful assistant. Answer the question above directly and accurately."
+        # Use existing conversation context
+        messages = state.get("messages", []) + [AIMessage(content=system_prompt)]
+        # Get response
+        response = self.chat.invoke(messages)
+        # Update messages for tracking
+        new_messages = state.get("messages", []) + [
+            AIMessage(content=system_prompt),
+            response
+        ]
+        return {"messages": new_messages}
+    def provide_final_answer(self, state: AgentState):
+        """Provide a final answer based on tool results, or request more searches if needed"""
+        search_attempts = state.get("search_attempts", 0)
+        # If we've reached the search limit, force a final answer
+        if search_attempts >= 5:
+            final_prompt = """You have reached the maximum number of search attempts (5).
+Based on all the information gathered in this conversation, provide the best possible answer to the original question.
+If you could not find the specific information requested, clearly state that the information could not be found."""
+            # Use regular chat (without tools) to force a final answer
+            messages = state.get("messages", []) + [HumanMessage(content=final_prompt)]
+            response = self.chat.invoke(messages)
+            new_messages = state.get("messages", []) + [
+                HumanMessage(content=final_prompt),
+                response
+            ]
+            return {"messages": new_messages}
+        else:
+            # Allow more searches if under the limit
+            final_prompt = f"""Based on the conversation above and any tool results, either:
+1. Provide a clear and direct answer to the original question if you have enough information, OR
+2. Use additional tools to search for missing information
+SEARCH ATTEMPTS: {search_attempts}/5 (Maximum 5 attempts)
+SEARCH STRATEGY FOR COMPLEX QUESTIONS:
+- If you couldn't find information with one search, try breaking it down:
+  * For questions about actors in different shows, search each show/movie separately
+  * For questions about adaptations, search for the original work first, then the adaptation
+  * Use simpler, more specific search terms
+  * Try different keyword combinations if first search fails
+CURRENT SITUATION:
+- Review what searches you've already tried
+- If previous searches failed, try different, simpler search terms
+- Break complex questions into their component parts and search each separately
+If you need more information, use the tools. If you have enough information, provide the final answer."""
+            # Use the chat with tools so it can decide to search more
+            messages = state.get("messages", []) + [HumanMessage(content=final_prompt)]
+            response = self.chat_with_tools.invoke(messages)
+            # Update messages for tracking
+            new_messages = state.get("messages", []) + [
+                HumanMessage(content=final_prompt),
+                response
+            ]
+            return {"messages": new_messages}
+    def route_after_classification(self, state: AgentState) -> str:
+        """Determine the next step based on file requirement classification"""
+        if state["requires_file"]:
+            return "file_required"
+        else:
+            return "no_file_required"
+    def route_after_planning(self, state: AgentState) -> str:
+        """Determine whether to use tools or answer directly based on LLM decision"""
+        messages = state.get("messages", [])
+        # Get the last AI message (the planning decision)
+        for msg in reversed(messages):
+            if isinstance(msg, AIMessage):
+                decision = msg.content.lower().strip()
+                if "tools" in decision:
+                    return "use_tools"
+                elif "direct" in decision:
+                    return "answer_direct"
+                break
+        # Default to direct if unclear
+        return "answer_direct"
+    def extract_final_answer(self, state: AgentState):
+        """Extract ONLY the final answer from the conversation"""
+        # Create a dedicated extraction prompt that looks at the entire conversation
+        extraction_prompt = """Look at the entire conversation above and extract ONLY the final answer to the original question.
+Return just the answer with no extra words, explanations, or formatting.
+If the answer is a number, write it in digits.
+Examples:
+- If the conversation concludes "The capital is Paris", return: Paris
+- If the conversation concludes "2 + 2 equals 4", return: 4
+- If the conversation concludes "The opposite of left is right", return: right
+- If the conversation concludes "Based on search results, the answer is 42", return: 42
+Final answer only:"""
+        try:
+            # Use the full conversation context for extraction
+            messages = state["messages"] + [HumanMessage(content=extraction_prompt)]
+            response = self.chat.invoke(messages)
+            answer = response.content.strip()
+            return answer
+        except Exception as e:
+            print(f"Answer extraction error: {e}")
+            # Fallback: get the last AI message content
+            messages = state["messages"]
+            for msg in reversed(messages):
+                if isinstance(msg, AIMessage) and not getattr(msg, 'tool_calls', None):
+                    return msg.content.strip()
+            return "No answer found"
+    def _build_graph(self):
+        """Build the LangGraph workflow with proper planning approach"""
+        graph = StateGraph(AgentState)
+        # Add nodes
+        graph.add_node("classify_file_requirement", self.classify_file_requirement)
+        graph.add_node("download_file_content", self.download_file_content)
+        graph.add_node("plan_approach", self.plan_approach)
+        graph.add_node("answer_with_tools", self.answer_with_tools)
+        graph.add_node("answer_directly", self.answer_directly)
+        graph.add_node("tools", ToolNode(self.tools))
+        # Define the flow - Start with file classification
+        graph.add_edge(START, "classify_file_requirement")
+        # Add conditional branching after classification
+        graph.add_conditional_edges(
+            "classify_file_requirement",
+            self.route_after_classification,
+            {
+                "file_required": "download_file_content",
+                "no_file_required": "plan_approach"
+            }
+        )
+        # After downloading file, plan the approach
+        graph.add_edge("download_file_content", "plan_approach")
+        # After planning, decide whether to use tools or answer directly
+        graph.add_conditional_edges(
+            "plan_approach",
+            self.route_after_planning,
+            {
+                "use_tools": "answer_with_tools",
+                "answer_direct": "answer_directly"
+            }
+        )
+        # From answer_with_tools, either use tools or end
+        graph.add_conditional_edges(
+            "answer_with_tools",
+            tools_condition,
+            {
+                "tools": "tools",
+                END: END,
+            }
+        )
+        # From answer_directly, just end (no tool checking after direct answer)
+        graph.add_edge("answer_directly", END)
+        # After tools, check if more tools are needed or provide final answer
+        graph.add_node("provide_final_answer", self.provide_final_answer)
+        graph.add_conditional_edges(
+            "tools",
+            tools_condition,
+            {
+                "tools": "tools",  # Allow multiple tool cycles
+                END: "provide_final_answer",
+            }
+        )
+        # Allow provide_final_answer to also use more tools if needed
+        graph.add_conditional_edges(
+            "provide_final_answer",
+            tools_condition,
+            {
+                "tools": "tools",  # Can go back to tools for more searches
+                END: END,
+            }
+        )
+        # Compile the graph
+        self.compiled_graph = graph.compile()
+        # self.compiled_graph.get_graph().draw_mermaid_png()
+    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
+        """Process question using LangGraph workflow"""
+        print(f"Processing: {question[:50]}...")
+        # Create initial state following the new structure
+        initial_state = {
+            "question": question,
+            "task_id": task_id,
+            "requires_file": None,
+            "file_content": None,
+            "search_attempts": 0,
+            "final_answer": None,
+            "messages": []
+        }
+        try:
+            # Run the graph with recursion limit configuration and Langfuse tracing
+            config = {"recursion_limit": 25}  # Higher limit for multiple tool usage
+            # Add Langfuse callback handler if available
+            if self.langfuse_handler:
+                config["callbacks"] = [self.langfuse_handler]
+                print("🔌 Running with Langfuse tracing enabled")
+            result = self.compiled_graph.invoke(initial_state, config=config)
+            # Extract the final answer
+            answer = self.extract_final_answer(result)
+            print(f"Answer: {answer[:50]}...")
+            # Log the complete conversation for review
+            self.log_full_conversation(question, result, answer)
+            return answer
+        except Exception as e:
+            print(f"Error: {e}")
+            error_answer = "Error occurred."
+            # Create a minimal state for error logging
+            error_state = {
+                "question": question,
+                "messages": [
+                    HumanMessage(content=question),
+                    AIMessage(content=f"Error: {str(e)}")
+                ]
+            }
+            self.log_full_conversation(question, error_state, error_answer)
+            return error_answer
+if __name__ == "__main__":
+    print("Testing FlexibleAgent with a simple question...")
+    try:
+        # Create an instance of the agent
+        agent = FlexibleAgent()
+        # Test with a simple math question
+        test_question = "How much is 2+2?"
+        print(f"\nQuestion: {test_question}")
+        # Get the answer
+        answer = agent(test_question)
+        print(f"Answer: {answer}")
+        # Check if the answer is correct
+        if answer == "4":
+            print("✅ Test passed! The agent correctly answered the math question.")
+        else:
+            print("❌ Test failed. Expected the answer to be '4'.")
+        answer = agent("What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?")
+        print(f"Answer: {answer}")
+        if answer == "Louvrier":
+            print("✅ Test passed! The agent correctly answered the question.")
+        else:
+            print("❌ Test failed. Expected the answer to contain 'Louvrier'.")
+    except Exception as e:
+        import traceback
+        print(f"❌ Test failed with error: {e}")
+        print("Full traceback:")
+        traceback.print_exc()

src/gaio.py ADDED Viewed

	@@ -0,0 +1,85 @@

+# DEPRECATED: This file has been replaced by gemini_chat_model.py
+# Please use GeminiChatModel instead of Gaio for LLM integration
+import os
+import requests
+class Gaio:
+    def __init__(self, api_key, api_url):
+        self.api_key = api_key
+        self.api_url = api_url
+    def InvokeGaio(self, userPrompt):
+        payload = {
+            "model": "azure/gemini-2.5-pro",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": userPrompt
+                }
+            ],
+            "temperature": 0.00,
+            "max_tokens": 100000,
+            "stream": False
+        }
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json"
+        }
+        # Make the POST request
+        response = requests.post(
+            self.api_url,
+            headers=headers,
+            json=payload,
+            timeout=30
+        )
+        # Parse the JSON response
+        result = response.json()
+        message = result["choices"][0]["message"]["content"]
+        return message
+def main():
+    """Test Gaio with a simple question and verify the answer."""
+    print("Testing Gaio with a simple math question...")
+    # Get API credentials from environment variables
+    api_key = os.getenv("GAIO_API_TOKEN")
+    api_url = os.getenv("GAIO_URL")
+    if not api_key or not api_url:
+        print("❌ Test failed: Missing environment variables.")
+        print("Please set the following environment variables:")
+        print("- GAIO_API_TOKEN: Your API token")
+        print("- GAIO_URL: The API URL")
+        return
+    try:
+        # Create Gaio instance
+        gaio = Gaio(api_key, api_url)
+        # Test with the specific question
+        test_question = "How much is 2 + 2 ? Only answer with the response number and nothing else."
+        print(f"\nQuestion: {test_question}")
+        # Get the answer
+        answer = gaio.InvokeGaio(test_question)
+        print(f"Answer: '{answer}'")
+        # Check if the answer is exactly "4"
+        answer_stripped = answer.strip()
+        if answer_stripped == "4":
+            print("✅ Test passed! The answer is exactly '4'.")
+        else:
+            print(f"❌ Test failed. Expected '4', but got '{answer_stripped}'.")
+    except Exception as e:
+        print(f"❌ Test failed with error: {e}")
+# Run the smoke test only when this file is executed as a script, not on import
+if __name__ == "__main__":
+    main()

src/gaio_chat_model.py ADDED Viewed

	@@ -0,0 +1,319 @@

+# DEPRECATED: This file has been replaced by gemini_chat_model.py
+# Please use GeminiChatModel instead of GaioChatModel for LLM integration
+import os
+import json
+import re
+from typing import Any, Dict, Iterator, List, Optional
+from pydantic import Field, SecretStr
+from langchain_core.callbacks.manager import CallbackManagerForLLMRun
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage, SystemMessage, ToolMessage
+from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
+from langchain_core.messages.tool import ToolCall
+try:
+    # Try relative import first (when used as package)
+    from .gaio import Gaio
+except ImportError:
+    # Fall back to absolute import (when run directly)
+    from gaio import Gaio
+class GaioChatModel(BaseChatModel):
+    """Custom LangChain chat model wrapper for Gaio API.
+    This model integrates with the Gaio API service to provide chat completion
+    capabilities within the LangChain framework.
+    Example:
+        ```python
+        model = GaioChatModel(
+            api_key="your-api-key",
+            api_url="https://your-gaio-endpoint.com/chat/completions"
+        )
+        response = model.invoke([HumanMessage(content="Hello!")])
+        ```
+    """
+    api_key: SecretStr = Field(description="API key for Gaio service")
+    api_url: str = Field(description="API endpoint URL for Gaio service")
+    model_name: str = Field(default="azure/gpt-4o", description="Name of the model to use")
+    temperature: float = Field(default=0.05, ge=0.0, le=2.0, description="Sampling temperature")
+    max_tokens: int = Field(default=1000, gt=0, description="Maximum number of tokens to generate")
+    gaio_client: Optional[Gaio] = Field(default=None, exclude=True)
+    class Config:
+        """Pydantic model configuration."""
+        arbitrary_types_allowed = True
+    def __init__(self, api_key: str, api_url: str, **kwargs):
+        # Set the fields before calling super().__init__
+        kwargs['api_key'] = SecretStr(api_key)
+        kwargs['api_url'] = api_url
+        super().__init__(**kwargs)
+        # Initialize the Gaio client after parent initialization
+        self.gaio_client = Gaio(api_key, api_url)
+    @property
+    def _llm_type(self) -> str:
+        """Return identifier of the LLM."""
+        return "gaio"
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Return a dictionary of identifying parameters.
+        This information is used by the LangChain callback system for tracing.
+        Note: API key is excluded for security reasons.
+        """
+        return {
+            "model_name": self.model_name,
+            "api_url": self.api_url,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+        }
+    def _format_messages_for_gaio(self, messages: List[BaseMessage]) -> str:
+        """Convert LangChain messages to a single prompt string for gaio."""
+        formatted_parts = []
+        for message in messages:
+            if isinstance(message, HumanMessage):
+                formatted_parts.append(f"user: {message.content}")
+            elif isinstance(message, AIMessage):
+                formatted_parts.append(f"assistant: {message.content}")
+            elif isinstance(message, SystemMessage):
+                formatted_parts.append(f"system: {message.content}")
+            elif isinstance(message, ToolMessage):
+                formatted_parts.append(f"tool_result: {message.content}")
+                # Add instruction after tool result
+                formatted_parts.append("Now provide your final answer based on the tool result above. Do NOT make another tool call.")
+            else:
+                raise RuntimeError(f"Unknown message type: {type(message)}")
+        # If tools are bound, add tool information to the prompt
+        if hasattr(self, '_bound_tools') and self._bound_tools:
+            tool_descriptions = []
+            for tool in self._bound_tools:
+                tool_name = tool.name
+                tool_desc = tool.description
+                tool_descriptions.append(f"- {tool_name}: {tool_desc}")
+            tool_format = '{"tool_call": {"name": "tool_name", "arguments": {"parameter_name": "value"}}}'
+            wikipedia_example = '{"tool_call": {"name": "wikipedia_search", "arguments": {"query": "capital of France"}}}'
+            youtube_example = '{"tool_call": {"name": "youtube_search", "arguments": {"query": "python tutorial"}}}'
+            decode_example = '{"tool_call": {"name": "decode_text", "arguments": {"text": "backwards text here"}}}'
+            tools_prompt = f"""
+You have access to the following tools:
+{chr(10).join(tool_descriptions)}
+When you need to use a tool, you MUST respond with exactly this format:
+{tool_format}
+Examples:
+- To search Wikipedia: {wikipedia_example}
+- To search YouTube: {youtube_example}
+- To decode text: {decode_example}
+CRITICAL: Use the correct parameter names:
+- wikipedia_search and youtube_search use "query"
+- decode_text uses "text"
+Always try tools first for factual information before saying you cannot help."""
+            formatted_parts.append(tools_prompt)
+        return "\n\n".join(formatted_parts)
+    def _parse_tool_calls(self, response_content: str) -> tuple[str, List[ToolCall]]:
+        """Parse tool calls from the response content."""
+        tool_calls = []
+        remaining_content = response_content
+        # Look for JSON tool call pattern - more flexible regex
+        tool_call_pattern = r'\{"tool_call":\s*\{"name":\s*"([^"]+)",\s*"arguments":\s*(\{[^}]*\})\}\}'
+        matches = list(re.finditer(tool_call_pattern, response_content))
+        for i, match in enumerate(matches):
+            tool_name = match.group(1)
+            try:
+                arguments_str = match.group(2)
+                arguments = json.loads(arguments_str)
+                tool_call = ToolCall(
+                    name=tool_name,
+                    args=arguments,
+                    id=f"call_{len(tool_calls)}"
+                )
+                tool_calls.append(tool_call)
+                # Remove the tool call from the content
+                remaining_content = remaining_content.replace(match.group(0), "").strip()
+            except json.JSONDecodeError:
+                continue
+        return remaining_content, tool_calls
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Generate a response from the model."""
+        # Convert messages to prompt format
+        prompt = self._format_messages_for_gaio(messages)
+        # Call gaio API
+        try:
+            response_content = self.gaio_client.InvokeGaio(prompt)
+            # Parse any tool calls from the response
+            content, tool_calls = self._parse_tool_calls(response_content)
+            # Estimate token usage (simple approximation)
+            input_tokens = self._estimate_tokens(prompt)
+            output_tokens = self._estimate_tokens(content)
+            usage_metadata = {
+                "input_tokens": input_tokens,
+                "output_tokens": output_tokens,
+                "total_tokens": input_tokens + output_tokens
+            }
+            # Create AI message with tool calls if any
+            if tool_calls:
+                ai_message = AIMessage(
+                    content=content,
+                    tool_calls=tool_calls,
+                    usage_metadata=usage_metadata,
+                    response_metadata={"model": self.model_name}
+                )
+            else:
+                ai_message = AIMessage(
+                    content=content,
+                    usage_metadata=usage_metadata,
+                    response_metadata={"model": self.model_name}
+                )
+            # Create chat generation
+            generation = ChatGeneration(
+                message=ai_message,
+                generation_info={"model": self.model_name}
+            )
+            return ChatResult(generations=[generation])
+        except Exception as e:
+            raise RuntimeError(f"Error calling Gaio API: {e}")
+    def _estimate_tokens(self, text: str) -> int:
+        """Simple token estimation (roughly 4 characters per token for English)."""
+        return max(1, len(text) // 4)
+    async def _agenerate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Async generate - for now, just call the sync version."""
+        # For simplicity, we'll use the sync version
+        # In production, you might want to implement true async using aiohttp
+        return self._generate(messages, stop, run_manager, **kwargs)
+    def _stream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[ChatGenerationChunk]:
+        """Stream the response. Since Gaio doesn't support streaming, simulate it."""
+        # Get the full response first
+        result = self._generate(messages, stop, run_manager, **kwargs)
+        message = result.generations[0].message
+        # Stream character by character to simulate streaming
+        content = message.content
+        for i, char in enumerate(content):
+            chunk_content = char
+            if i == len(content) - 1:  # Last chunk gets full metadata
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(
+                        content=chunk_content,
+                        usage_metadata=message.usage_metadata,
+                        response_metadata=message.response_metadata,
+                        tool_calls=getattr(message, 'tool_calls', None) if i == len(content) - 1 else None
+                    )
+                )
+            else:
+                chunk = ChatGenerationChunk(
+                    message=AIMessageChunk(content=chunk_content)
+                )
+            if run_manager:
+                run_manager.on_llm_new_token(char, chunk=chunk)
+            yield chunk
+    def bind_tools(self, tools: List[Any], **kwargs: Any) -> "GaioChatModel":
+        """Bind tools to the model."""
+        # Create a copy of the current model with tools bound
+        bound_model = GaioChatModel(
+            api_key=self.api_key.get_secret_value(),
+            api_url=self.api_url,
+            model_name=self.model_name,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens
+        )
+        # Store the tools for potential use in generation
+        bound_model._bound_tools = tools
+        return bound_model
+def main():
+    """Test GaioChatModel with a simple question and verify the answer."""
+    print("Testing GaioChatModel with a simple math question...")
+    # Get API credentials from environment variables
+    api_key = os.getenv("GAIO_API_TOKEN")
+    api_url = os.getenv("GAIO_URL")
+    if not api_key or not api_url:
+        print("❌ Test failed: Missing environment variables.")
+        print("Please set the following environment variables:")
+        print("- GAIO_API_TOKEN: Your API token")
+        print("- GAIO_URL: The API URL")
+        return
+    try:
+        # Create GaioChatModel instance
+        chat_model = GaioChatModel(api_key=api_key, api_url=api_url)
+        # Test with the specific question using LangChain message format
+        test_question = "How much is 2 + 2 ? Only answer with the response number and nothing else."
+        messages = [HumanMessage(content=test_question)]
+        print(f"\nQuestion: {test_question}")
+        print("Using LangChain message format...")
+        # Get the answer using LangChain's invoke method
+        result = chat_model.invoke(messages)
+        answer = result.content
+        print(f"Answer: '{answer}'")
+        # Check if the answer is exactly "4"
+        answer_stripped = answer.strip()
+        if answer_stripped == "4":
+            print("✅ Test passed! GaioChatModel correctly answered '4'.")
+        else:
+            print(f"❌ Test failed. Expected '4', but got '{answer_stripped}'.")
+    except Exception as e:
+        print(f"❌ Test failed with error: {e}")
+# Run the smoke test only when this file is executed as a script, not on import
+if __name__ == "__main__":
+    main()

src/tools.py ADDED Viewed

	@@ -0,0 +1,272 @@

+"""
+Tools for the FlexibleAgent
+All tool functions that the agent can use
+"""
+import os
+import re
+import requests
+import tempfile
+import mimetypes
+from pathlib import Path
+from langchain_core.tools import tool
+from langchain_community.retrievers import WikipediaRetriever
+from langchain_community.document_loaders import (
+    UnstructuredFileLoader,
+    TextLoader,
+    CSVLoader,
+    PDFPlumberLoader,
+    UnstructuredImageLoader,
+    UnstructuredMarkdownLoader,
+    UnstructuredWordDocumentLoader,
+    UnstructuredPowerPointLoader,
+    UnstructuredExcelLoader
+)
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_core.tools import Tool
+from langchain_google_community import GoogleSearchAPIWrapper
+from langchain_community.tools import DuckDuckGoSearchResults
+from langchain_community.document_loaders import WebBaseLoader
+@tool
+def wikipedia_search(query: str) -> str:
+    """Search Wikipedia for information. Use this for factual information and encyclopedic content.
+    Args:
+        query: The search query."""
+    try:
+        retriever = WikipediaRetriever(load_max_docs=10)
+        docs = retriever.invoke(query)
+        if not docs:
+            return f"No Wikipedia articles found for '{query}'"
+        output = f"Wikipedia search results for '{query}':\n\n"
+        # Format the search results as HTML
+        formatted_search_docs = "\n\n---\n\n".join(
+            [
+                f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                for doc in docs
+            ]
+        )
+        return output + formatted_search_docs
+    except Exception as e:
+        return f"Wikipedia search failed: {str(e)}"
+@tool
+def youtube_search(query: str) -> str:
+    """Search YouTube for videos and get video information. Use this when you need YouTube-specific content."""
+    try:
+        from youtubesearchpython import VideosSearch
+        search = VideosSearch(query, limit=3)
+        results = search.result()
+        output = f"YouTube search results for '{query}':\n"
+        for video in results['result']:
+            output += f"- {video['title']} by {video['channel']['name']}\n"
+            output += f"  Duration: {video['duration']}, Views: {video['viewCount']['text']}\n"
+            output += f"  URL: {video['link']}\n\n"
+        return output
+    except Exception as e:
+        return f"YouTube search failed: {str(e)}"
+@tool
+def web_search(query: str) -> str:
+    """Search the web for a query and return the first results.
+    Args:
+        query: The search query."""
+    result = "Results from web search:\n\n"
+    search = DuckDuckGoSearchResults(output_format="list")
+    search_results = search.invoke(query)
+    urls = [search_result['link'] for search_result in search_results[:3]]
+    loader = WebBaseLoader(web_paths=urls)
+    for doc in loader.lazy_load():
+        result += f"{doc.metadata}\n\n"
+        result += f"{doc.page_content}\n\n"
+        result += f"--------------------------------\n\n"
+    return result
+@tool
+def decode_text(text: str) -> str:
+    """Decode or reverse text that might be encoded backwards or in other ways."""
+    try:
+        # Try reversing words
+        words = text.split()
+        reversed_words = [word[::-1] for word in words]
+        reversed_text = " ".join(reversed_words)
+        # Try reversing the entire string
+        fully_reversed = text[::-1]
+        return f"Original: {text}\nWord-by-word reversed: {reversed_text}\nFully reversed: {fully_reversed}"
+    except Exception as e:
+        return f"Text decoding failed: {str(e)}"
+@tool
+def download_and_process_file(task_id: str) -> str:
+    """Download and process a file from the GAIA API using the task_id.
+    Use this tool when detect_file_requirement indicates a file is needed."""
+    api_url = "https://agents-course-unit4-scoring.hf.space"
+    try:
+        # Download file from API
+        file_url = f"{api_url}/files/{task_id}"
+        print(f"Downloading file from: {file_url}")
+        response = requests.get(file_url, timeout=30)
+        response.raise_for_status()
+        # Get filename from Content-Disposition header or use task_id
+        filename = task_id
+        if 'Content-Disposition' in response.headers:
+            cd = response.headers['Content-Disposition']
+            filename_match = re.search(r'filename="?([^"]+)"?', cd)
+            if filename_match:
+                filename = filename_match.group(1)
+        # Create temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{filename}") as tmp_file:
+            tmp_file.write(response.content)
+            temp_path = tmp_file.name
+        # Process the file based on type
+        file_content = _process_downloaded_file(temp_path, filename)
+        # Clean up
+        os.unlink(temp_path)
+        return f"FILE PROCESSED: {filename}\n\nContent:\n{file_content}"
+    except requests.exceptions.RequestException as e:
+        return f"File download failed: {str(e)}"
+    except Exception as e:
+        return f"File processing failed: {str(e)}"
+def _process_downloaded_file(file_path: str, filename: str) -> str:
+    """Process a downloaded file based on its type and return content."""
+    try:
+        # Determine file type
+        mime_type, _ = mimetypes.guess_type(filename)
+        file_extension = Path(filename).suffix.lower()
+        # Handle audio files
+        if mime_type and mime_type.startswith('audio') or file_extension in ['.mp3', '.wav', '.m4a', '.ogg']:
+            return _process_audio_file(file_path)
+        # Handle image files
+        elif mime_type and mime_type.startswith('image') or file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
+            return _process_image_file(file_path)
+        # Handle documents
+        elif file_extension in ['.pdf']:
+            loader = PDFPlumberLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+        elif file_extension in ['.docx', '.doc']:
+            loader = UnstructuredWordDocumentLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+        elif file_extension in ['.pptx', '.ppt']:
+            loader = UnstructuredPowerPointLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+        elif file_extension in ['.xlsx', '.xls']:
+            loader = UnstructuredExcelLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+        elif file_extension in ['.csv']:
+            loader = CSVLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+        elif file_extension in ['.md', '.markdown']:
+            loader = UnstructuredMarkdownLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+        elif file_extension in ['.txt'] or mime_type and mime_type.startswith('text'):
+            loader = TextLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+        # Fallback: try unstructured loader
+        else:
+            loader = UnstructuredFileLoader(file_path)
+            docs = loader.load()
+            return "\n".join([doc.page_content for doc in docs])
+    except Exception as e:
+        return f"Error processing file {filename}: {str(e)}"
+def _process_audio_file(file_path: str) -> str:
+    """Process audio files using speech recognition."""
+    try:
+        import speech_recognition as sr
+        from pydub import AudioSegment
+        # Convert to WAV if needed
+        audio = AudioSegment.from_file(file_path)
+        wav_path = file_path + ".wav"
+        audio.export(wav_path, format="wav")
+        # Use speech recognition
+        recognizer = sr.Recognizer()
+        with sr.AudioFile(wav_path) as source:
+            audio_data = recognizer.record(source)
+            text = recognizer.recognize_google(audio_data)
+        # Clean up temporary WAV file
+        if os.path.exists(wav_path):
+            os.unlink(wav_path)
+        return f"Audio transcription:\n{text}"
+    except ImportError:
+        return "Audio processing requires additional dependencies (speech_recognition, pydub)"
+    except Exception as e:
+        # Fallback: try with whisper if available
+        try:
+            import whisper
+            model = whisper.load_model("base")
+            result = model.transcribe(file_path)
+            return f"Audio transcription (Whisper):\n{result['text']}"
+        except ImportError:
+            return f"Audio processing failed: {str(e)}. Consider installing speech_recognition, pydub, or openai-whisper."
+        except Exception as e2:
+            return f"Audio processing failed: {str(e2)}"
+def _process_image_file(file_path: str) -> str:
+    """Process image files."""
+    try:
+        # Use unstructured image loader
+        loader = UnstructuredImageLoader(file_path)
+        docs = loader.load()
+        content = "\n".join([doc.page_content for doc in docs])
+        if content.strip():
+            return f"Image content extracted:\n{content}"
+        else:
+            return f"Image file detected but no text content could be extracted. Consider using OCR or image analysis tools."
+    except Exception as e:
+        return f"Image processing failed: {str(e)}"