CUMANI Paolo committed
Commit 17e605d
Parent(s): 81917a3
[CHG] Working agent implementation
Browse files
- .gitignore +203 -0
- agent.py +155 -0
- app.py +78 -54
- requirements.txt +19 -2
- system_prompt.yaml +7 -0
- tools.py +158 -0
.gitignore
ADDED
@@ -0,0 +1,203 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml
agent.py
ADDED
@@ -0,0 +1,155 @@
import mimetypes
import base64
import yaml
from typing import TypedDict, Annotated
from dotenv import load_dotenv
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition
from langchain_core.messages.utils import (
    trim_messages,
    count_tokens_approximately
)

# Import our custom tools from their modules
from tools import webpage_reader_tool, python_repl_tool, transcribe_youtube_video_tool, wikipedia_query_tool, web_search_tool, read_excel_csv, arxiv_query_tool

load_dotenv()

class FinalAgent:

    def __init__(self, model_type="GOOGLE", system_prompt_path="system_prompt.yaml", use_memory=False):
        """
        Args: model_type "GOOGLE" or "HUGGINGFACE" or "OLLAMA"
        """
        with open(system_prompt_path, 'r') as stream:
            prompt_templates = yaml.safe_load(stream)

        self.model_type = model_type

        if model_type == "HUGGINGFACE":
            from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
            # Initialize the Hugging Face model
            # Generate the chat interface, including the tools
            llm = HuggingFaceEndpoint(
                repo_id="Qwen/Qwen2.5-Coder-32B-Instruct"
            )

            chat = ChatHuggingFace(llm=llm, verbose=True)
        elif model_type == "OLLAMA":
            from langchain_ollama import ChatOllama
            #chat = ChatOllama(model = "qwen2.5:14b-instruct")
            chat = ChatOllama(model = "qwen3:8b")
            #chat = ChatOllama(model = "gpt-oss:20b")
        elif model_type == "GOOGLE":
            from langchain_google_genai import ChatGoogleGenerativeAI
            from langchain_core.rate_limiters import InMemoryRateLimiter
            rate_limiter = InMemoryRateLimiter(
                # Max allowed rate on the free API: 10 requests per minute, but we use 6 to avoid hitting the limit on subsequent answers.
                requests_per_second=6/60,
                # Wake up every 100 ms to check whether allowed to make a request,
                check_every_n_seconds=0.1,
                max_bucket_size=10,  # Controls the maximum burst size.
            )
            chat = ChatGoogleGenerativeAI(model="gemini-2.5-flash", rate_limiter=rate_limiter)

        tools = [webpage_reader_tool,
                 transcribe_youtube_video_tool,
                 web_search_tool,
                 wikipedia_query_tool,
                 arxiv_query_tool,
                 read_excel_csv,
                 python_repl_tool,]
        chat_with_tools = chat.bind_tools(tools)

        class AgentState(TypedDict):
            messages: Annotated[list[AnyMessage], add_messages]

        def assistant(state: AgentState):
            messages = trim_messages(
                state["messages"],
                strategy="last",
                token_counter=count_tokens_approximately,
                max_tokens=1e6 if self.model_type == "GOOGLE" else 126000,
                start_on="human",
                end_on=("human", "tool"),
            )
            return {
                "messages": [chat_with_tools.invoke([SystemMessage(content=prompt_templates['system_prompt'])] + messages)],
            }

        builder = StateGraph(AgentState)

        builder.add_node("assistant", assistant)
        builder.add_node("tools", ToolNode(tools))

        builder.add_edge(START, "assistant")
        builder.add_conditional_edges("assistant", tools_condition)
        builder.add_edge("tools", "assistant")

        if use_memory:
            checkpointer = InMemorySaver()
            self.agent = builder.compile(checkpointer=checkpointer)
        else:
            checkpointer = None
            self.agent = builder.compile()
        print("FinalAgent initialized.")

    def clear_memory(self, thread_id: str) -> None:
        """ Clear the memory for a given thread_id. """
        memory = self.agent.checkpointer
        if memory is None:
            return
        try:
            # If it's an InMemorySaver (which MemorySaver is an alias for),
            # we can directly clear the storage and writes
            if hasattr(memory, 'storage') and hasattr(memory, 'writes'):
                # Clear all checkpoints for this thread_id (all namespaces)
                memory.storage.pop(thread_id, None)

                # Clear all writes for this thread_id (for all namespaces)
                keys_to_remove = [key for key in memory.writes.keys() if key[0] == thread_id]
                for key in keys_to_remove:
                    memory.writes.pop(key, None)

                print(f"Memory cleared for thread_id: {thread_id}")
                return

        except Exception as e:
            print(f"Error clearing InMemorySaver storage for thread_id {thread_id}: {e}")

    def __call__(self, question: str, attached_file: dict, recursion_limit=9) -> str:
        print(f"Agent received question (first 100 chars): {question[:100]}...")

        if attached_file['name'] != "" and attached_file['content'] is not None:
            mime_type, _ = mimetypes.guess_type(attached_file['name'])
            if mime_type.startswith("image/") or mime_type.startswith("audio/") or mime_type.startswith("video/"):
                # Image/audio/video file - convert to base64
                encoded_file = base64.b64encode(attached_file['content']).decode('utf-8')
                if self.model_type == "GOOGLE":
                    question = [{"type": "text", "text": question},
                                {"type": "image" if mime_type.startswith("image/") else "media",
                                 "source_type": "base64",
                                 "data": encoded_file,
                                 "mime_type": mime_type,},
                                ]
                else:
                    question = f"{question}\n\nAttached file extension:{attached_file['name'].split('.')[-1]} - Attached file base64 encoded: \n{encoded_file}"
            elif mime_type.startswith("text/"):
                # Text-based file (like .py, .txt, .json)
                question = f"{question}\n\nAttached file extension:{attached_file['name'].split('.')[-1]} - Attached file content: \n{attached_file['content'].decode('utf-8')}"
            else:
                encoded_file = base64.b64encode(attached_file['content']).decode('utf-8')
                print(f"Unsupported file {attached_file['name']} type: {mime_type}. Only images, audio, video, and text files are supported.")
                question = f"{question}\n\nAttached file extension: {attached_file['name'].split('.')[-1]}. File path: {attached_file['path']} - Attached file base64 encoded:\n{encoded_file}"

        if recursion_limit > 0:
            agent_reply = self.agent.invoke({"messages": [HumanMessage(content=question)]}, {"recursion_limit": recursion_limit})
        else:
            agent_reply = self.agent.invoke({"messages": [HumanMessage(content=question)]})
        return str(agent_reply['messages'][-1].content)
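As a usage reference, a minimal sketch of driving this class outside the Gradio app. It assumes a `.env` file providing the Google API key read by `langchain_google_genai` and this commit's `agent.py`, `tools.py`, and `system_prompt.yaml` on the import path; the no-attachment call mirrors how `app.py` invokes the agent for questions without a file.

# Local smoke test for FinalAgent (sketch, not part of the commit).
# Assumes GOOGLE_API_KEY is available via .env in the working directory.
from agent import FinalAgent

agent = FinalAgent(model_type="GOOGLE")

# No attached file: empty name and None content, exactly as app.py passes
# for tasks that have no file_name.
empty_file = {"name": "", "path": None, "content": None}

answer = agent(
    "What is the capital of France?",   # illustrative question, not a benchmark task
    attached_file=empty_file,
    recursion_limit=-1,                  # <= 0 falls back to LangGraph's default recursion limit
)
print(answer)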
app.py
CHANGED
@@ -1,23 +1,14 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
 
+from agent import FinalAgent
+
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -40,7 +31,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
+        agent = FinalAgent(model_type="GOOGLE")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -73,18 +64,37 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
+    for number, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
+
+        if file_name != '':
+            file_url = f"{api_url}/files/{task_id}"
+            try:
+                response = requests.get(file_url, timeout=15)
+                response.raise_for_status()
+                content = response.content
+                print(f"Fetched file {file_url}.")
+            except requests.exceptions.RequestException as e:
+                print(f"Error fetching file: {e}")
+                return f"Error fetching file: {e}", None
+            except Exception as e:
+                print(f"An unexpected error occurred fetching file: {e}")
+                return f"An unexpected error occurred fetching file: {e}", None
+        else:
+            file_url = None
+            content = None
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
+            submitted_answer = agent(question_text, attached_file={"name": file_name, "path": file_url, "content": content}, recursion_limit=-1)
+            print(f"Agent submitted {number} answer: {submitted_answer}\n")
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
+            print(f"Error running agent on task {task_id}: {e}\n")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
     if not answers_payload:
@@ -98,46 +108,60 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
     # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
+    n = 0
+    while n < 5:
         try:
+            response = requests.post(submit_url, json=submission_data, timeout=60)
+            response.raise_for_status()
+            result_data = response.json()
+            final_status = (
+                f"Submission Successful!\n"
+                f"User: {result_data.get('username')}\n"
+                f"Overall Score: {result_data.get('score', 'N/A')}% "
+                f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+                f"Message: {result_data.get('message', 'No message received.')}"
+            )
+            print("Submission successful.")
+            results_df = pd.DataFrame(results_log)
+            return final_status, results_df
+        except requests.exceptions.HTTPError as e:
+            n += 1
+            if n < 5:
+                print(f"Server responded with status {e.response.status_code}. Retrying ({n})...")
+                continue
+            error_detail = f"Server responded with status {e.response.status_code}."
+            try:
+                error_json = e.response.json()
+                error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+            except requests.exceptions.JSONDecodeError:
+                error_detail += f" Response: {e.response.text[:500]}"
+            status_message = f"Submission Failed: {error_detail}"
+            print(status_message)
+            results_df = pd.DataFrame(results_log)
+            return status_message, results_df
+        except requests.exceptions.Timeout:
+            n += 1
+            if n < 5:
+                print(f"Submission Failed: The request timed out. Retrying ({n})...")
+                continue
+            status_message = "Submission Failed: The request timed out."
+            print(status_message)
+            results_df = pd.DataFrame(results_log)
+            return status_message, results_df
+        except requests.exceptions.RequestException as e:
+            n += 1
+            if n < 5:
+                print(f"Submission Failed: Network error - {e}. Retrying ({n})...")
+                continue
+            status_message = f"Submission Failed: Network error - {e}"
+            print(status_message)
+            results_df = pd.DataFrame(results_log)
+            return status_message, results_df
+        except Exception as e:
+            status_message = f"An unexpected error occurred during submission: {e}"
+            print(status_message)
+            results_df = pd.DataFrame(results_log)
+            return status_message, results_df
 
 
 # --- Build Gradio Interface using Blocks ---
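For local debugging, a sketch of running the agent on a single task while reusing the attached_file contract built in the loop above; the task id, file name, and question below are placeholders, not real benchmark values.

# Sketch: one task end-to-end outside the Gradio UI (placeholder values).
import requests
from agent import FinalAgent

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
task_id = "TASK_ID_HERE"        # placeholder task id
file_name = "example.xlsx"      # placeholder; '' means "no attachment"
question_text = "What is the total of the sales column?"  # placeholder question

if file_name != "":
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    response = requests.get(file_url, timeout=15)
    response.raise_for_status()
    content = response.content   # raw bytes; FinalAgent decides how to encode them
else:
    file_url, content = None, None

agent = FinalAgent(model_type="GOOGLE")
answer = agent(
    question_text,
    attached_file={"name": file_name, "path": file_url, "content": content},
    recursion_limit=-1,
)
print(answer)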
requirements.txt
CHANGED
@@ -1,2 +1,19 @@
+arxiv
+datasets
+ddgs
+duckduckgo-search
+gradio[oauth]
+langchain-community
+langchain_experimental
+langchain_google_genai
+langchain_huggingface
+langchain-tavily
+langchain_ollama
+langgraph
+python-dotenv
+openpyxl
+rank_bm25
+requests
+smolagents
+wikipedia
+youtube-transcript-api
system_prompt.yaml
ADDED
@@ -0,0 +1,7 @@
"system_prompt": |-
  You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
  [YOUR FINAL ANSWER].
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
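For clarity, a minimal sketch of how this file is consumed: yaml.safe_load returns a dict with a single system_prompt key whose |- block scalar keeps the line breaks, and agent.py wraps that string in a SystemMessage prepended to every model call.

# Sketch: loading system_prompt.yaml the same way FinalAgent.__init__ does.
import yaml
from langchain_core.messages import SystemMessage

with open("system_prompt.yaml", "r") as stream:
    prompt_templates = yaml.safe_load(stream)

# The "|-" block scalar preserves newlines and strips the trailing one,
# so this prints the instructions exactly as written above.
system_message = SystemMessage(content=prompt_templates["system_prompt"])
print(system_message.content)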
tools.py
ADDED
@@ -0,0 +1,158 @@
import base64
import io
import pandas as pd
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun, ArxivQueryRun
from langchain_tavily.tavily_search import TavilySearch

@tool
def python_repl_tool(command: str) -> str:
    """A tool to execute Python commands. If you want to see the output of a value, you should print it out with `print(...)`.
    Args:
        command (str): A valid Python command to execute.
    Returns:
        str: The output of the command."""
    print('Python shell tool called')
    result = PythonREPL().run(command)  # run() needs a PythonREPL instance, not the class
    return str(result)

@tool
def read_excel_csv(input_str: str, file_type: str = 'csv') -> str:
    """
    Extracts information from a base64-encoded file or a path to a csv or excel file.

    Args:
        input_str (str): String containing a base64-encoded file or its path.
        file_type (str): Type of the file encoded in base64 ('csv' or 'excel').

    Returns:
        str: Summary of the input file content.
    """
    print(f'Read excel/csv tool called {file_type} ({input_str[:20]})')
    try:
        # Decode the base64 string
        byte_path = io.BytesIO(base64.b64decode(input_str))
    except Exception as e:
        # Assume it's a file path if decoding fails
        byte_path = input_str

    # Load into a DataFrame based on file type
    if file_type == 'csv':
        df = pd.read_csv(byte_path)
    elif file_type in ['xlsx', 'excel']:
        df = pd.read_excel(byte_path)
    else:
        raise ValueError("Unsupported file_type. Use 'csv' or 'excel'.")

    result = f"{file_type.upper()} file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
    result += f"Columns: {', '.join(df.columns)}\n\n"

    # Add summary statistics
    result += "Summary statistics:\n"
    result += str(df.describe())
    #print(result)
    return result

@tool
def wikipedia_query_tool(query: str) -> str:
    """A tool to query Wikipedia. It returns a summary of the page, not the full content. To get the full content, you can use another tool.
    Args:
        query (str): A search query for Wikipedia.
    Returns:
        str: A summary of the related Wikipedia page."""
    print('Wikipedia query tool called:', query)
    wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=2))
    result = wiki.run(query)
    print(f"Wikipedia query {query} result (limited to 10 chars): {result[:10]}")
    return result.strip()

@tool
def arxiv_query_tool(query: str) -> str:
    """A tool to query arXiv.org
    Useful for when you need to answer physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics
    questions from scientific articles on arxiv.
    Args:
        query (str): A search query for ArXiv.
    Returns:
        str: The text content of the ArXiv page.
    """
    print('ArXiv query tool called', query)
    arxiv = ArxivQueryRun()
    result = arxiv.run(query)
    print(f"ArXiv query {query} result (limited to 50 chars): {result[:50]}")
    return result.strip()

@tool
def webpage_reader_tool(page_url: str) -> str:
    """A tool to read the full content of a webpage.
    Args:
        page_url (str): A valid URL of the webpage to read.
    Returns:
        str: The text content of the webpage.
    """
    print('Web page reader tool called', page_url)
    loader = WebBaseLoader(web_paths=[page_url])
    docs = []
    for doc in loader.lazy_load():
        docs.append(doc)

    assert len(docs) == 1
    doc = docs[0]

    # The page title lives in the Document metadata, not on the Document object itself.
    return f'<Document source="{page_url}" title="{doc.metadata.get("title", "")}"/>\n{doc.page_content.strip()}\n</Document>'

@tool
def web_search_tool(query: str) -> str:
    """Search internet for a query and return maximum 3 results.
    Args:
        query: The search query.
    Returns:
        str: The formatted search results.
    """

    print('Web search tool called', query)

    try:
        search_docs = TavilySearch(max_results=3).invoke(query)
        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
                for doc in search_docs['results']
            ]
        )
    except Exception as e:
        print(f'\tError {e}, passing to DuckDuckGo')
        # DuckDuckGoSearchRun returns a single formatted string, not a dict of results
        formatted_search_docs = DuckDuckGoSearchRun().invoke(query)
    return formatted_search_docs

@tool
def transcribe_youtube_video_tool(video_id: str) -> str:
    """A tool to transcribe the audio of a YouTube video.
    Args:
        video_id (str): A valid YouTube video ID or URL.
    Returns:
        str: The transcribed text of the video.
    """
    print(f"Transcribing YouTube video with ID: {video_id}")
    if 'youtube' in video_id or 'watch' in video_id:
        # Extract video ID from URL
        video_id = video_id.split('v=')[-1].split('&')[0]

    transcript_api = YouTubeTranscriptApi()
    try:
        transcript = transcript_api.fetch(video_id)
        transcript_text = ' '.join([entry.text for entry in transcript])
        print(f"\t {transcript_text}")
        return transcript_text.strip()
    except TranscriptsDisabled as e:
        return f"Transcription is disabled for this video: {e}"
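Since each function above is wrapped with @tool, it becomes a LangChain tool and is exercised with .invoke(...) rather than a direct call; a minimal sketch of testing two of them in isolation, with illustrative arguments only (the sample CSV name is a throwaway, not a file from this repository).

# Sketch: driving the tools directly, outside the LangGraph ToolNode.
import pandas as pd
from tools import wikipedia_query_tool, read_excel_csv

# Wikipedia summary lookup (illustrative query; needs network access).
print(wikipedia_query_tool.invoke({"query": "Python (programming language)"}))

# read_excel_csv accepts either a base64 payload or a file path; write a small
# throwaway CSV first so the call is self-contained.
pd.DataFrame({"item": ["a", "b"], "price": [1.5, 2.0]}).to_csv("sample.csv", index=False)
print(read_excel_csv.invoke({"input_str": "sample.csv", "file_type": "csv"}))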