rookie_fix_this

Sleeping

App Files Files Community

ErdemTheFixer committed on Nov 17, 2025

Commit

2144dc0

verified ·

1 Parent(s): 9a907a1

Create agent_tools.py

Browse files

Files changed (1) hide show

agent_tools.py +353 -0

agent_tools.py ADDED Viewed

	@@ -0,0 +1,353 @@

+import io
+import os
+import re
+import sys
+from typing import List, Callable, Any
+import openai
+import pandas as pd
+import requests
+from dotenv import load_dotenv
+from google import genai
+from google.genai import types
+from langchain_community.document_loaders import WebBaseLoader, ImageCaptionLoader, WikipediaLoader, ArxivLoader
+from langchain_community.tools import DuckDuckGoSearchResults
+from langchain_core.tools import tool
+from langchain_text_splitters import CharacterTextSplitter
+# Base URL of the scoring service; the file-based tools in this module
+# download task attachments from f'{DEFAULT_API_URL}/files/{task_id}'.
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# Load variables from a .env file at import time; the tools that need API
+# keys read them later through os.getenv.
+load_dotenv()
+@tool(description="Multiply two integers and return the result")
+def multiply(a: int, b: int) -> int:
+    return a * b
+@tool(description="Add two integers and return the result")
+def add(a: int, b: int) -> int:
+    return a + b
+@tool(description="Subtract the second integer from the first and return the result")
+def subtract(a: int, b: int) -> int:
+    return a - b
+@tool(
+    description="Divide the first integer by the second and return the result; raises an error if the second integer is zero")
+def divide(a: int, b: int) -> float:
+    if b == 0:
+        raise ValueError("Cannot divide by zero.")
+    return a / b
+@tool(description="Return the remainder of dividing the first integer by the second")
+def modulus(a: int, b: int) -> int:
+    return a % b
+@tool(description="""
+    Searches for a Wikipedia articles using the provided query and returns the content of the corresponding Wikipedia pages.
+    Args:
+        query (str): The search term to look up on Wikipedia.
+    Returns:
+        str: The text content of the Wikipedia articles related to the query.
+    """)
+def wiki_search(query: str) -> str:
+    print("wiki_search called with:", query)
+    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+            for doc in search_docs
+        ])
+    return formatted_search_docs
+@tool(description="""
+    Fetches raw HTML content of a web page.
+    Args:
+        url: the webpage url
+    Returns:
+        str: The combined raw text content of the webpage
+    """)
+def visit_webpage(url: str) -> str:
+    try:
+        response = requests.get(url, timeout=5)
+        return response.text[:5000]
+    except Exception as e:
+        return f"[ERROR fetching {url}]: {str(e)}"
+@tool(description="""
+    Performs a web search using the given query, downloads the content of two relevant web pages,
+    and returns their combined content as a raw string.
+    This is useful when the task requires analysis of web page content, such as retrieving poems,
+    changelogs, or other textual resources.
+    Args:
+        query (str): The search query.
+    Returns:
+        str: The combined raw text content of the two retrieved web pages.
+    """)
+def duckduck_websearch(query: str) -> str:
+    search_engine = DuckDuckGoSearchResults(output_format="list", num_results=2)
+    results = search_engine.invoke({"query": query})
+    page_urls = [url["link"] for url in results]
+    loader = WebBaseLoader(web_paths=page_urls)
+    docs = loader.load()
+    combined_text = "\n\n".join(doc.page_content[:15000] for doc in docs)
+    # Clean up excessive newlines, spaces and strip leading/trailing whitespace
+    cleaned_text = re.sub(r'\n{3,}', '\n\n', combined_text).strip()
+    cleaned_text = re.sub(r'[ \t]{6,}', ' ', cleaned_text)
+    # Strip leading/trailing whitespace
+    cleaned_text = cleaned_text.strip()
+    return cleaned_text
+@tool(description="""
+    Splits text into chunks using LangChain's CharacterTextSplitter.
+    Args:
+        text: A string of text to split.
+    Returns:
+        List[str]: a list of split text
+    """)
+def text_splitter(text: str) -> List[str]:
+    splitter = CharacterTextSplitter(chunk_size=450, chunk_overlap=10)
+    return splitter.split_text(text)
+@tool(description="""
+    First download the file, then read its content
+    Args:
+        dir: the task_id
+    Returns:
+        str: the file content
+    """)
+def read_file(task_id: str) -> str:
+    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
+    r = requests.get(file_url, timeout=15, allow_redirects=True)
+    with open('temp', "wb") as fp:
+        fp.write(r.content)
+    with open('temp') as f:
+        return f.read()
+@tool(description="""
+    First download the excel file, then read its content
+    Args:
+        task_id: the task_id
+    Returns:
+        str: the content of excel file
+    """)
+def excel_read(task_id: str) -> str:
+    try:
+        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
+        r = requests.get(file_url, timeout=15, allow_redirects=True)
+        with open('temp.xlsx', "wb") as fp:
+            fp.write(r.content)
+        # Read the Excel file
+        df = pd.read_excel('temp.xlsx')
+        # Run various analyses based on the query
+        result = (
+            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
+        )
+        result += f"Columns: {', '.join(df.columns)}\n\n"
+        # Add summary statistics
+        result += "Summary statistics:\n"
+        result += str(df.describe())
+        return result
+    except Exception as e:
+        return f"Error analyzing Excel file: {str(e)}"
+@tool(description="""
+    First download the csv file, then read its content
+    Args:
+        dir: the task_id
+    Returns:
+        str: the content of csv file
+    """)
+def csv_read(task_id: str) -> str:
+    try:
+        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
+        r = requests.get(file_url, timeout=15, allow_redirects=True)
+        with open('temp.csv', "wb") as fp:
+            fp.write(r.content)
+        # Read the CSV file
+        df = pd.read_csv('temp.csv')
+        # Run various analyses based on the query
+        result = (
+            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
+        )
+        result += f"Columns: {', '.join(df.columns)}\n\n"
+        # Add summary statistics
+        result += "Summary statistics:\n"
+        result += str(df.describe())
+        return result
+    except Exception as e:
+        return f"Error analyzing CSV file: {str(e)}"
+@tool(description="""
+    Understand the content of the provided image
+    Args:
+        dir: the image url link
+    Returns:
+        str: the image caption
+    """)
+def image_caption(task_id: str) -> str:
+    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
+    loader = ImageCaptionLoader(images=[file_url])
+    metadata = loader.load()
+    return metadata[0].page_content
+@tool(description="""
+    Analyzes a YouTube video from the provided URL and returns an answer
+    to the given question based on the analysis results.
+    Args:
+        youtube_url (str): The URL of the YouTube video, in the format
+            "https://www.youtube.com/...".
+        question (str): A question related to the content of the video.
+    Returns:
+        str: An answer to the question based on the video's content.
+    """)
+def youtube_search(youtube_url: str, question: str) -> str:
+    client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
+    response = client.models.generate_content(
+        model='models/gemini-2.5-flash',
+        contents=types.Content(
+            parts=[
+                types.Part(
+                    file_data=types.FileData(file_uri=youtube_url)
+                ),
+                types.Part(text=question)
+            ]
+        )
+    )
+    return response.text
+@tool(description=
+      """Search Arxiv for a query and return maximum 3 result.
+    Args:
+        query: The search query.""")
+def arvix_search(query: str) -> str:
+    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+            for doc in search_docs
+        ])
+    return formatted_search_docs
+@tool(description= """
+    First download the mp3 file, then listen to it
+    Args:
+        dir: the task_id
+    Returns:
+        str: the content of mp3 file
+    """)
+def whisper_transcribe_api(task_id: str) -> str:
+    openai.api_key = os.getenv("OPENAI_API_KEY")
+    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
+    try:
+        r = requests.get(file_url, timeout=15, allow_redirects=True)
+        temp_path = 'temp.mp3'
+        with open(temp_path, "wb") as fp:
+            fp.write(r.content)
+        with open(temp_path, "rb") as audio_file:
+            transcript = openai.audio.transcriptions.create(
+                file=audio_file,
+                model="whisper-1"
+            )
+        return transcript.text
+    except Exception as e:
+        return f"Error transcribing audio: {e}"
+@tool(description="""
+Execute Python code from a file identified by task_id and file_name.
+Returns the numeric result if defined, otherwise stdout.
+""")
+def run_python_file(task_id: str, file_name: str) -> str:
+    file_path = file_name
+    buffer = io.StringIO()
+    old_stdout = sys.stdout
+    ns = {"__builtins__": __builtins__, "__name__": "__main__"}
+    try:
+        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
+        r = requests.get(file_url, timeout=15, allow_redirects=True)
+        if r.status_code != 200:
+            return f"❌ Failed to download file: {r.status_code}"
+        with open(file_path, "wb") as f:
+            f.write(r.content)
+        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
+            code = f.read()
+        sys.stdout = buffer
+        try:
+            compiled = compile(code, file_path, "exec")
+            exec(compiled, ns, ns)
+        finally:
+            sys.stdout = old_stdout
+        if "result" in ns:
+            return str(ns["result"])
+        else:
+            output = buffer.getvalue().strip()
+            return output or "No output produced."
+    except Exception as e:
+        # Prefer returning a computed result or any partial stdout if available
+        try:
+            sys.stdout = old_stdout
+        except Exception:
+            pass
+        if "result" in ns:
+            return str(ns["result"])
+        output = buffer.getvalue().strip()
+        if output:
+            return output
+        return f"❌ Error executing Python file: {e}"
+    finally:
+        # Ensure the downloaded code file is removed after execution
+        try:
+            if os.path.exists(file_path):
+                os.remove(file_path)
+        except Exception:
+            pass
+# Registry of every tool defined in this module, in the order listed here.
+TOOLS: List[Callable[..., Any]] = [
+    # Integer arithmetic
+    multiply,
+    add,
+    subtract,
+    divide,
+    modulus,
+    # Search and web retrieval
+    duckduck_websearch,
+    arvix_search,
+    wiki_search,
+    visit_webpage,
+    youtube_search,
+    # Text utilities
+    text_splitter,
+    # Task-attachment readers (download by task_id from DEFAULT_API_URL)
+    read_file,
+    excel_read,
+    csv_read,
+    image_caption,
+    whisper_transcribe_api,
+    run_python_file
+]