lcapriles committed on
Commit 9b1db64 · verified · 1 Parent(s): cc8200f
Files changed (1)
  1. tools.py +184 -0
tools.py ADDED
import base64
import datetime
import io
import os
import xml.etree.ElementTree as ET

import pandas as pd
import pytz
import requests
from langchain_community.document_loaders import WikipediaLoader
from openai import AzureOpenAI
from PIL import Image
from smolagents import tool, DuckDuckGoSearchTool
from tavily import TavilyClient

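# Required environment variables (as referenced by the tools below):
#   TAVILY_API_KEY        - Tavily web search
#   AZURE_OPENAI_ENDPOINT - Azure OpenAI endpoint
#   AZURE_OPENAI_API_KEY  - Azure OpenAI API key
#   OPENAI_API_VERSION    - Azure OpenAI API version
#   AZURE_OPENAI_MODEL    - Azure OpenAI deployment name
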
# === Tools ===
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return a maximum of 2 results.

    Args:
        query: The search query.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )
    return formatted_search_docs

# Tool for web search capabilities.
# TODO: improve fallback handling for timeout errors (see the sketch below).
client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])


@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return up to 3 results.

    Args:
        query: The search query.
    """
    try:
        results = client.search(query=query, max_results=3)
        formatted = "\n\n---\n\n".join(
            f"<Document source='{item.get('url', '')}'>\n{item.get('content', '').strip()}\n</Document>"
            for item in results.get("results", [])
        )
        return formatted or "No relevant search results found."
    except Exception as e:
        return f"[web_search error]: {e}"

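# Fallback sketch (an assumption, not part of the original commit): retry a
# failed Tavily search with smolagents' DuckDuckGoSearchTool, which is already
# imported above. It is assumed that @tool objects and DuckDuckGoSearchTool
# instances are callable with the query string, per smolagents' Tool API.
_ddg = DuckDuckGoSearchTool()


@tool
def web_search_with_fallback(query: str) -> str:
    """Search Tavily first; on an error, fall back to DuckDuckGo.

    Args:
        query: The search query.
    """
    result = web_search(query)
    if not result.startswith("[web_search error]"):
        return result
    # Tavily timed out or errored; fall through to DuckDuckGo.
    try:
        return _ddg(query)
    except Exception as e:
        return f"[web_search_with_fallback error]: {e}"
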
# Tool to obtain the current time in a given timezone.
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {e}"

# Tool to get the HTML content of a web page.
@tool
def visit_webpage(url: str) -> str:
    """Fetches the raw HTML content of a web page.

    Args:
        url: The URL of the webpage.
    """
    try:
        response = requests.get(url, timeout=5)
        return response.text  # Optionally truncate, e.g. response.text[:5000].
    except Exception as e:
        return f"[ERROR fetching {url}]: {e}"

# Tool for add operations.
@tool
def calculator_add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    return a + b

# Tool for image understanding.
@tool
def ocr(base64_image: str) -> str:
    """Analyzes the content of an image using gpt-4o.

    Args:
        base64_image: A base64-encoded string of the image.
    Returns: a string summary or description of what the image contains.
    """
    # Named azure_client to avoid shadowing the module-level Tavily client.
    azure_client = AzureOpenAI(
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
        api_version=os.environ.get("OPENAI_API_VERSION"),
    )

    response = azure_client.chat.completions.create(
        model=os.environ["AZURE_OPENAI_MODEL"],
        messages=[
            {"role": "user", "content": [
                {"type": "text", "text": "Describe the image"},
                {"type": "image_url", "image_url": {
                    "url": "data:image/jpeg;base64," + base64_image
                }},
            ]}
        ],
    )

    return response.choices[0].message.content

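# Helper sketch (an assumption, not in the original commit): builds the
# base64 payload that ocr() expects from a local image file, using the PIL,
# io, and base64 imports above. The path "photo.jpg" is a placeholder.
def encode_image_as_base64(path: str) -> str:
    """Load an image, re-encode it as JPEG, and return a base64 string."""
    with Image.open(path) as img:
        buffer = io.BytesIO()
        img.convert("RGB").save(buffer, format="JPEG")
        return base64.b64encode(buffer.getvalue()).decode("utf-8")

# Example: ocr(encode_image_as_base64("photo.jpg"))
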
# Tool for data parsing.
@tool
def parse_excel(base64_excel: str) -> str:
    """
    Parses a base64-encoded Excel file and returns the first few rows as text.

    Args:
        base64_excel: Base64-encoded Excel file (.xlsx or .xls).
    Returns: a preview of the Excel data (first 5 rows).
    """
    try:
        # Decode base64 and read into a DataFrame.
        binary_data = base64.b64decode(base64_excel)
        df = pd.read_excel(io.BytesIO(binary_data))

        # Optional: customize logic based on column names.
        preview = df.head().to_string(index=False)
        return f"Excel preview:\n{preview}"

    except Exception as e:
        return f"[ERROR] Failed to parse Excel file: {e}"

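# Usage sketch (assumption; "data.xlsx" is a placeholder path): build the
# base64 payload for parse_excel() from a local file.
# with open("data.xlsx", "rb") as f:
#     print(parse_excel(base64.b64encode(f.read()).decode("utf-8")))
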
@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for a query and return a summary of up to 3 papers.

    Args:
        query: The search string used to find relevant papers on ArXiv.
    Returns:
        A formatted string summarizing up to 3 relevant papers.
    """
    try:
        # API URL and query parameters.
        url = "http://export.arxiv.org/api/query"
        params = {
            "search_query": query,
            "start": 0,
            "max_results": 3,
            "sortBy": "relevance",
        }
        # Make the API request.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()

        # Parse the XML response.
        root = ET.fromstring(response.text)  # Convert the XML string into an element tree.
        ns = {"atom": "http://www.w3.org/2005/Atom"}  # Declare the Atom namespace (required for lookups).
        entries = root.findall("atom:entry", ns)  # Retrieve all <entry> elements from the feed.

        if not entries:
            return "No results found on ArXiv"

        results = []
        for entry in entries:
            title = entry.find("atom:title", ns).text.strip()
            summary = entry.find("atom:summary", ns).text.strip()
            link = entry.find("atom:id", ns).text.strip()

            results.append(f"📄 **{title}**\n🔗 {link}\n\n{summary[:1000]}")

        return "\n\n---\n\n".join(results)

    except Exception as e:
        return f"[ArXiv tool error]: {e}"