# FinalAgentProject

# Sleeping

# File size: 5,216 Bytes

from langchain_core.tools import tool
import wikipediaapi
import pandas as pd
import requests
import fitz  # PyMuPDF
import io
from urllib.parse import urlparse


@tool
def add(a: int, b: int) -> int:
    """
    Adds two values together and returns their sum

    Args:
        a: first number
        b: second number
    """
    total = a + b
    return total

@tool
def subtract(a: int, b: int) -> int:
    """
    Subtracts the second value from the first and returns the difference

    Args:
        a: number to subtract from
        b: number to subtract
    """
    return a - b

@tool
def multiply(a: int, b: int) -> int:
    """
    Multiplies two values and returns their product

    Args:
        a: first number
        b: second number
    """
    return a * b

@tool
def divide(a: int, b: int) -> float:
    """
    Divides the first value by the second and returns the quotient

    True division is used, so the result is a float even when both
    inputs are integers (for example 7 divided by 2 gives 3.5).

    Args:
        a: numerator
        b: denominator

    Raises:
        ValueError: if the denominator is zero
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b

@tool
def search_wikipedia(page_title: str, language: str) -> str:
    """
    This tool looks up a specific Wikipedia page and returns a short excerpt of its summary plus a preview of any HTML tables it contains.

    Args:
        page_title: Title of the Wikipedia page.
        language: Language code (e.g., "en", "es", "fr").

    Returns:
        A string starting with "Text: " followed by the first 75 characters of the page summary,
        then every table found on the page (limited to its first 10 rows and 5 columns) rendered
        as markdown, or "No tables found on this page." when there are none. Failures are reported
        as an "Error ..." string instead of being raised.
    """
    user_agent = 'AIAgent (gabriel_abilleira@tutanota.com)'

    try:
        wiki_wiki = wikipediaapi.Wikipedia(
            user_agent=user_agent,
            language=language,
            extract_format=wikipediaapi.ExtractFormat.HTML
        )

        page = wiki_wiki.page(page_title)

        if not page.exists():
            return f"Error: Page '{page_title}' not found in language '{language}'."

        # Download the rendered page explicitly so the request carries the same
        # descriptive User-Agent as the API call and cannot hang forever.
        html_response = requests.get(
            page.fullurl,
            headers={"User-Agent": user_agent},
            timeout=15,
        )
        html_response.raise_for_status()

        try:
            # StringIO wrapper: passing literal HTML strings to read_html is deprecated.
            tables = pd.read_html(io.StringIO(html_response.text))
        except ValueError:
            # read_html raises ValueError instead of returning an empty list when
            # the document contains no tables; that is not an error for this tool.
            tables = []

        # Keep the preview small: at most 10 rows and 5 columns per table.
        markdown_tables = [
            f"\n---\n**Table {i + 1}:**\n{table.iloc[:10, :5].to_markdown(index=False)}"
            for i, table in enumerate(tables)
        ]

        table_output = "\n".join(markdown_tables) if markdown_tables else "No tables found on this page."

        # NOTE(review): with ExtractFormat.HTML the summary presumably contains HTML
        # markup, so these 75 characters may include tags - confirm this is intended.
        return f"Text: {page.summary[:75]}\n\n{table_output}"

    except Exception as e:
        return f"Error retrieving Wikipedia content: {str(e)}"

@tool
def duckduckgo_search(query: str) -> str:
    """Use DuckDuckGo to search the web for up-to-date information.

    Args:
        query: The query to search for on the web

    Returns:
        The abstract text if present, otherwise the direct answer, otherwise the
        text of up to three related topics. "No good results found." is returned
        when none of these is available, and "Search failed: ..." when the
        request or the JSON decoding fails.
    """
    url = "https://api.duckduckgo.com/"
    params = {
        "q": query,
        "format": "json",
        "no_redirect": 1,
        "no_html": 1,
        "skip_disambig": 1,
    }

    try:
        # A timeout keeps the agent from blocking forever on a stalled connection;
        # raise_for_status turns HTTP errors into a clear "Search failed" message.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        # Try the most useful fields first
        if data.get("AbstractText"):
            return str(data["AbstractText"])
        if data.get("Answer"):
            return str(data["Answer"])

        # Filter before slicing: entries without a non-empty "Text" are skipped so
        # they neither use up one of the three slots nor yield an empty answer.
        related_texts = [
            rt["Text"]
            for rt in data.get("RelatedTopics") or []
            if isinstance(rt, dict) and rt.get("Text")
        ]
        if related_texts:
            return "\n".join(related_texts[:3])

        return "No good results found."

    except Exception as e:
        return f"Search failed: {e}"


@tool
def search_papers(query: str) -> str:
    """Search for academic papers and retrieve their content when possible.

    Queries the Semantic Scholar paper search endpoint for up to three papers.
    For results whose link points at arxiv.org, the PDF is downloaded and the
    text of its first three pages is included.

    Args:
        query: Free-text search query.

    Returns:
        One entry per paper (title, year, authors, abstract, link and full-text
        excerpt) separated by "---" lines, "No papers found." when the search is
        empty, or "Error fetching papers: ..." when the search request fails.
    """

    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": 3,
        "fields": "title,abstract,authors,url,year"
    }

    try:
        # Timeout plus status check: an HTTP error (e.g. rate limiting) is reported
        # as an error instead of being mistaken for an empty result set.
        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()
        data = response.json()

        papers = data.get("data")
        if not papers:
            return "No papers found."

        results = []

        for paper in papers:
            # `or` instead of a .get() default: a key that is present with a null
            # value would otherwise bypass the default and leak None downstream.
            title = paper.get("title") or "No title"
            authors = ", ".join(a.get("name") or "" for a in paper.get("authors") or [])
            year = paper.get("year") or "n.d."
            abstract = paper.get("abstract") or "No abstract available."
            link = paper.get("url") or ""

            full_text = "Full text not available."

            # Attempt to download and parse PDF (for arXiv)
            if "arxiv.org" in link:
                # Rewrite only the "/abs/" path segment, and only once, so other
                # occurrences of "abs" in the URL are left untouched.
                pdf_url = link.replace("/abs/", "/pdf/", 1)
                if not pdf_url.endswith(".pdf"):
                    pdf_url += ".pdf"
                try:
                    pdf_response = requests.get(pdf_url, timeout=30)
                    pdf_response.raise_for_status()
                    # Context manager closes the document even if extraction fails.
                    with fitz.open(stream=pdf_response.content, filetype="pdf") as doc:
                        # Only first 3 pages (fewer if the document is shorter)
                        page_numbers = range(min(3, doc.page_count))
                        full_text = "\n".join(
                            doc.load_page(number).get_text() for number in page_numbers
                        )
                except Exception as pdf_err:
                    full_text = f"Failed to retrieve full text: {pdf_err}"

            # Built line by line so source-code indentation does not end up in the output.
            result = "\n".join([
                f"**{title}** ({year}) by {authors}",
                f"Abstract: {abstract}",
                f"Link: {link}",
                f"Full Text (first pages):\n{full_text}",
            ])

            results.append(result)

        return "\n\n---\n\n".join(results)

    except Exception as e:
        return f"Error fetching papers: {e}"