Final_Assignment_Template

Sleeping

File size: 8,663 Bytes

e1dc6ad

import os
import re
import requests
import pandas as pd
from typing import List
from dotenv import load_dotenv

from google import genai
from google.genai import types

from langchain_core.tools import tool
from langchain.document_loaders import WebBaseLoader
from langchain_experimental.tools import PythonREPLTool
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.retrievers import WikipediaRetriever
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_community.document_loaders import ImageCaptionLoader, AssemblyAIAudioTranscriptLoader


# Load variables from a .env file (python-dotenv) so the os.getenv(...) lookups
# used by the tools below can find their API keys.
load_dotenv()
# Base URL of the remote API; the file-reading tools below build
# "{DEFAULT_API_URL}/files/{task_id}" download URLs from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


def duckduck_websearch(query: str) -> str:
    """
    Performs a DuckDuckGo web search for the given query, downloads the content of
    up to two result pages, and returns their combined, whitespace-normalized text.

    This is useful when the task requires analysis of web page content, such as retrieving poems, 
    changelogs, or other textual resources.

    Args:
        query (str): The search query.

    Returns:
        str: The text of the retrieved pages (each truncated to 15000 characters),
            joined by a blank line. An empty string if the search yields no usable links.
    """
    search_engine = DuckDuckGoSearchResults(output_format="list", num_results=2)
    # Use the Runnable entry point; calling the tool object directly is deprecated.
    hits = search_engine.invoke(query)
    # Skip results that carry no link so the loader never receives an empty URL.
    page_urls = [hit["link"] for hit in hits if hit.get("link")]
    if not page_urls:
        return ""

    docs = WebBaseLoader(web_paths=page_urls).load()

    # Cap each page so a single huge document cannot dominate the output.
    combined_text = "\n\n".join(doc.page_content[:15000] for doc in docs)

    # Collapse runs of 3+ newlines to one blank line and runs of 6+ spaces/tabs to one space.
    cleaned_text = re.sub(r'\n{3,}', '\n\n', combined_text)
    cleaned_text = re.sub(r'[ \t]{6,}', ' ', cleaned_text)

    return cleaned_text.strip()


def serper_websearch(query: str) -> str:
    """
    Runs the query through the Serper (Google) search wrapper.

    The API key is read from the SERPER_API_KEY environment variable.

    Args:
        query (str): The search query.
    
    Returns:
        str: the search result produced by the wrapper's ``run`` call
    """
    api_key = os.getenv("SERPER_API_KEY")
    return GoogleSerperAPIWrapper(serper_api_key=api_key).run(query)

def visit_webpage(url: str) -> str:
    """
    Fetches a web page and returns the beginning of its response body.
    
    Args:
        url: the webpage url
    
    Returns:
        str: The first 5000 characters of the response text, or an
            "[ERROR fetching ...]" message if anything goes wrong.
    """
    try:
        # 5-second timeout keeps a slow host from stalling the caller.
        body = requests.get(url, timeout=5).text
        return body[:5000]
    except Exception as err:
        # Best-effort tool: report the failure as text instead of raising.
        reason = str(err)
        return f"[ERROR fetching {url}]: {reason}"

def wiki_search(query: str) -> str:
    """
    Looks up Wikipedia articles for the query and returns their text.

    Args:
        query (str): The search term to look up on Wikipedia.

    Returns:
        str: The page content of every retrieved article, separated by blank lines.
    """
    pages = WikipediaRetriever().invoke(query)
    texts = [page.page_content for page in pages]
    return "\n\n".join(texts)

def youtube_viewer(youtube_url: str, question: str) -> str:
    """
    Sends a YouTube video URL together with a question to a Gemini model
    and returns the model's text answer.

    The API key is read from the GEMINI_API_KEY environment variable.

    Args:
        youtube_url (str): The URL of the YouTube video, in the format 
            "https://www.youtube.com/...".
        question (str): A question related to the content of the video.

    Returns:
        str: The text of the model response.
    """
    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

    # The request has two parts: the video reference, then the question about it.
    video_part = types.Part(file_data=types.FileData(file_uri=youtube_url))
    question_part = types.Part(text=question)
    request_content = types.Content(parts=[video_part, question_part])

    answer = client.models.generate_content(
        model='models/gemini-2.5-flash-preview-04-17',
        contents=request_content,
    )
    return answer.text

def text_splitter(text: str) -> List[str]:
    """
    Breaks a text into chunks with LangChain's CharacterTextSplitter
    (chunk_size=450, chunk_overlap=10).
    
    Args:
        text: A string of text to split.
    
    Returns:
        List[str]: the chunks produced by the splitter
    """
    chunker = CharacterTextSplitter(chunk_size=450, chunk_overlap=10)
    chunks = chunker.split_text(text)
    return chunks

def read_file(task_id: str) -> str:
    """
    Download the file attached to a task, then read its content as text.

    The bytes are fetched from ``{DEFAULT_API_URL}/files/{task_id}``, written to a
    file named ``temp`` in the current working directory, and read back in text mode.
    
    Args:
        task_id: the task_id whose file should be downloaded
    
    Returns:
        str: the file content decoded as UTF-8; bytes that are not valid
            UTF-8 are replaced with U+FFFD instead of raising.
    """
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    r = requests.get(file_url, timeout=15, allow_redirects=True)
    with open('temp', "wb") as fp:
        fp.write(r.content)
    # Decode explicitly: the platform default encoding varies by host, and a
    # strict decode would raise UnicodeDecodeError on non-text attachments.
    with open('temp', encoding="utf-8", errors="replace") as f:
        return f.read()

def excel_read(task_id: str) -> str:
    """
    First download the excel file, then summarize its content.

    The file is fetched from ``{DEFAULT_API_URL}/files/{task_id}``, saved as
    ``temp.xlsx`` in the current working directory and loaded with pandas.
    
    Args:
        task_id: the task_id whose excel file should be downloaded
    
    Returns:
        str: row/column counts, the column names and ``DataFrame.describe()``
            statistics, or an "Error analyzing Excel file: ..." message on failure
    """
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        with open('temp.xlsx', "wb") as fp:
            fp.write(r.content)
        # Read the Excel file
        df = pd.read_excel('temp.xlsx')
        # Column labels may be ints, timestamps, etc.; str.join needs strings.
        column_names = ', '.join(map(str, df.columns))
        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {column_names}\n\n"
        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error analyzing Excel file: {str(e)}"
   
def csv_read(task_id: str) -> str:
    """
    First download the csv file, then summarize its content.

    The file is fetched from ``{DEFAULT_API_URL}/files/{task_id}``, saved as
    ``temp.csv`` in the current working directory and loaded with pandas.
    
    Args:
        task_id: the task_id whose csv file should be downloaded
    
    Returns:
        str: row/column counts, the column names and ``DataFrame.describe()``
            statistics, or an "Error analyzing CSV file: ..." message on failure
    """
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        with open('temp.csv', "wb") as fp:
            fp.write(r.content)
        # Read the CSV file
        df = pd.read_csv('temp.csv')
        # Column labels are not guaranteed to be strings; str.join needs strings.
        column_names = ', '.join(map(str, df.columns))
        # The message names the actual file type (it previously said "Excel").
        result = (
            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {column_names}\n\n"
        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error analyzing CSV file: {str(e)}"


def mp3_listen(task_id: str) -> str:
    """
    Download the mp3 file attached to a task and transcribe it.

    The audio is saved as ``temp.mp3`` in the current working directory and
    passed to the AssemblyAI transcript loader; the API key is read from the
    AssemblyAI_API_KEY environment variable.
    
    Args:
        task_id: the task_id whose mp3 file should be downloaded
    
    Returns:
        str: the page content of the loaded documents, one per line
    """
    download = requests.get(
        f'{DEFAULT_API_URL}/files/{task_id}',
        timeout=15,
        allow_redirects=True,
    )
    with open('temp.mp3', "wb") as audio_file:
        audio_file.write(download.content)

    transcript_loader = AssemblyAIAudioTranscriptLoader(
        file_path="temp.mp3",
        api_key=os.getenv("AssemblyAI_API_KEY"),
    )
    return "\n".join(doc.page_content for doc in transcript_loader.load())
    

def image_caption(dir: str) -> str:
    """
    Produce a caption for the provided image
    
    Args:
        dir: the image url link
    
    Returns:
        str: the page content of the first document returned by the caption loader
    """
    documents = ImageCaptionLoader(images=[dir]).load()
    # Only one image is submitted, so the caption is taken from the first document.
    first_document = documents[0]
    return first_document.page_content


def run_python(code: str):
    """ Execute the given python code in a fresh PythonREPLTool and return its output

    NOTE(review): this executes arbitrary code with the privileges of the
    current process; only pass code from a trusted source.
    
    Args:
        code: the python code
    """
    repl = PythonREPLTool()
    return repl.run(code)

def multiply(a: float, b: float) -> float:
    """
    Return the product of two numbers.
    
    Args:
        a: first factor
        b: second factor
    
    Returns:
        float: a times b
    """
    product = a * b
    return product

def add(a: float, b: float) -> float:
    """
    Return the sum of two numbers.
    
    Args:
        a: first addend
        b: second addend
    
    Returns:
        float: a plus b
    """
    total = a + b
    return total

def subtract(a: float, b: float) -> float:
    """
    Return the difference of two numbers.
    
    Args:
        a: the number to subtract from
        b: the number to subtract
    
    Returns:
        float: a minus b
    """
    difference = a - b
    return difference

def divide(a: float, b: float) -> float:
    """Return the quotient of two numbers.
    
    Args:
        a: the dividend
        b: the divisor
    
    Returns:
        float: a divided by b

    Raises:
        ValueError: if b is zero
    """
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")