import os
import re
import requests
import openai
from typing import List
from dotenv import load_dotenv
from langchain_core.tools import tool
from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ImageCaptionLoader, ArxivLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_text_splitters import CharacterTextSplitter
# Load environment variables (e.g. API keys used by openai) from a local .env file
load_dotenv()
@tool
def multiply(a: int, b: int) -> int:
    """
    Return the product of two integers.

    Args:
        a: The first factor
        b: The second factor
    Returns:
        int: The result of multiplying a by b
    """
    product = a * b
    return product
@tool
def add(a: int, b: int) -> int:
    """
    Return the sum of two integers.

    Args:
        a: The first addend
        b: The second addend
    Returns:
        int: The result of adding a and b
    """
    total = a + b
    return total
@tool
def subtract(a: int, b: int) -> int:
    """
    Return the difference of two integers.

    Args:
        a: The minuend
        b: The subtrahend
    Returns:
        int: The result of subtracting b from a
    """
    difference = a - b
    return difference
@tool
def divide(a: int, b: int) -> float:
    """
    Divide the first integer by the second integer and return the result.

    Args:
        a: The dividend
        b: The divisor (must be non-zero)
    Returns:
        float: The quotient a / b. Note: `/` is true division in Python 3,
        so the result is a float even for evenly divisible integers —
        the original `-> int` annotation was incorrect.
    Raises:
        ZeroDivisionError: If b is 0.
    """
    return a / b
# Base URL for downloading task-attached files; a task_id is appended to form the full URL
FILE_URL = "https://agents-course-unit4-scoring.hf.space/files/"
@tool
def read_file(task_id: str) -> str:
    """
    Download the file attached to a task and return its text content.

    Args:
        task_id: The id of the task whose file should be downloaded
    Returns:
        str: The decoded text content of the file
    Raises:
        requests.HTTPError: If the download fails (non-2xx status).
    """
    file_url = f"{FILE_URL}{task_id}"
    response = requests.get(file_url, timeout=10, allow_redirects=True)
    # Fail loudly on a bad download instead of returning an error page's body
    response.raise_for_status()
    # `response.text` decodes the payload directly — no need for the original
    # write-to-temp-file-then-reopen round trip, which also left a stray
    # 'temp' file on disk.
    return response.text
@tool
def analyze_image(task_id: str) -> str:
    """
    Analyze an image based on the task_id and return a description of its content.

    Args:
        task_id: The id of the task to analyze the image from
    Returns:
        str: A caption describing the content of the image
    """
    image_url = f"{FILE_URL}{task_id}"
    loader = ImageCaptionLoader(images=[image_url])
    docs = loader.load()
    # The loader yields one document per image; we pass a single URL
    return docs[0].page_content
@tool
def analyze_audio(task_id: str) -> str:
    """
    Transcribe an mp3 file attached to a task using OpenAI Whisper.

    Args:
        task_id: The id of the task to fetch the audio file from
    Returns:
        str: The transcript of the audio file
    Raises:
        requests.HTTPError: If the download fails (non-2xx status).
    """
    file_url = f"{FILE_URL}{task_id}"
    response = requests.get(file_url, timeout=10, allow_redirects=True)
    # Fail loudly on a bad download instead of transcribing an error page
    response.raise_for_status()
    temp_file = 'temp.mp3'
    with open(temp_file, 'wb') as fp:
        fp.write(response.content)
    try:
        # Whisper needs a real file handle, hence the temp file on disk
        with open(temp_file, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1"
            )
    finally:
        # The original leaked temp.mp3; always remove it, even if the API call fails
        os.remove(temp_file)
    return transcript.text
@tool
def analyze_youtube_video(youtube_url: str, question: str) -> str:
    """
    Analyze a youtube video based on the youtube_url and the question and return the answer to the question

    Args:
        youtube_url: The url of the youtube video to analyze
        question: The question to answer based on the youtube video
    Returns:
        str: The answer to the question, or an explicit message if analysis
        is unavailable
    """
    # BUG FIX: the original body was empty, so the tool silently returned
    # None despite the declared `-> str`. Return an explicit string so the
    # calling agent receives usable output instead of None.
    return (
        "Video analysis is not implemented yet; "
        f"unable to answer {question!r} for {youtube_url!r}."
    )
@tool
def web_search(query: str) -> str:
    """
    Search the web for the given query and return the page contents.

    Args:
        query: The query to search the web for
    Returns:
        str: The cleaned, concatenated text content of the top result pages
    """
    # NOTE(review): recent langchain_community versions name this kwarg
    # `output_format`, not `output_type` — verify against the pinned version.
    search_engine = DuckDuckGoSearchResults(output_type="list", num_results=3)
    results = search_engine.invoke({"query": query})
    page_urls = [result["link"] for result in results]
    loader = WebBaseLoader(web_paths=page_urls)
    docs = loader.load()
    # Cap each page at 15k characters to keep the combined context manageable
    combined_text = "\n\n".join(doc.page_content[:15000] for doc in docs)
    # Collapse runs of 3+ newlines and long horizontal whitespace stretches
    cleaned_text = re.sub(r'\n{3,}', '\n\n', combined_text)
    cleaned_text = re.sub(r'[ \t]{6,}', ' ', cleaned_text)
    # Single final strip (the original stripped the same text twice)
    return cleaned_text.strip()
@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia articles with the given query and return the pages.

    Args:
        query: The query to search Wikipedia for
    Returns:
        str: The text content of up to 3 Wikipedia articles related to the
        query, separated by '---' delimiters
    """
    # (Removed leftover debug print to stdout from the original.)
    search_docs = WikipediaLoader(query=query, load_max_docs=3).load()
    # Visible delimiter between articles so the model can tell them apart
    return "\n\n---\n\n".join(
        f'\n{doc.page_content}\n'
        for doc in search_docs
    )
@tool
def arxiv_search(query: str) -> str:
    """
    Search arxiv for the given query and return the results.

    Args:
        query: The query to search arxiv for
    Returns:
        str: The text content of the arxiv search results, separated by
        '---' delimiters
    """
    docs = ArxivLoader(query=query, load_max_docs=3).load()
    # Keep only the first 1000 characters of each paper to limit context size
    snippets = [f'\n{doc.page_content[:1000]}\n' for doc in docs]
    return "\n\n---\n\n".join(snippets)
@tool
def text_splitter(text: str) -> List[str]:
    """
    Split a large text into smaller chunks using Langchain's CharacterTextSplitter.

    Args:
        text: The large text to split into smaller chunks
    Returns:
        List[str]: A list containing the smaller chunks of the text
    """
    # 300-char chunks with 10-char overlap between consecutive chunks
    chunker = CharacterTextSplitter(chunk_size=300, chunk_overlap=10)
    chunks = chunker.split_text(text)
    return chunks