FinalAgentProject / tools.py
WeByT3's picture
Update tools.py
e9461d2 verified
raw
history blame
5.62 kB
import os
import tempfile

import pandas as pd
import torch
import whisper
from langchain_community.document_loaders import ArxivLoader
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.tools import tool
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from youtube_transcript_api import YouTubeTranscriptApi
# Speech-to-text model used by transcribe_audio; loaded once at import time.
whisper_model = whisper.load_model("base")

# Device that analyze_image moves its input tensors to.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the BLIP-2 processor and model once so every analyze_image call reuses them.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# device_map="auto" already decides where the weights live, so the model is not
# moved again with .to(device): moving a dispatched model is unsupported and
# fails outright when some modules have been offloaded.
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", device_map="auto"
)
@tool
def add(a: int, b: int) -> int:
    """
    Adds two numbers together and returns their total.

    Args:
        a: first number
        b: second number
    """
    total = a + b
    return total
@tool
def substract(a: int, b: int) -> int:
    """
    Subtracts the second value from the first and returns the difference (a - b).

    Args:
        a: number to subtract from
        b: number to subtract
    """
    return a - b
@tool
def multiply(a: int, b: int) -> int:
    """
    Multiplies two values and returns their product (a * b).

    Args:
        a: first number
        b: second number
    """
    return a * b
@tool
def divide(a: int, b: int) -> float:
    """
    Divides the first value by the second and returns the quotient (a / b).
    Raises a ValueError when the denominator is zero.

    Args:
        a: numerator
        b: denominator
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    # True division: the result is a float even when both inputs are ints.
    return a / b
@tool
def analyze_image(image_path: str, question: str = "What’s in this image?") -> str:
    """
    Analyzes an image and answers a question about it using BLIP-2.

    Returns the generated answer, or a message starting with
    "Error analyzing image:" if any step fails.

    Args:
        image_path (str): Path to the image file.
        question (str): A natural language question about the image.
    """
    try:
        # The context manager releases the file handle; convert() hands back a
        # separate RGB image that stays usable after the file is closed.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        inputs = processor(images=image, text=question, return_tensors="pt").to(device)
        generated_ids = model.generate(**inputs, max_new_tokens=100)
        response = processor.decode(generated_ids[0], skip_special_tokens=True)
        # Drop leading/trailing whitespace left around the decoded answer.
        return response.strip()
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error analyzing image: {str(e)}"
@tool
def read_excel_summary(file_path: str) -> str:
    """
    Loads an Excel workbook and reports its column names, row count, and
    descriptive statistics as the string form of a dictionary.

    Any failure is reported as a message starting with
    "Error reading Excel file:" instead of being raised.

    Args:
        file_path: The path to the Excel file
    """
    try:
        frame = pd.read_excel(file_path, engine="openpyxl")
        column_names = frame.columns.tolist()
        row_count = len(frame)
        # include='all' describes non-numeric columns as well as numeric ones.
        statistics = frame.describe(include='all').to_dict()
        return str({"columns": column_names, "num_rows": row_count, "summary": statistics})
    except Exception as exc:
        return f"Error reading Excel file: {exc!s}"
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    The results are returned under the "web_results" key as one string of
    <Document> entries separated by "---" lines.

    Args:
        query: The search query."""
    # invoke() takes the tool input as its first positional argument; passing
    # only query=... leaves that required argument unfilled.
    search_results = TavilySearchResults(max_results=3).invoke(query)
    if isinstance(search_results, str):
        # The Tavily tool may hand back an error description as plain text
        # instead of a result list; pass it through unchanged.
        return {"web_results": search_results}
    # Each Tavily result is a dict (URL under "url", text under "content"),
    # not a Document, so there is no metadata or page number to read.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{result.get("url", "")}" page=""/>\n{result.get("content", "")}\n</Document>'
        for result in search_results
    )
    return {"web_results": formatted_search_docs}
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    The results are returned under the "wiki_results" key as one string of
    <Document> entries separated by "---" lines.

    Args:
        query: The search query."""
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # .get() keeps one document with missing metadata from discarding all results.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
@tool
def transcribe_audio(file_path: str) -> str:
    """
    Converts the speech in an audio file (MP3, WAV, etc.) to text using Whisper.
    Returns the transcribed text, or a message starting with
    "Error during transcription:" if transcription fails.

    Args:
        file_path: the path to the audio file
    """
    try:
        transcription = whisper_model.transcribe(file_path)
        text = transcription["text"]
    except Exception as exc:
        return f"Error during transcription: {exc!s}"
    return text
@tool
def arvix_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    This tool allows for searching papers and articles. The results are
    returned under the "arvix_results" key as one string of <Document> entries
    separated by "---" lines; each paper's text is cut to its first 1000
    characters.

    Args:
        query: The search query."""
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_docs = []
    for doc in search_docs:
        metadata = doc.metadata
        # Arxiv documents are not guaranteed to carry a "source" key, so fall
        # back to the entry URL or the paper title instead of raising KeyError.
        source = metadata.get("source") or metadata.get("entry_id") or metadata.get("Title", "")
        formatted_docs.append(
            f'<Document source="{source}" page="{metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        )
    return {"arvix_results": "\n\n---\n\n".join(formatted_docs)}
@tool
def fetch_youtube_transcript(video_url: str) -> str:
    """
    Retrieves the English transcript of a YouTube video given its URL.
    Returns the transcript as plain text (first 4000 characters), or a short
    message when the URL is not recognised or the transcript cannot be fetched.

    Args:
        video_url: The YouTube url link to the video
    """
    import re

    # The video id follows either "v=" or "youtu.be/" in the URL.
    id_match = re.search(r"(?:v=|youtu\.be/)([\w-]+)", video_url)
    if id_match is None:
        return "Invalid YouTube URL."

    try:
        segments = YouTubeTranscriptApi.get_transcript(id_match.group(1), languages=['en'])
        full_text = " ".join(segment['text'] for segment in segments)
    except Exception as exc:
        return f"Transcript not available or error: {exc!s}"
    # Cap the length so the transcript fits into the context window.
    return full_text[:4000]