# GAIA_agents / tools / tools.py
# ManasiPandit's picture
# Update tools/tools.py
# f4dca53 verified
import os
import numpy
import tempfile
import requests
import whisper
import imageio
import yt_dlp
from PIL import Image
from typing import List, Optional
from urllib.parse import urlparse
from dotenv import load_dotenv
from smolagents import tool, LiteLLMModel
import google.generativeai as genai
from pytesseract import image_to_string
# Load variables from a local .env file (if one exists) into the process
# environment, so the os.getenv("GEMINI_API") lookups in the tools below can
# find the API key.
load_dotenv()
# Model identifier shared by vision_tool and ask_youtube_video.
MODEL_ID = "gemini-2.5-flash"
# Vision Tool
@tool
def vision_tool(prompt: str, image_list: List[Image.Image]) -> str:
    """
    Analyzes one or more images using a multimodal model.
    Args:
        prompt (str): The user question or task.
        image_list (List[PIL.Image.Image]): A list of image objects.
    Returns:
        str: Model's response to the prompt about the images.
    """
    # The docstring above is consumed by the @tool decorator, so its wording
    # is left untouched.
    llm = LiteLLMModel(
        model_id=MODEL_ID,
        api_key=os.getenv("GEMINI_API"),
        temperature=0.2,
    )

    # One user turn: the text prompt first, followed by one image part per
    # supplied image, in the order given.
    content_parts = [{"type": "text", "text": prompt}]
    for picture in image_list:
        content_parts.append({"type": "image", "image": picture})

    reply = llm([{"role": "user", "content": content_parts}])
    return reply.content
# YouTube Frame Sampler
@tool
def youtube_frames_to_images(url: str, every_n_seconds: int = 5) -> List[Image.Image]:
    """
    Downloads a YouTube video and extracts frames at regular intervals.
    Args:
        url (str): The URL of the YouTube video to process.
        every_n_seconds (int): The time interval in seconds between extracted frames. Must be greater than zero.
    Returns:
        List[Image.Image]: A list of sampled frames as PIL images.
    Raises:
        ValueError: If every_n_seconds is zero or negative.
        FileNotFoundError: If the download did not produce an .mp4 file.
    """
    # A zero interval previously surfaced as an opaque ZeroDivisionError in the
    # frame filter; reject it (and negative values) up front instead.
    if every_n_seconds <= 0:
        raise ValueError("every_n_seconds must be greater than zero")

    with tempfile.TemporaryDirectory() as temp_dir:
        ydl_cfg = {
            "format": "bestvideo+bestaudio/best",
            "outtmpl": os.path.join(temp_dir, "yt_video.%(ext)s"),
            "merge_output_format": "mp4",
            "quiet": True,
            "force_ipv4": True,
        }
        with yt_dlp.YoutubeDL(ydl_cfg) as ydl:
            ydl.extract_info(url, download=True)

        # The output extension is chosen by yt_dlp, so look the file up
        # rather than assuming its exact name.
        video_file = next(
            (os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith(".mp4")),
            None,
        )
        if video_file is None:
            raise FileNotFoundError(f"No .mp4 file was produced when downloading {url}")

        reader = imageio.get_reader(video_file)
        try:
            fps = reader.get_meta_data().get("fps", 30)
            # Clamp to 1 so a small fps * every_n_seconds product can never
            # truncate to a zero modulus.
            interval = max(1, int(fps * every_n_seconds))
            return [
                Image.fromarray(frame)
                for i, frame in enumerate(reader)
                if i % interval == 0
            ]
        finally:
            # Release the video handle before the temporary directory (and
            # the file inside it) is removed.
            reader.close()
# YouTube QA via File URI
@tool
def ask_youtube_video(url: str, question: str) -> str:
    """
    Sends a YouTube video to a multimodal model and asks a question about it.
    Args:
        url (str): The URL of the YouTube video, or the URI of an already uploaded and hosted video file.
        question (str): The natural language question to ask about the video.
    Returns:
        str: The model's answer to the question, or an error message.
    """
    try:
        # This module imports the `google.generativeai` package, which is
        # driven through `configure()` and `GenerativeModel`; it does not
        # provide a `Client` class, so the previous `genai.Client(...)` call
        # always failed and the tool could only ever return the error string.
        genai.configure(api_key=os.getenv("GEMINI_API"))
        model = genai.GenerativeModel(MODEL_ID)
        response = model.generate_content(
            [
                {
                    "role": "user",
                    "parts": [
                        {"text": question},
                        # The video is passed by reference; nothing is
                        # downloaded or uploaded by this function.
                        {"file_data": {"file_uri": url}},
                    ],
                }
            ]
        )
        return response.text
    except Exception as e:
        # Tools in this module report failures as text rather than raising.
        return f"Error asking {MODEL_ID} about video: {str(e)}"
# File Reading Tool
@tool
def read_text_file(file_path: str) -> str:
    """
    Reads plain text content from a file.
    Args:
        file_path (str): The full path to the text file.
    Returns:
        str: The contents of the file, or an error message.
    """
    # The file is decoded as UTF-8; any failure (missing file, permission,
    # decode error) is reported as text instead of being raised.
    try:
        with open(file_path, mode="r", encoding="utf-8") as text_file:
            contents = text_file.read()
    except Exception as err:
        return f"Error reading file: {err}"
    return contents
# File Downloader
@tool
def file_from_url(url: str, save_as: Optional[str] = None) -> str:
    """
    Downloads a file from a URL and saves it in the system temporary directory.
    Args:
        url (str): The URL of the file to download.
        save_as (Optional[str]): Optional filename to save the file as. Only the final path component is used.
    Returns:
        str: A message containing the local file path, or an error message.
    """
    try:
        # Keep only the last path component so a name such as "../x" or an
        # absolute path cannot place the file outside the temp directory.
        filename = os.path.basename(save_as) if save_as else ""
        if filename in ("", ".", ".."):
            # Fall back to the name in the URL path, then to a random name.
            filename = os.path.basename(urlparse(url).path) or f"file_{os.urandom(4).hex()}"
        file_path = os.path.join(tempfile.gettempdir(), filename)

        # The timeout stops a stalled server from blocking the caller
        # forever; the context manager releases the streamed connection.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(file_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return f"File saved to {file_path}"
    except Exception as e:
        return f"Download failed: {e}"
# Audio Transcription (YouTube)
@tool
def transcribe_youtube(yt_url: str) -> str:
    """
    Transcribes the audio from a YouTube video using Whisper.
    Args:
        yt_url (str): The URL of the YouTube video.
    Returns:
        str: The transcribed text of the video.
    Raises:
        FileNotFoundError: If the download did not produce a .wav file.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        ydl_opts = {
            "format": "bestaudio",
            "outtmpl": os.path.join(tempdir, "audio.%(ext)s"),
            # Convert whatever audio container was downloaded into WAV.
            "postprocessors": [{
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
            }],
            "quiet": True,
            "force_ipv4": True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(yt_url, download=True)

        wav_file = next(
            (os.path.join(tempdir, f) for f in os.listdir(tempdir) if f.endswith(".wav")),
            None,
        )
        # Fail with a clear message instead of handing None to Whisper.
        if wav_file is None:
            raise FileNotFoundError(f"No .wav file was produced when downloading {yt_url}")

        # Load the model only once there is audio to transcribe, so a failed
        # download does not pay the model-loading cost.
        model = whisper.load_model("small")
        # Transcribe while the temporary directory still exists.
        return model.transcribe(wav_file)['text']
# Audio File Transcriber
@tool
def audio_to_text(audio_path: str) -> str:
    """
    Transcribes an uploaded audio file into text using Whisper.
    Args:
        audio_path (str): The local file path to the audio file.
    Returns:
        str: The transcribed text or an error message.
    """
    # Model loading, transcription and the 'text' lookup all stay inside the
    # try so that any failure is reported as text rather than raised.
    try:
        transcription = whisper.load_model("small").transcribe(audio_path)
        return transcription['text']
    except Exception as err:
        return f"Failed to transcribe: {err}"
# OCR
@tool
def extract_text_via_ocr(image_path: str) -> str:
    """
    Extracts text from an image using Optical Character Recognition (OCR).
    Args:
        image_path (str): The local path to the image file.
    Returns:
        str: The extracted text or an error message.
    """
    try:
        # Open the image in a context manager so the underlying file handle
        # is released as soon as OCR finishes, even if OCR raises.
        with Image.open(image_path) as img:
            return image_to_string(img)
    except Exception as e:
        return f"OCR failed: {e}"
# CSV Analyzer
@tool
def summarize_csv_data(path: str, query: str = "") -> str:
    """
    Provides a summary of the contents of a CSV file.
    Args:
        path (str): The file path to the CSV file.
        query (str): Optional query to run on the data.
    Returns:
        str: Summary statistics and column details or an error message.
    """
    # NOTE(review): `query` is accepted but never referenced in this body;
    # the summary always covers the whole file.
    try:
        # Imported here, inside the try, so a missing pandas installation is
        # reported through the same error message as any other failure.
        import pandas as pd

        frame = pd.read_csv(path)
        row_count = len(frame)
        column_names = list(frame.columns)
        stats = frame.describe()
        return f"Loaded CSV with {row_count} rows. Columns: {column_names}\n\n{stats}"
    except Exception as err:
        return f"CSV error: {err}"
# Excel Analyzer
@tool
def summarize_excel_data(path: str, query: str = "") -> str:
    """
    Provides a summary of the contents of an Excel file.
    Args:
        path (str): The file path to the Excel file (.xls or .xlsx).
        query (str): Optional query to run on the data.
    Returns:
        str: Summary statistics and column details or an error message.
    """
    # NOTE(review): `query` is accepted but never referenced in this body;
    # the summary always covers everything pd.read_excel(path) returns.
    try:
        # Imported here, inside the try, so a missing pandas installation is
        # reported through the same error message as any other failure.
        import pandas as pd

        frame = pd.read_excel(path)
        row_count = len(frame)
        column_names = list(frame.columns)
        stats = frame.describe()
        return f"Excel file with {row_count} rows. Columns: {column_names}\n\n{stats}"
    except Exception as err:
        return f"Excel error: {err}"