# File-viewer metadata (not part of the program): file size 2,247 bytes; revision IDs 697ab39, 5fbd0a0.
import os
from smolagents import tool
from google import genai
from google.genai import types
# Build a single shared Gemini client at import time so every tool below
# reuses it; the API key is read from the GOOGLE_API_KEY environment variable.
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
@tool
def analyze_video(video_source: str, question: str) -> str:
    """
    Analyzes a video (YouTube URL or local file path) to answer a specific question.

    Args:
        video_source: The YouTube URL or the local path to the video file.
        question: The question you want to ask about the video content.

    Returns:
        The model's text answer, or a short notice if the model returned no text.

    Raises:
        RuntimeError: If server-side processing of an uploaded video fails.
        TimeoutError: If an uploaded video is still processing after the wait limit.
    """
    import time  # local import keeps this tool self-contained

    # 1. Handle YouTube vs Local
    if "youtube.com" in video_source or "youtu.be" in video_source:
        # YouTube links are passed by reference; nothing is uploaded.
        video_part = types.Part(file_data=types.FileData(file_uri=video_source))
    else:
        # Upload local file to Gemini's File API (stored for 48h)
        uploaded_file = client.files.upload(file=video_source)

        # Uploaded videos are processed asynchronously; using the file before
        # processing has finished can make generate_content reject it, so poll
        # until the state leaves PROCESSING (bounded, to avoid hanging forever).
        deadline = time.monotonic() + 300
        while getattr(uploaded_file.state, "name", None) == "PROCESSING":
            if time.monotonic() > deadline:
                raise TimeoutError(
                    f"Timed out waiting for video processing: {video_source}"
                )
            time.sleep(2)
            uploaded_file = client.files.get(name=uploaded_file.name)

        if getattr(uploaded_file.state, "name", None) == "FAILED":
            raise RuntimeError(f"Video processing failed: {video_source}")

        # Forward the MIME type reported by the upload alongside the URI.
        video_part = types.Part(
            file_data=types.FileData(
                file_uri=uploaded_file.uri,
                mime_type=uploaded_file.mime_type,
            )
        )

    # 2. Generate content
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[video_part, question],
    )

    # `response.text` may be None (e.g. when the response carries no text
    # part); always hand the agent a string, as the signature promises.
    answer = response.text
    if answer is None:
        return "The model returned no text response for this video."
    return answer
@tool
def analyze_image(image_path: str, question: str) -> str:
    """
    Uses native vision to analyze an image file and answer questions about it.

    Args:
        image_path: Path to the image file (jpg, png, webp).
        question: What you want to know about the image.

    Returns:
        The model's text answer, or a short notice if the model returned no text.
    """
    # Upload the image through the Files API and pass the returned file
    # handle to the model together with the question.
    uploaded_file = client.files.upload(file=image_path)
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[uploaded_file, question],
    )

    # `response.text` may be None (e.g. when the response carries no text
    # part); always hand the agent a string, as the signature promises.
    answer = response.text
    if answer is None:
        return "The model returned no text response for this image."
    return answer
@tool
def analyze_audio(audio_path: str, question: str) -> str:
    """
    Analyzes audio files (mp3, wav) to transcribe or answer questions about content and tone.

    Args:
        audio_path: Path to the audio file.
        question: The question or instruction (e.g., 'Summarize the mood' or 'Transcribe this').

    Returns:
        The model's text answer, or a short notice if the model returned no text.
    """
    # Upload the audio through the Files API and pass the returned file
    # handle to the model together with the question/instruction.
    uploaded_file = client.files.upload(file=audio_path)
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[uploaded_file, question],
    )

    # `response.text` may be None (e.g. when the response carries no text
    # part); always hand the agent a string, as the signature promises.
    answer = response.text
    if answer is None:
        return "The model returned no text response for this audio."
    return answer
# Approach inspired by: https://huggingface.co/spaces/DeekshithN05/Final_Assignment_Template/blob/main/agent.py