|
|
import os |
|
|
from smolagents import tool |
|
|
from google import genai |
|
|
from google.genai import types |
|
|
|
|
|
|
|
|
# Module-level Gemini client shared by the tool functions in this file.
# The key is read from the GOOGLE_API_KEY environment variable; if the
# variable is unset, None is passed as api_key.
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
|
|
|
|
|
@tool
def analyze_video(video_source: str, question: str) -> str:
    """
    Analyzes a video (YouTube URL or local file path) to answer a specific question.

    A source containing "youtube.com" or "youtu.be" is handed to the model by URL.
    Any other source is treated as a local file path. The file is uploaded, and the
    call waits for the upload to finish processing before the question is sent.

    Args:
        video_source: The YouTube URL or the local path to the video file.
        question: The question you want to ask about the video content.

    Returns:
        The text of the model's answer, or an empty string if the response has no text.

    Raises:
        RuntimeError: If processing of the uploaded file fails.
        TimeoutError: If the uploaded file is still processing after the wait limit.
    """
    # Function-scope import so this block does not depend on a file-level change.
    import time

    poll_interval_s = 2.0   # pause between state checks
    max_wait_s = 600.0      # upper bound on waiting for processing to finish

    if "youtube.com" in video_source or "youtu.be" in video_source:
        # YouTube links are referenced directly by URL; nothing is uploaded.
        video_part = types.Part(file_data=types.FileData(file_uri=video_source))
    else:
        uploaded_file = client.files.upload(file=video_source)

        # An uploaded video is not usable until the service has finished
        # processing it, so poll the file state instead of using it at once.
        deadline = time.monotonic() + max_wait_s
        while uploaded_file.state == types.FileState.PROCESSING:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Uploaded video {uploaded_file.name} was still processing "
                    f"after {max_wait_s:.0f} seconds."
                )
            time.sleep(poll_interval_s)
            uploaded_file = client.files.get(name=uploaded_file.name)

        if uploaded_file.state == types.FileState.FAILED:
            raise RuntimeError(
                f"Processing of uploaded video {uploaded_file.name} failed."
            )

        # Forward the MIME type reported for the upload together with its URI.
        video_part = types.Part(
            file_data=types.FileData(
                file_uri=uploaded_file.uri,
                mime_type=uploaded_file.mime_type,
            )
        )

    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[video_part, question],
    )
    # response.text is None when the response has no text parts; keep the
    # declared str return type in that case.
    return response.text or ""
|
|
|
|
|
@tool
def analyze_image(image_path: str, question: str) -> str:
    """
    Uses native vision to analyze an image file and answer questions about it.

    The image is uploaded through the shared client and sent to the model
    together with the question.

    Args:
        image_path: Path to the image file (jpg, png, webp).
        question: What you want to know about the image.

    Returns:
        The text of the model's answer, or an empty string if the response has no text.
    """
    uploaded_file = client.files.upload(file=image_path)
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[uploaded_file, question],
    )
    # response.text is None when the response has no text parts; keep the
    # declared str return type in that case.
    return response.text or ""
|
|
|
|
|
@tool
def analyze_audio(audio_path: str, question: str) -> str:
    """
    Analyzes audio files (mp3, wav) to transcribe or answer questions about content and tone.

    The audio file is uploaded through the shared client and sent to the model
    together with the question.

    Args:
        audio_path: Path to the audio file.
        question: The question or instruction (e.g., 'Summarize the mood' or 'Transcribe this').

    Returns:
        The text of the model's answer, or an empty string if the response has no text.
    """
    uploaded_file = client.files.upload(file=audio_path)
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[uploaded_file, question],
    )
    # response.text is None when the response has no text parts; keep the
    # declared str return type in that case.
    return response.text or ""
|
|
|
|
|
|
|
|
|
|
|
|