import os
import time

from google import genai
from google.genai import types
from smolagents import tool

# Initialize client once
client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))

# Model shared by every tool in this module.
_MODEL_NAME = "gemini-2.5-flash"

# Polling parameters used while waiting for an uploaded file to be processed.
_POLL_INTERVAL_SECONDS = 2.0
_PROCESSING_TIMEOUT_SECONDS = 300.0

# Returned when the model produces no text (e.g. blocked or empty response),
# so the tools always honor their ``-> str`` contract.
_NO_TEXT_MESSAGE = "The model returned no text response."


def _state_name(uploaded_file):
    """Return the upload state as a plain string (or None if unset)."""
    state = uploaded_file.state
    # The SDK exposes an enum; fall back to the raw value if it is not one.
    return getattr(state, "name", state)


def _upload_and_wait(path: str):
    """Upload a local file through the File API and wait until it is usable.

    Uploaded media (videos in particular) is processed asynchronously; a file
    that is still in the PROCESSING state cannot be referenced in a
    generate_content call yet, so poll until it leaves that state.

    Raises:
        RuntimeError: If processing fails.
        TimeoutError: If the file is still processing after the timeout.
    """
    # Upload local file to Gemini's File API (stored for 48h)
    uploaded_file = client.files.upload(file=path)

    deadline = time.monotonic() + _PROCESSING_TIMEOUT_SECONDS
    while _state_name(uploaded_file) == "PROCESSING":
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Timed out waiting for uploaded file to be processed: {path}"
            )
        time.sleep(_POLL_INTERVAL_SECONDS)
        uploaded_file = client.files.get(name=uploaded_file.name)

    if _state_name(uploaded_file) == "FAILED":
        raise RuntimeError(f"Processing of uploaded file failed: {path}")

    return uploaded_file


def _ask_model(media, question: str) -> str:
    """Send one media item plus a question to the model and return its text."""
    response = client.models.generate_content(
        model=_MODEL_NAME,
        contents=[media, question],
    )
    text = response.text
    # ``response.text`` is None when the response carries no text parts.
    return text if text is not None else _NO_TEXT_MESSAGE


def _is_youtube_url(video_source: str) -> bool:
    """Tell whether the source should be treated as a YouTube URL.

    An existing local file always wins, so a path that merely contains
    "youtube.com" or "youtu.be" in its name is still uploaded as a file.
    """
    if os.path.isfile(video_source):
        return False
    return "youtube.com" in video_source or "youtu.be" in video_source


@tool
def analyze_video(video_source: str, question: str) -> str:
    """
    Analyzes a video (YouTube URL or local file path) to answer a specific question.

    Args:
        video_source: The YouTube URL or the local path to the video file.
        question: The question you want to ask about the video content.

    Returns:
        The model's textual answer, or a short notice if no text was returned.

    Raises:
        RuntimeError: If a local file upload fails processing.
        TimeoutError: If a local file upload is not processed in time.
    """
    # 1. Handle YouTube vs Local
    if _is_youtube_url(video_source):
        # YouTube URLs are referenced directly by URI; nothing is uploaded.
        video_part = types.Part(file_data=types.FileData(file_uri=video_source))
    else:
        # Pass the uploaded File object itself (as the image/audio tools do)
        # so the SDK forwards both its URI and its MIME type.
        video_part = _upload_and_wait(video_source)

    # 2. Generate content
    return _ask_model(video_part, question)


@tool
def analyze_image(image_path: str, question: str) -> str:
    """
    Uses native vision to analyze an image file and answer questions about it.

    Args:
        image_path: Path to the image file (jpg, png, webp).
        question: What you want to know about the image.

    Returns:
        The model's textual answer, or a short notice if no text was returned.

    Raises:
        RuntimeError: If the file upload fails processing.
        TimeoutError: If the file upload is not processed in time.
    """
    uploaded_file = _upload_and_wait(image_path)
    return _ask_model(uploaded_file, question)


@tool
def analyze_audio(audio_path: str, question: str) -> str:
    """
    Analyzes audio files (mp3, wav) to transcribe or answer questions about content and tone.

    Args:
        audio_path: Path to the audio file.
        question: The question or instruction (e.g., 'Summarize the mood' or 'Transcribe this').

    Returns:
        The model's textual answer, or a short notice if no text was returned.

    Raises:
        RuntimeError: If the file upload fails processing.
        TimeoutError: If the file upload is not processed in time.
    """
    uploaded_file = _upload_and_wait(audio_path)
    return _ask_model(uploaded_file, question)


# approach inspired by: https://huggingface.co/spaces/DeekshithN05/Final_Assignment_Template/blob/main/agent.py