GaiaAgent_Final_Assignment

Sleeping

App Files Files Community

GaiaAgent_Final_Assignment / tools /gemini_native_tools.py

Francesco-A

Update gemini_native_tools.py

697ab39 about 1 month ago

raw

history blame contribute delete

2.25 kB

	import os
	from smolagents import tool
	from google import genai
	from google.genai import types

	# Shared Gemini client: constructed a single time at import and reused by
	# every tool below. The key is read from the GOOGLE_API_KEY environment
	# variable (None is passed through if the variable is unset).
	client = genai.Client(
	    api_key=os.getenv("GOOGLE_API_KEY"),
	)

	@tool
	def analyze_video(video_source: str, question: str) -> str:
	    """
	    Analyzes a video (YouTube URL or local file path) to answer a specific question.

	    Args:
	        video_source: The YouTube URL or the local path to the video file.
	        question: The question you want to ask about the video content.
	    """
	    # Local import keeps this block self-contained; only the upload path uses it.
	    import time

	    # 1. Handle YouTube vs Local
	    if "youtube.com" in video_source or "youtu.be" in video_source:
	        # YouTube links are referenced directly by URI; nothing is uploaded.
	        video_part = types.Part(file_data=types.FileData(file_uri=video_source))
	    else:
	        # Upload local file to Gemini's File API (stored for 48h)
	        uploaded_file = client.files.upload(file=video_source)

	        # Uploaded videos may still be PROCESSING when upload() returns, and
	        # using them before they are ready can make generate_content fail.
	        # Poll until the file leaves PROCESSING, with an upper bound so a
	        # stuck upload cannot hang the agent forever.
	        poll_interval_s = 2
	        deadline = time.monotonic() + 300
	        while True:
	            # state may be an enum (has .name), a plain string, or None.
	            state = getattr(uploaded_file.state, "name", uploaded_file.state)
	            if state != "PROCESSING":
	                break
	            if time.monotonic() >= deadline:
	                raise TimeoutError(
	                    f"Timed out waiting for Gemini to process video: {video_source}"
	                )
	            time.sleep(poll_interval_s)
	            uploaded_file = client.files.get(name=uploaded_file.name)

	        if state == "FAILED":
	            raise RuntimeError(f"Gemini failed to process video: {video_source}")

	        # Pass the MIME type along with the URI, as the sibling tools do
	        # implicitly by handing the File object to the SDK.
	        video_part = types.Part(
	            file_data=types.FileData(
	                file_uri=uploaded_file.uri,
	                mime_type=uploaded_file.mime_type,
	            )
	        )

	    # 2. Generate content
	    response = client.models.generate_content(
	        model="gemini-2.5-flash",
	        contents=[video_part, question],
	    )
	    return response.text

	@tool
	def analyze_image(image_path: str, question: str) -> str:
	    """
	    Uses native vision to analyze an image file and answer questions about it.

	    Args:
	        image_path: Path to the image file (jpg, png, webp).
	        question: What you want to know about the image.
	    """
	    # Push the image to the File API, then send the resulting file handle
	    # together with the question as one multimodal request.
	    image_file = client.files.upload(file=image_path)
	    prompt_parts = [image_file, question]
	    result = client.models.generate_content(
	        model="gemini-2.5-flash",
	        contents=prompt_parts,
	    )
	    # Only the text portion of the model's reply is handed back.
	    return result.text

	@tool
	def analyze_audio(audio_path: str, question: str) -> str:
	    """
	    Analyzes audio files (mp3, wav) to transcribe or answer questions about content and tone.

	    Args:
	        audio_path: Path to the audio file.
	        question: The question or instruction (e.g., 'Summarize the mood' or 'Transcribe this').
	    """
	    # Upload the clip first; the returned file handle and the instruction
	    # are then sent to the model as a single request.
	    audio_file = client.files.upload(file=audio_path)
	    prompt_parts = [audio_file, question]
	    result = client.models.generate_content(
	        model="gemini-2.5-flash",
	        contents=prompt_parts,
	    )
	    # Only the text portion of the model's reply is handed back.
	    return result.text


	# approach inspired by: https://huggingface.co/spaces/DeekshithN05/Final_Assignment_Template/blob/main/agent.py