Final_Assignment_Template

Build error

Final_Assignment_Template / tools /image_video_tools.py

Add image processing tool

a52d511 8 months ago

1.22 kB

	"""This module contains tools for processing images or videos."""

	import base64
	import mimetypes
	import os

	from langchain_core.messages import HumanMessage
	from langchain_core.tools import tool
	from langchain_google_genai import ChatGoogleGenerativeAI

	@tool
	def query_image(image_path: str, query: str) -> str:
	    """Uses a multimodal LLM to answer a query for a given image.

	    Args:
	        image_path (str): The path to the image to process
	        query (str): The query to be answered based on the image

	    Returns:
	        str: Answer of the query based on the image
	    """
	    # NOTE: the docstring above is kept close to the original wording on
	    # purpose; `@tool` exposes it as the tool description.
	    llm = ChatGoogleGenerativeAI(
	        model="gemini-2.0-flash-001",
	        temperature=0.8,
	        max_tokens=None,
	        timeout=None,
	        max_retries=2,
	        google_api_key=os.getenv("GOOGLE_API_KEY"),  # Get API key from environment variable
	    )

	    # Read the raw bytes; OSError (e.g. FileNotFoundError) propagates to the
	    # caller exactly as before.
	    with open(image_path, "rb") as f:
	        image_bytes = f.read()

	    # Fall back to JPEG when the extension gives no hint about the type.
	    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
	    image_b64 = base64.b64encode(image_bytes).decode("utf-8")

	    # The image has to travel inside the message content. The previous
	    # `images=[...]` keyword on `invoke` is not part of the chat-model API,
	    # so the model never received the picture. A single human message with a
	    # text part and an `image_url` part (base64 data URI) is the documented
	    # multimodal input format.
	    message = HumanMessage(
	        content=[
	            {"type": "text", "text": query},
	            {
	                "type": "image_url",
	                "image_url": {"url": f"data:{mime_type};base64,{image_b64}"},
	            },
	        ]
	    )
	    response = llm.invoke([message])

	    # Message content may be a plain string or a list of parts; the declared
	    # return type is `str`, so flatten the textual parts when needed.
	    content = response.content
	    if isinstance(content, str):
	        return content
	    text_parts = []
	    for part in content:
	        if isinstance(part, str):
	            text_parts.append(part)
	        elif isinstance(part, dict) and part.get("type") == "text":
	            text_parts.append(part.get("text", ""))
	    return "".join(text_parts)