Spaces:
Build error
Build error
| """This module contains tools for processing images or videos.""" | |
| import os | |
| import base64 | |
| import mimetypes | |
| from langchain_core.tools import tool | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
def query_image(image_path: str, query: str) -> str:
    """Uses a multimodal LLM to answer a query for a given image.

    The image file is read from disk, base64-encoded and sent to the model
    together with the query as a single multimodal human message.

    Args:
        image_path (str): The path to the image to process
        query (str): The query to be answered based on the image

    Returns:
        str: Answer of the query based on the image (the ``content`` of the
            model's response message)

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block; langchain_core is already a dependency.
    from langchain_core.messages import HumanMessage

    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash-001",
        temperature=0.8,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        google_api_key=os.getenv("GOOGLE_API_KEY"),  # Get API key from environment variable
    )

    with open(image_path, "rb") as f:
        image_bytes = f.read()

    # Fall back to JPEG when the type cannot be guessed from the file name.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")

    # The image must travel inside the message content. Passing it as an
    # extra ``images=`` keyword to ``invoke`` does not attach it to the
    # prompt, so the model would never see the picture.
    message = HumanMessage(
        content=[
            {"type": "text", "text": query},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{mime_type};base64,{image_b64}"},
            },
        ]
    )

    response = llm.invoke([message])
    return response.content