Spaces:

DakshChaudhary
/

Agent_GAIA_Benchmark

Sleeping

File size: 2,062 Bytes

1b34f03

import base64
import mimetypes
import os

from llama_index.core.tools import FunctionTool
from llama_index.readers.file.image import ImageReader

from agent_models.models import get_vision_model_client
from agent_prompts.SystemPrompt import vision_model_system_prompt

def _image_to_data_url(image_path: str) -> str:
    """Read the file at *image_path* and return it as a base64 ``data:`` URL."""
    # Guess the MIME type from the file extension so that JPEG/GIF/WebP inputs
    # are not mislabelled as PNG. Fall back to PNG (the previous hard-coded
    # value) when the extension is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    with open(image_path, "rb") as img_file:
        b64_image = base64.b64encode(img_file.read()).decode("utf-8")
    return f"data:{mime_type};base64,{b64_image}"


def get_image_description(image_path: str) -> str:
    """
    Analyzes a local image and returns a text description. This tool is used to "see" what is in an image file.

    The image is read from disk, base64-encoded into a data URL, and sent to
    the vision model through the client's chat-completions API together with
    the vision system prompt.

    Args:
        image_path (str): The local file path of the image to analyze.

    Returns:
        str: The model's description of the image. On any failure (missing
        file, client/API error, empty model reply) a string starting with
        "Error analyzing image: " is returned instead of raising, so the
        calling agent always receives text it can reason about.
    """
    try:
        print(f"Analyzing image at path: {image_path}")

        # Read and encode the image (done first so a bad path fails fast,
        # before any client is created or network call is made).
        b64_url = _image_to_data_url(image_path)

        # Get the vision model client (Nebius, per the original comments).
        client = get_vision_model_client()

        # Call the chat-completions endpoint with the image attached.
        response = client.chat.completions.create(
            model="Qwen/Qwen2-VL-72B-Instruct",
            messages=[
                {
                    "role": "system",
                    "content": vision_model_system_prompt,
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Here is an image."},
                        {"type": "image_url", "image_url": {"url": b64_url}},
                    ],
                },
            ],
        )

        description = response.choices[0].message.content
        if description is None:
            # The message content may be absent; honour the ``-> str``
            # contract instead of leaking ``None`` to the agent.
            return "Error analyzing image: vision model returned no description"

        print(f"Vision model response: {description}")
        return description

    except Exception as e:
        # Deliberately broad: this is the tool boundary for the agent, and the
        # contract is to report failures as text rather than propagate them.
        return f"Error analyzing image: {e}"

# Wrapper function to create the tool for our agent
def get_image_interpreter_tool() -> FunctionTool:
    """Build the agent-facing ``image_interpreter`` tool.

    Returns:
        FunctionTool: A tool created via ``FunctionTool.from_defaults`` that
        wraps ``get_image_description`` under the name "image_interpreter".
    """
    tool_description = "A tool to analyze an image from a local file path and return a detailed text description. Use this to 'see' what is in an image file that has already been downloaded."
    image_tool = FunctionTool.from_defaults(
        fn=get_image_description,
        name="image_interpreter",
        description=tool_description,
    )
    return image_tool