File size: 2,062 Bytes
1b34f03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import base64
from llama_index.core.tools import FunctionTool
from llama_index.readers.file.image import ImageReader
from agent_models.models import get_vision_model_client
from agent_prompts.SystemPrompt import vision_model_system_prompt

def get_image_description(image_path: str) -> str:
    """
    Analyzes a local image and returns a text description. This tool is used to "see" what is in an image file.

    The file is read from disk, base64-encoded into a ``data:`` URL and sent,
    together with ``vision_model_system_prompt``, to the client returned by
    ``get_vision_model_client()``.

    Args:
        image_path (str): The local file path of the image to analyze.

    Returns:
        str: The text content of the model's first choice. If any step fails
        (unreadable file, client or API error), no exception is raised;
        instead a string of the form ``"Error analyzing image: <reason>"``
        is returned.
    """
    # Function-scope import so this block does not depend on the file header.
    import mimetypes

    try:
        print(f"Analyzing image at path: {image_path}")

        # Declare the media type that matches the file instead of always
        # claiming PNG; fall back to PNG when the extension is unknown or
        # does not map to an image type.
        media_type, _ = mimetypes.guess_type(image_path)
        if media_type is None or not media_type.startswith("image/"):
            media_type = "image/png"

        # Read and encode the image
        with open(image_path, "rb") as img_file:
            b64_image = base64.b64encode(img_file.read()).decode("utf-8")
        b64_url = f"data:{media_type};base64,{b64_image}"

        # Get Nebius client
        client = get_vision_model_client()

        # Call Nebius API
        response = client.chat.completions.create(
            model="Qwen/Qwen2-VL-72B-Instruct",
            messages=[
                {
                    "role": "system",
                    "content": vision_model_system_prompt
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Here is an image."},
                        {"type": "image_url", "image_url": {"url": b64_url}}
                    ]
                }
            ]
        )

        description = response.choices[0].message.content
        print(f"Vision model response: {description}")
        return description

    except Exception as e:
        # Failures are reported as text rather than raised, so the caller
        # always receives a string.
        return f"Error analyzing image: {e}"

# Wrapper function to create the tool for our agent
def get_image_interpreter_tool() -> FunctionTool:
    """
    Builds the tool object that exposes ``get_image_description``.

    Returns:
        FunctionTool: Created with ``FunctionTool.from_defaults`` using the
        name ``image_interpreter`` and a fixed usage description.
    """
    tool_name = "image_interpreter"
    tool_description = "A tool to analyze an image from a local file path and return a detailed text description. Use this to 'see' what is in an image file that has already been downloaded."

    interpreter_tool = FunctionTool.from_defaults(
        fn=get_image_description,
        name=tool_name,
        description=tool_description,
    )
    return interpreter_tool