File size: 2,857 Bytes
dea4bf5
 
 
 
 
 
 
 
5c3f18e
 
dea4bf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c3f18e
 
dea4bf5
 
 
 
 
 
 
 
 
 
 
5c3f18e
dea4bf5
 
5c3f18e
 
dea4bf5
 
 
 
 
 
 
 
 
5c3f18e
dea4bf5
 
 
5c3f18e
dea4bf5
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import requests
from annif_client import AnnifClient
import os


# Get VLM API base URL and API key from environment variables
VLM_API_BASE_URL = os.getenv("VLM_API_BASE_URL")
if not VLM_API_BASE_URL:
    raise RuntimeError("VLM_API_BASE_URL environment variable must be set.")
# The key is optional; when it is empty, requests are sent without an
# X-API-Key header.
VLM_API_KEY = os.getenv("VLM_API_KEY", "")
# Strip trailing slashes so a base URL such as "http://host/" does not
# produce "http://host//v1/chat/completions".
VLM_API_ENDPOINT = f"{VLM_API_BASE_URL.rstrip('/')}/v1/chat/completions"


# Initialize Annif client (no arguments, so the library's defaults apply)
annif = AnnifClient()


def get_caption(image, timeout=60):
    """Ask the VLM endpoint to describe *image* and return the caption text.

    The image is encoded as a base64 JPEG data URL and posted to
    ``VLM_API_ENDPOINT`` in an OpenAI-style chat-completions payload.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when the
            user submitted without providing one.
        timeout: Seconds to wait for the VLM server before giving up.

    Returns:
        The caption string found at ``choices[0].message.content`` in the
        JSON response.

    Raises:
        gr.Error: If no image was provided, or if the request fails or the
            response does not have the expected shape. The underlying
            exception is printed and attached as the cause.
    """
    import base64
    import io

    if image is None:
        raise gr.Error("Please upload an image first.")

    # JPEG cannot store alpha or palette data, and Pillow refuses to save
    # such modes (e.g. RGBA PNG uploads), so normalise to RGB first.
    # convert() returns a new image, leaving the caller's object untouched.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Convert image to base64 JPEG
    buf = io.BytesIO()
    image.save(buf, format="JPEG")
    img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

    # Prepare payload for VLM (OpenAI schema)
    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }
    headers = {"X-API-Key": VLM_API_KEY} if VLM_API_KEY else {}
    try:
        # Without a timeout, requests waits forever on a stalled server.
        response = requests.post(
            VLM_API_ENDPOINT, json=payload, headers=headers, timeout=timeout
        )
        response.raise_for_status()
        data = response.json()
        # Assume caption is in data['choices'][0]['message']['content']
        caption = data["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"VLM API error: {e}")  # Detailed error for admin
        raise gr.Error("Sorry, there was a problem generating a caption.") from e
    return caption


# Annif project used for subject suggestions. Set the ANNIF_PROJECT_ID
# environment variable to override it; the default stays "yso-en".
PROJECT_ID = os.getenv("ANNIF_PROJECT_ID", "yso-en")


def get_subjects(caption):
    """Return Annif subject suggestions for *caption* as a label-to-score dict.

    Calls ``annif.suggest`` for the project named by ``PROJECT_ID`` and maps
    each returned suggestion's ``"label"`` to its ``"score"``. An empty
    result yields an empty dict.

    Raises:
        gr.Error: If the Annif call or the result handling fails; the
            underlying exception is printed first.
    """
    try:
        suggestions = annif.suggest(project_id=PROJECT_ID, text=caption)
        # An empty suggestion list naturally produces an empty dict here.
        return {hit["label"]: hit["score"] for hit in suggestions}
    except Exception as err:
        print(f"Annif API error: {err}")  # Detailed error for admin
        raise gr.Error("Sorry, there was a problem getting subject suggestions.")


def process_image(image):
    """Caption *image*, then suggest subjects for that caption.

    Returns:
        A 3-tuple ``(image, caption, subjects)`` where ``image`` is the
        input passed through unchanged, ``caption`` comes from
        ``get_caption`` and ``subjects`` from ``get_subjects``.
    """
    text = get_caption(image)
    # The caption must exist before subjects can be requested for it.
    return image, text, get_subjects(text)


# UI components, created in the same order as before: input first, then
# the two outputs.
image_input = gr.Image(type="pil", label="Upload or take a photo")
caption_output = gr.Textbox(label="Caption")
subjects_output = gr.Label(label="Subject Suggestions", show_heading=False)

# process_image returns (image, caption, subjects); the slice drops the
# image so only the caption and subjects reach the two output components.
demo = gr.Interface(
    fn=lambda image: process_image(image)[1:],
    inputs=image_input,
    outputs=[caption_output, subjects_output],
    title="VLM Caption & Annif Subject Demo",
    description="Upload or take a photo. The app generates a caption using a Visual Language Model and suggests subjects using Annif.",
)

# Start the Gradio server when this module is executed.
demo.launch()