"""Gradio demo: caption an image with a VLM, then suggest subjects with Annif."""

import base64
import io
import logging
import os

import gradio as gr
import requests
from annif_client import AnnifClient

logger = logging.getLogger(__name__)

# Get VLM API base URL and API key from environment variables
VLM_API_BASE_URL = os.getenv("VLM_API_BASE_URL")
if not VLM_API_BASE_URL:
    raise RuntimeError("VLM_API_BASE_URL environment variable must be set.")
VLM_API_KEY = os.getenv("VLM_API_KEY", "")
# Strip trailing slashes so a base URL like "http://host/" does not produce
# an endpoint containing "//v1/...".
VLM_API_ENDPOINT = f"{VLM_API_BASE_URL.rstrip('/')}/v1/chat/completions"

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a stalled server and the UI request never completes.
VLM_REQUEST_TIMEOUT = (10, 120)

# Initialize Annif client (no arguments)
annif = AnnifClient()

PROJECT_ID = "yso-en"  # Placeholder, update as needed


def get_caption(image):
    """Ask the VLM endpoint to describe *image* and return its answer.

    Args:
        image: A PIL image (the UI's image input is declared with
            ``type="pil"``), or ``None`` when nothing was provided.

    Returns:
        The value found at ``choices[0].message.content`` in the JSON
        response of the VLM endpoint.

    Raises:
        gr.Error: If no image was provided, or if the request fails or the
            response does not have the expected structure.
    """
    if image is None:
        # Submit was pressed with an empty image input.
        raise gr.Error("Please upload or take a photo first.")

    # Guard for images that are not plain RGB (e.g. RGBA or palette PNGs):
    # Pillow cannot write those modes as JPEG, so normalise first.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Convert image to base64 JPEG
    buf = io.BytesIO()
    image.save(buf, format="JPEG")
    img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

    # Prepare payload for VLM (OpenAI schema)
    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }
    # The API key header is only sent when a key is configured.
    headers = {"X-API-Key": VLM_API_KEY} if VLM_API_KEY else {}

    # Broad catch is deliberate: this is the UI boundary, where any failure
    # is logged in full for the admin and shown to the user as a short message.
    try:
        response = requests.post(
            VLM_API_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=VLM_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()
        # Assume caption is in data['choices'][0]['message']['content']
        caption = data["choices"][0]["message"]["content"]
    except Exception as e:
        logger.exception("VLM API error: %s", e)  # Detailed error for admin
        raise gr.Error("Sorry, there was a problem generating a caption.") from e
    return caption


def get_subjects(caption):
    """Return Annif subject suggestions for *caption*.

    Args:
        caption: Text passed to ``annif.suggest`` for project ``PROJECT_ID``.

    Returns:
        A dict mapping each suggestion's ``"label"`` to its ``"score"``;
        empty when there are no suggestions.

    Raises:
        gr.Error: If the Annif call fails or a result lacks the expected keys.
    """
    # Broad catch is deliberate: UI boundary, log details and show a short
    # message to the user.
    try:
        results = annif.suggest(project_id=PROJECT_ID, text=caption)
        return {result["label"]: result["score"] for result in results}
    except Exception as e:
        logger.exception("Annif API error: %s", e)  # Detailed error for admin
        raise gr.Error(
            "Sorry, there was a problem getting subject suggestions."
        ) from e


def process_image(image):
    """Caption *image* and suggest subjects for the caption.

    Returns:
        A tuple ``(image, caption, subjects)`` where ``caption`` comes from
        ``get_caption`` and ``subjects`` from ``get_subjects``.
    """
    caption = get_caption(image)
    subjects = get_subjects(caption)
    return image, caption, subjects


with gr.Blocks(title="VLM Caption & Annif Subject Demo") as demo:
    gr.Markdown("# VLM Caption & Annif Subject Demo")
    gr.Markdown(
        """
        **How it works:**
        1. Upload or take a photo in the input section below.
        2. The image is sent to a Visual Language Model to generate a caption.
        3. Annif suggests subjects based on the caption.
        """
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Input")
            image_input = gr.Image(
                type="pil", label="Image Input (upload or take a photo)"
            )
            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear")
        with gr.Column():
            gr.Markdown("### Output")
            caption_output = gr.Textbox(label="Caption", lines=10, interactive=False)
            subjects_output = gr.Label(label="Subject Suggestions", show_heading=False)

    def run_app(image):
        """Run the pipeline and return only the values shown in the outputs."""
        # process_image also returns the input image, which the UI does not need.
        _, caption, subjects = process_image(image)
        return caption, subjects

    submit_btn.click(
        run_app, inputs=image_input, outputs=[caption_output, subjects_output]
    )
    # Clear resets the two output components only.
    clear_btn.click(lambda: ("", {}), outputs=[caption_output, subjects_output])

demo.launch()