Spaces:

NatLibFi
/

Caption-Annif-Demo

Running

Juho Inkinen

Improved error handling and deduplication of image display etc.

5c3f18e 2 months ago

2.86 kB

	import gradio as gr
	import requests
	from annif_client import AnnifClient
	import os


	# Get VLM API base URL and API key from environment variables.
	# The base URL is mandatory: fail fast at import time when it is unset or
	# empty instead of building a malformed endpoint later.
	VLM_API_BASE_URL = os.getenv("VLM_API_BASE_URL")
	if not VLM_API_BASE_URL:
	    raise RuntimeError("VLM_API_BASE_URL environment variable must be set.")
	# The API key is optional and defaults to an empty string when unset.
	VLM_API_KEY = os.getenv("VLM_API_KEY", "")
	# Strip trailing slashes so that a base URL configured as "https://host/"
	# does not produce "https://host//v1/chat/completions".
	VLM_API_ENDPOINT = f"{VLM_API_BASE_URL.rstrip('/')}/v1/chat/completions"


	# Initialize Annif client (no arguments, so the client library's defaults apply)
	annif = AnnifClient()


	def get_caption(image, timeout=60):
	    """Generate a caption for an image using the VLM chat-completions API.

	    The image is encoded as JPEG, embedded as a base64 data URL in an
	    OpenAI-style chat payload and POSTed to ``VLM_API_ENDPOINT``.

	    Args:
	        image: PIL image to describe, or ``None`` when nothing was provided.
	        timeout: Seconds to wait for the VLM server before giving up, so a
	            stalled server cannot block the request forever.

	    Returns:
	        The caption text taken from ``choices[0].message.content`` of the
	        JSON response.

	    Raises:
	        gr.Error: If no image was given, or if the API call or the parsing
	            of its response fails. The underlying exception is printed for
	            the administrator and chained as the cause.
	    """
	    import base64
	    import io

	    if image is None:
	        raise gr.Error("Please upload or take a photo first.")

	    # Pillow refuses to write modes such as RGBA or P as JPEG, so normalise
	    # anything that is not plain RGB or greyscale before encoding.
	    if image.mode not in ("RGB", "L"):
	        image = image.convert("RGB")

	    # Convert image to base64 JPEG
	    buf = io.BytesIO()
	    image.save(buf, format="JPEG")
	    img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

	    # Prepare payload for VLM (OpenAI schema)
	    payload = {
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": "What is in this image?"},
	                    {
	                        "type": "image_url",
	                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
	                    },
	                ],
	            }
	        ],
	        "max_tokens": 300,
	    }
	    # Only send the key header when a key is actually configured.
	    headers = {"X-API-Key": VLM_API_KEY} if VLM_API_KEY else {}
	    try:
	        response = requests.post(
	            VLM_API_ENDPOINT, json=payload, headers=headers, timeout=timeout
	        )
	        response.raise_for_status()
	        data = response.json()
	        # Assume caption is in data['choices'][0]['message']['content']
	        caption = data["choices"][0]["message"]["content"]
	    except Exception as e:
	        print(f"VLM API error: {e}")  # Detailed error for admin
	        # The user only sees a generic message; details stay in the log.
	        raise gr.Error("Sorry, there was a problem generating a caption.") from e
	    return caption


	# Annif project queried for suggestions (placeholder value, update as needed)
	PROJECT_ID = "yso-en"


	def get_subjects(caption):
	    """Fetch Annif subject suggestions for a caption.

	    Args:
	        caption: Text passed to ``annif.suggest`` for project ``PROJECT_ID``.

	    Returns:
	        A dict mapping each suggested label to its score; empty when Annif
	        returns no suggestions.

	    Raises:
	        gr.Error: If the Annif call or reading its results fails. The
	            underlying exception is printed for the administrator.
	    """
	    try:
	        scores_by_label = {}
	        for hit in annif.suggest(project_id=PROJECT_ID, text=caption):
	            scores_by_label[hit["label"]] = hit["score"]
	    except Exception as err:
	        print(f"Annif API error: {err}")  # Detailed error for admin
	        raise gr.Error("Sorry, there was a problem getting subject suggestions.")
	    return scores_by_label


	def process_image(image):
	    """Caption an image and suggest subjects for that caption.

	    Args:
	        image: Image handed to ``get_caption``.

	    Returns:
	        A tuple ``(image, caption, subjects)``: the input image unchanged,
	        the result of ``get_caption`` and the result of ``get_subjects``
	        for that caption.
	    """
	    caption = get_caption(image)
	    return image, caption, get_subjects(caption)


	# UI components, created in the same order as they are passed to the interface.
	_image_input = gr.Image(type="pil", label="Upload or take a photo")
	_caption_output = gr.Textbox(label="Caption")
	_subjects_output = gr.Label(label="Subject Suggestions", show_heading=False)

	# process_image also returns the input image; slice it off so that only the
	# caption and the subjects are sent to the two output components.
	demo = gr.Interface(
	    fn=lambda image: process_image(image)[1:],
	    inputs=_image_input,
	    outputs=[_caption_output, _subjects_output],
	    title="VLM Caption & Annif Subject Demo",
	    description="Upload or take a photo. The app generates a caption using a Visual Language Model and suggests subjects using Annif.",
	)

	# Start the Gradio app as soon as this module is executed.
	demo.launch()