Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| from annif_client import AnnifClient | |
| import os | |
# --- Service configuration, resolved once at import time -------------------

# The VLM endpoint is mandatory; the API key is optional (empty = no header).
VLM_API_BASE_URL = os.getenv("VLM_API_BASE_URL")
if not VLM_API_BASE_URL:
    raise RuntimeError("VLM_API_BASE_URL environment variable must be set.")
VLM_API_KEY = os.getenv("VLM_API_KEY", "")
VLM_API_ENDPOINT = f"{VLM_API_BASE_URL}/v1/chat/completions"

# Annif base URL is optional; an unset/empty value falls back to the
# client's built-in default endpoint.
ANNIF_API_BASE_URL = os.getenv("ANNIF_API_BASE_URL")
if not ANNIF_API_BASE_URL:
    annif = AnnifClient()
elif not ANNIF_API_BASE_URL.endswith("v1/"):
    raise RuntimeError("ANNIF_API_BASE_URL should end with 'v1/'")
else:
    annif = AnnifClient(api_base=ANNIF_API_BASE_URL)
def get_caption(image, prompt):
    """Send *image* and *prompt* to the VLM and return the generated caption.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to caption; it is re-encoded as JPEG before upload.
    prompt : str
        Instruction text sent alongside the image.

    Returns
    -------
    str
        The caption text from the VLM response.

    Raises
    ------
    gr.Error
        User-facing error when the request or response parsing fails
        (the detailed cause is printed for the admin).
    """
    import io
    import base64

    # Encode the image as a base64 JPEG data URL for the OpenAI-style payload.
    buf = io.BytesIO()
    image.save(buf, format="JPEG")
    img_bytes = buf.getvalue()
    img_b64 = base64.b64encode(img_bytes).decode("utf-8")

    # OpenAI chat-completions schema: one user message with text + image parts.
    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }
    headers = {"X-API-Key": VLM_API_KEY} if VLM_API_KEY else {}
    try:
        # A timeout prevents the UI request from hanging forever on a
        # stalled VLM backend (requests has no default timeout).
        response = requests.post(
            VLM_API_ENDPOINT, json=payload, headers=headers, timeout=120
        )
        response.raise_for_status()
        data = response.json()
        # Caption is in the first choice's message content (OpenAI schema).
        caption = data["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"VLM API error: {e}")  # Detailed error for admin
        raise gr.Error("Sorry, there was a problem generating a caption.")
    return caption
def get_subjects(caption, project_id):
    """Ask Annif for subject suggestions for *caption*.

    Parameters
    ----------
    caption : str
        Text to send to Annif's suggest endpoint.
    project_id : str
        Annif project identifier (e.g. "yso-fi").

    Returns
    -------
    dict
        Mapping of suggestion label -> score; empty when Annif returns
        no suggestions.

    Raises
    ------
    gr.Error
        User-facing error when the Annif API call fails (the detailed
        cause is printed for the admin).
    """
    try:
        results = annif.suggest(project_id=project_id, text=caption)
        # An empty result list naturally yields an empty dict, so no
        # separate empty-case branch is needed.
        return {result["label"]: result["score"] for result in results}
    except Exception as e:
        print(f"Annif API error: {e}")  # Detailed error for admin
        raise gr.Error("Sorry, there was a problem getting subject suggestions.")
def process_image(image, project_id):
    """Run the full pipeline: caption *image*, then suggest subjects.

    Returns a ``(image, caption, subjects)`` tuple, where ``subjects``
    is the label->score mapping produced by Annif.
    """
    # Finnish alt-text instruction for the VLM (kept verbatim).
    alt_text_prompt = (
        'Luo vaihtoehtoinen tekstikuvaus, joka on tarkoitettu henkilöille, jotka eivät näe kuvaa. '
        'Kuvaile kuvan todellista sisältöä, älä tulkitse mitään. '
        'Aloita yleisellä kuvauksella ja siirry sitten yksityiskohtiin. '
        'Kuvaile yksityiskohtia ainakin viiden lauseen verran. '
        'Jos kuvassa näkyy tekstiä, kerro mitä siinä lukee ja jos teksti ei ole suomea, käännä se myös suomeksi. '
        'Vastaa vain lopullisella alt-tekstillä, älä lisää "tässä on alt-teksti", selityksiä tai väliotsikoita. '
    )
    generated_caption = get_caption(image, alt_text_prompt)
    suggested_subjects = get_subjects(generated_caption, project_id)
    return image, generated_caption, suggested_subjects
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(title="VLM Caption & Annif Demo") as demo:
    gr.Markdown("# VLM Caption & Annif Demo")
    gr.Markdown(
        """
**How it works:**
1. Upload or take a photo in the input section below.
2. The image is sent to a Visual Language Model to generate a caption.
3. Annif suggests subjects based on the caption.
"""
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Input")
            image_input = gr.Image(
                type="pil",
                label="Image Input (upload or take a photo)",
                mirror_webcam=False,
            )
            project_dropdown = gr.Dropdown(
                choices=[
                    ("YSO Finnish - Yleinen suomalainen ontologia", "yso-fi"),
                    ("YKL Finnish - Yleisten kirjastojen luokitusjärjestelmä ", "ykl-fi"),
                    ("KAUNO Finnish - Fiktiivisen aineiston ontologia ", "kauno-fi"),
                ],
                value="yso-fi",
                label="Annif Project",
                info="Select the vocabulary from where subject suggestions are drawn",
            )
            # Disabled until an image is present; toggled by image_input.change.
            submit_btn = gr.Button("Submit", interactive=False)
            clear_btn = gr.Button("Clear")
        with gr.Column():
            gr.Markdown("### Output")
            caption_output = gr.Textbox(label="Caption", lines=10, interactive=False)
            subjects_output = gr.Label(label="Subject Suggestions", show_heading=False)

    def run_app(image, project_id):
        # Drop the echoed image from process_image; the UI only displays
        # the caption and the subject suggestions.
        caption, subjects = process_image(image, project_id)[1:]
        return caption, subjects

    submit_btn.click(
        run_app,
        inputs=[image_input, project_dropdown],
        outputs=[caption_output, subjects_output],
    )
    clear_btn.click(lambda: ("", {}), outputs=[caption_output, subjects_output])

    def update_submit_btn(img):
        # Enable Submit only while an image is present.
        return gr.update(interactive=img is not None)

    # BUG FIX: the previous .upload(...) listener only fires for file uploads,
    # so a webcam capture never enabled the Submit button and clearing the
    # image left it enabled. .change fires on upload, webcam capture, and
    # clear alike, and update_submit_btn handles the None (cleared) case.
    image_input.change(update_submit_btn, inputs=image_input, outputs=submit_btn)

demo.launch()