Juho Inkinen committed on
Commit
dea4bf5
·
1 Parent(s): 8453868

Initial working app

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from annif_client import AnnifClient
4
+ import os
5
+
6
+
7
# Get VLM API base URL and API key from environment variables.
# NOTE(review): if VLM_API_BASE_URL is unset, os.getenv returns None and the
# endpoint below becomes the literal string "None/v1/chat/completions" —
# confirm the variable is always set in the deployment environment.
VLM_API_BASE_URL = os.getenv("VLM_API_BASE_URL")
# The key is optional; when empty, no auth header is sent (see get_caption).
VLM_API_KEY = os.getenv("VLM_API_KEY", "")
# OpenAI-compatible chat-completions endpoint of the VLM service.
VLM_API_ENDPOINT = f"{VLM_API_BASE_URL}/v1/chat/completions"


# Initialize Annif client (no arguments — presumably this targets the
# annif-client default API base URL; verify against the annif-client docs).
annif = AnnifClient()
15
+
16
+
17
def get_caption(image):
    """Generate a caption for *image* using the configured VLM service.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to caption (must be saveable as JPEG).

    Returns
    -------
    str
        The caption text, or an ``"Error: ..."`` string if the request or
        response parsing fails (best-effort demo behavior — never raises).
    """
    import io
    import base64

    # Serialize the image as base64-encoded JPEG for the data-URL payload.
    buf = io.BytesIO()
    image.save(buf, format="JPEG")
    img_bytes = buf.getvalue()
    img_b64 = base64.b64encode(img_bytes).decode("utf-8")

    # Prepare payload for VLM (OpenAI chat-completions schema with an
    # inline image_url content part).
    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }
    # Only send the auth header when a key is configured.
    headers = {"X-API-Key": VLM_API_KEY} if VLM_API_KEY else {}
    try:
        # Fix: the original call had no timeout, so a stalled VLM service
        # would block this Gradio worker indefinitely.
        response = requests.post(
            VLM_API_ENDPOINT, json=payload, headers=headers, timeout=60
        )
        response.raise_for_status()
        data = response.json()
        # Caption lives at choices[0].message.content in the OpenAI schema.
        caption = data["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface the failure as the caption text rather than crashing the UI.
        caption = f"Error: {e}"
    return caption
53
+
54
+
55
PROJECT_ID = "yso-en"  # Annif project id used by get_subjects; placeholder, update as needed
56
+
57
+
58
def get_subjects(caption):
    """Suggest subjects for *caption* via the Annif API.

    Returns a ``{label: score}`` mapping on success. When the API yields no
    suggestions, or when any error occurs, a human-readable message string is
    returned instead (Gradio's Label component accepts either form).
    """
    try:
        suggestions = annif.suggest(project_id=PROJECT_ID, text=caption)
        scores = {}
        for item in suggestions:
            scores[item["label"]] = item["score"]
        # An empty mapping means nothing was found; report that as text.
        return scores if scores else "No subjects found."
    except Exception as exc:
        return f"Error: {exc}"
69
+
70
+
71
def process_image(image):
    """Gradio pipeline: caption the image, then suggest subjects for the caption.

    Returns the input image (echoed back for display), the caption string,
    and the subject suggestions (label->score mapping or a message string).
    """
    caption_text = get_caption(image)
    return image, caption_text, get_subjects(caption_text)
75
+
76
+
77
# Gradio front end: one image in, three components out (echoed image,
# generated caption, Annif subject suggestions).
_image_input = gr.Image(type="pil", label="Upload or take a photo")
_output_components = [
    gr.Image(type="pil", label="Input Image"),
    gr.Textbox(label="Caption"),
    gr.Label(label="Subject Suggestions"),
]

demo = gr.Interface(
    fn=process_image,
    inputs=_image_input,
    outputs=_output_components,
    title="VLM Caption & Annif Subject Demo",
    description="Upload or take a photo. The app generates a caption using a Visual Language Model and suggests subjects using Annif.",
)

demo.launch()