Chris Addis committed on
Commit
8a877e4
·
1 Parent(s): 7802592

different design

Browse files
Files changed (2) hide show
  1. app-Copy1.py +380 -0
  2. app.py +311 -138
app-Copy1.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import io
5
+ import os
6
+ import requests
7
+ import json
8
+ from dotenv import load_dotenv
9
+ import openai
10
+ import base64
11
+ import csv
12
+ import tempfile
13
+ import datetime
14
+
15
+ # Load environment variables from .env file if it exists (for local development)
16
+ # On Hugging Face Spaces, the secrets are automatically available as environment variables
17
+ if os.path.exists(".env"):
18
+ load_dotenv()
19
+
20
+ from io import BytesIO
21
+ import numpy as np
22
+ import requests
23
+ from PIL import Image
24
+
25
+ # import libraries
26
+ from library.utils_model import *
27
+ from library.utils_html import *
28
+ from library.utils_prompt import *
29
+
30
+ OR = OpenRouterAPI()
31
+ gemini = OpenRouterAPI(api_key = os.getenv("GEMINI_API_KEY"),base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
32
+
33
+ # Path for storing user preferences
34
+ PREFERENCES_FILE = "data/user_preferences.csv"
35
+
36
+ # Ensure directory exists
37
+ os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
+
39
def get_sys_prompt(length="medium"):
    """Return the system (developer) prompt for the requested description length.

    Parameters
    ----------
    length : str
        One of "short", "medium" or "long". Any unrecognised value falls
        back to the long-description prompt (matching the original
        if/elif/else behaviour).

    Returns
    -------
    str
        The full system prompt to send to the captioning model.
    """
    # Shared instructions; only the description kind and the length limit
    # differ between the three variants. (Also fixes the "Repsonses" /
    # "maxium" typos that were being sent to the model.)
    base = (
        "You are a museum curator tasked with generating {kind} "
        "(as defined in WCAG 2.1) of museum objects for visually impaired and "
        "blind users from images. Use British English and follow museum "
        "accessibility best practices. Do not start with phrases like "
        "'The image shows' or 'This is an image of'. Be precise, concise and "
        "avoid filler and subjective statements. Responses should be {limit}."
    )
    if length == "short":
        return base.format(kind="alt-text",
                           limit="a maximum of 130 characters")
    if length == "medium":
        return base.format(kind="long descriptions",
                           limit="between 250-300 characters in length")
    # "long" and any other value
    return base.format(kind="long descriptions",
                       limit="a maximum of 450 characters")
47
+
48
+ # This function is no longer needed since we removed A/B testing
49
+
50
def create_csv_file_simple(results):
    """Write analysis results to a temporary CSV file and return its path.

    Parameters
    ----------
    results : list[dict]
        Each dict may contain 'image_id' and 'content' keys; missing keys
        are written as empty strings.

    Returns
    -------
    str
        Filesystem path of the created CSV file (caller owns cleanup).
    """
    # mkstemp returns an open OS-level descriptor plus a path; fdopen wraps
    # the descriptor so the context manager closes it exactly once.
    fd, path = tempfile.mkstemp(suffix='.csv')

    # newline='' is required by the csv module; explicit utf-8 avoids
    # platform-default encoding failures on non-ASCII captions.
    with os.fdopen(fd, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Write header
        writer.writerow(['image_id', 'content'])
        # Write data
        for result in results:
            writer.writerow([
                result.get('image_id', ''),
                result.get('content', ''),
            ])

    return path
67
+
68
# Extract original filename without path or extension
def get_base_filename(filepath):
    """Return the bare filename of *filepath*: no directory, no final extension."""
    # Falsy input (None / empty string) maps to the empty string.
    if not filepath:
        return ""
    # Drop the directory part, then strip the last extension.
    stem, _ext = os.path.splitext(os.path.basename(filepath))
    return stem
77
+
78
# Define the Gradio interface
def create_demo():
    """Build and return the Gradio Blocks UI for the alt-text generator.

    Layout: a header row (title + partner logos), then a two-column body —
    the left column holds the upload/model/length controls and the CSV
    download, the right column shows the current image with prev/next
    navigation and the generated alt-text. All event handlers are wired
    before the Blocks object is returned.

    NOTE(review): relies on module-level `OR`, `gemini`, `prompt_new`,
    `get_sys_prompt`, `get_base_filename` and `create_csv_file_simple`.
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        # Header: title/blurb on the left, partner logos on the right.
        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("# AI Alt-text Generator")
                gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
                gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
            with gr.Column(scale=1):
                with gr.Row():
                    # Use gr.Image with all interactive features disabled
                    # so the logos behave as static decoration.
                    gr.Image("images/nhm_logo.png", show_label=False, height=120,
                             interactive=False, show_download_button=False,
                             show_share_button=False, show_fullscreen_button=False,
                             container=False)
                    gr.Image("images/nml_logo.png", show_label=False, height=120,
                             interactive=False, show_download_button=False,
                             show_share_button=False, show_fullscreen_button=False,
                             container=False)

        with gr.Row():
            # Left column: Controls and uploads
            with gr.Column(scale=1):
                # Upload interface
                upload_button = gr.UploadButton(
                    "Click to Upload Images",
                    file_types=["image"],
                    file_count="multiple"
                )

                # Define choices as a list of tuples: (Display Name, Internal Value)
                model_choices = [
                    # Gemini
                    ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
                    # GPT-4.1 Series
                    ("GPT-4.1 Nano", "gpt-4.1-nano"),
                    ("GPT-4.1 Mini", "gpt-4.1-mini"),
                    ("GPT-4.1", "gpt-4.1"),
                    ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
                    # Other Models
                    ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
                    ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
                    # Experimental Models
                    ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
                    ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
                ]

                # Internal value of the default choice above.
                default_model_internal_value = "google/gemini-2.0-flash-001"

                # Add model selection dropdown
                model_choice = gr.Dropdown(
                    choices=model_choices,
                    label="Select Model",
                    value=default_model_internal_value,  # Use the internal value for the default
                    # info="Choose the language model to use."  # Optional: Add extra info tooltip
                    visible=True
                )

                # Add response length selection (maps to get_sys_prompt()).
                length_choice = gr.Radio(
                    choices=["short", "medium", "long"],
                    label="Response Length",
                    value="medium",
                    info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
                )

                # Preview gallery for uploaded images
                gr.Markdown("### Uploaded Images")
                input_gallery = gr.Gallery(
                    label="",
                    columns=3,
                    height=150,
                    object_fit="contain"
                )

                # Analysis button
                analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")

                # Hidden state components holding uploaded paths and base names.
                image_state = gr.State([])
                filename_state = gr.State([])

                # CSV download component
                csv_download = gr.File(label="CSV Results")

            # Right column: Display area
            with gr.Column(scale=2):
                with gr.Column(elem_classes="image-container"):
                    current_image = gr.Image(
                        label="Current Image",
                        height=600,  # Set the maximum desired height
                        type="filepath",
                        show_fullscreen_button=True,
                        show_download_button=False,
                        show_share_button=False
                    )

                # Navigation row
                with gr.Row():
                    prev_button = gr.Button("← Previous", size="sm")
                    image_counter = gr.Markdown("", elem_id="image-counter")
                    next_button = gr.Button("Next →", size="sm")

                # Alt-text heading and output
                gr.Markdown("### Generated Alt-text")

                # Generated alt-text (read-only display).
                analysis_text = gr.Textbox(
                    label="",
                    value="Please analyze images to see results",
                    lines=6,
                    max_lines=10,
                    interactive=False,
                    show_label=False
                )

        # Hidden state for gallery navigation
        current_index = gr.State(0)
        all_images = gr.State([])
        all_results = gr.State([])

        # Handle file uploads - store files for use during analysis
        def handle_upload(files):
            """Collect uploaded paths and base filenames for gallery + state."""
            file_paths = []
            file_names = []
            for file in files:
                file_paths.append(file.name)
                # Extract filename without path or extension for later use
                file_names.append(get_base_filename(file.name))
            # Same path list feeds both the preview gallery and image_state.
            return file_paths, file_paths, file_names

        upload_button.upload(
            fn=handle_upload,
            inputs=[upload_button],
            outputs=[input_gallery, image_state, filename_state]
        )

        # Caption every uploaded image and prepare the initial display.
        def analyze_images(image_paths, model_choice, length_choice, filenames):
            """Generate a caption per image.

            Returns a 7-tuple matching analyze_button's outputs:
            (all image paths, per-image result dicts, starting index 0,
            first image path, "1 of N" counter text, first caption,
            path of the CSV download file).
            """
            if not image_paths:
                return [], [], 0, "", "No images", "", ""

            # Get system prompt based on length selection
            sys_prompt = get_sys_prompt(length_choice)

            image_results = []

            for i, image_path in enumerate(image_paths):
                # Use original filename as image_id if available
                if i < len(filenames) and filenames[i]:
                    image_id = filenames[i]
                else:
                    image_id = f"Image {i+1}"

                try:
                    # Open the image file for analysis
                    img = Image.open(image_path)
                    prompt0 = prompt_new()  # Using the new prompt function

                    # Extract the actual model name (remove any labels like "(default)")
                    if " (" in model_choice:
                        model_name = model_choice.split(" (")[0]
                    else:
                        model_name = model_choice

                    # Check if this is one of the Gemini models that needs special handling
                    is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name

                    if is_gemini_model:
                        try:
                            # First try using the dedicated gemini client
                            result = gemini.generate_caption(
                                img,
                                model=model_name,
                                max_image_size=512,
                                prompt=prompt0,
                                prompt_dev=sys_prompt,
                                temperature=1
                            )
                        except Exception as gemini_error:
                            # If gemini client fails, fall back to standard OR client
                            result = OR.generate_caption(
                                img,
                                model=model_name,
                                max_image_size=512,
                                prompt=prompt0,
                                prompt_dev=sys_prompt,
                                temperature=1
                            )
                    else:
                        # For all other models, use OR client directly
                        result = OR.generate_caption(
                            img,
                            model=model_name,
                            max_image_size=512,
                            prompt=prompt0,
                            prompt_dev=sys_prompt,
                            temperature=1
                        )

                    # Add to results
                    image_results.append({
                        "image_id": image_id,
                        "content": result
                    })

                except Exception as e:
                    # Per-image failure is recorded as the caption so one bad
                    # image doesn't abort the whole batch.
                    error_message = f"Error: {str(e)}"
                    image_results.append({
                        "image_id": image_id,
                        "content": error_message
                    })

            # Create a CSV file for download
            csv_path = create_csv_file_simple(image_results)

            # Set up initial display with first image
            if len(image_paths) > 0:
                initial_image = image_paths[0]
                initial_counter = f"{1} of {len(image_paths)}"
                initial_text = image_results[0]["content"]
            else:
                initial_image = ""
                initial_text = "No images analyzed"
                initial_counter = "0 of 0"

            return (image_paths, image_results, 0, initial_image, initial_counter,
                    initial_text, csv_path)

        # Function to navigate to previous image
        def go_to_prev(current_idx, images, results):
            """Step to the previous image, wrapping from the first to the last."""
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", ""

            new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
            counter_html = f"{new_idx + 1} of {len(images)}"

            return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])

        # Function to navigate to next image
        def go_to_next(current_idx, images, results):
            """Step to the next image, wrapping from the last back to the first."""
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", ""

            new_idx = (current_idx + 1) % len(images)
            counter_html = f"{new_idx + 1} of {len(images)}"

            return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])

        # Connect the analyze button
        analyze_button.click(
            fn=analyze_images,
            inputs=[image_state, model_choice, length_choice, filename_state],
            outputs=[
                all_images, all_results, current_index, current_image, image_counter,
                analysis_text, csv_download
            ]
        )

        # Connect navigation buttons
        prev_button.click(
            fn=go_to_prev,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text]
        )

        next_button.click(
            fn=go_to_next,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text]
        )

        # Optional: Add additional information
        with gr.Accordion("About", open=False):
            gr.Markdown("""
            ## About this demo

            This demo generates alternative text for images.

            - Upload one or more images using the upload button
            - Choose a model and response length for generation
            - Navigate through the images with the Previous and Next buttons
            - Download CSV with all results

            Developed by the Natural History Museum in Partnership with National Museums Liverpool.

            If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
            chris.addis@nhm.ac.uk
            """)

    return demo
376
+
377
# Launch the app
if __name__ == "__main__":
    # Build the interface and start the local Gradio server.
    create_demo().launch()
app.py CHANGED
@@ -13,7 +13,6 @@ import tempfile
13
  import datetime
14
 
15
  # Load environment variables from .env file if it exists (for local development)
16
- # On Hugging Face Spaces, the secrets are automatically available as environment variables
17
  if os.path.exists(".env"):
18
  load_dotenv()
19
 
@@ -45,8 +44,6 @@ def get_sys_prompt(length="medium"):
45
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
46
  return dev_prompt
47
 
48
- # This function is no longer needed since we removed A/B testing
49
-
50
  def create_csv_file_simple(results):
51
  """Create a CSV file from the results and return the path"""
52
  # Create a temporary file
@@ -75,138 +72,300 @@ def get_base_filename(filepath):
75
  filename = os.path.splitext(basename)[0]
76
  return filename
77
 
78
- # Define the Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def create_demo():
80
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
81
- # Replace the existing logo code section:
82
- with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  with gr.Column(scale=3):
84
- gr.Markdown("# AI Alt-text Generator")
85
- gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
86
- gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
87
- with gr.Column(scale=1):
88
  with gr.Row():
89
- # Use gr.Image with all interactive features disabled
90
- gr.Image("images/nhm_logo.png", show_label=False, height=120,
91
  interactive=False, show_download_button=False,
92
- show_share_button=False, show_fullscreen_button=False,
93
- container=False)
94
- gr.Image("images/nml_logo.png", show_label=False, height=120,
95
  interactive=False, show_download_button=False,
96
- show_share_button=False, show_fullscreen_button=False,
97
- container=False)
98
-
99
 
 
100
  with gr.Row():
101
- # Left column: Controls and uploads
102
  with gr.Column(scale=1):
103
- # Upload interface
104
- upload_button = gr.UploadButton(
105
- "Click to Upload Images",
106
- file_types=["image"],
107
- file_count="multiple"
108
- )
109
-
110
- # Define choices as a list of tuples: (Display Name, Internal Value)
111
- model_choices = [
112
- # Gemini
113
- ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
114
- # GPT-4.1 Series
115
- ("GPT-4.1 Nano", "gpt-4.1-nano"),
116
- ("GPT-4.1 Mini", "gpt-4.1-mini"),
117
- ("GPT-4.1", "gpt-4.1"),
118
- ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
119
- # Other Models
120
- ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
121
- ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
122
- # Experimental Models
123
- ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
124
- ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
125
- ]
126
-
127
- # Find the internal value of the default choice
128
- default_model_internal_value = "google/gemini-2.0-flash-001"
129
-
130
- # Add model selection dropdown
131
- model_choice = gr.Dropdown(
132
- choices=model_choices,
133
- label="Select Model",
134
- value=default_model_internal_value, # Use the internal value for the default
135
- # info="Choose the language model to use." # Optional: Add extra info tooltip
136
- visible=True
137
- )
138
-
139
-
140
- # Add response length selection
141
- length_choice = gr.Radio(
142
- choices=["short", "medium", "long"],
143
- label="Response Length",
144
- value="medium",
145
- info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
146
- )
147
-
148
- # Preview gallery for uploaded images
149
- gr.Markdown("### Uploaded Images")
150
- input_gallery = gr.Gallery(
151
- label="",
152
- columns=3,
153
- height=150,
154
- object_fit="contain"
155
- )
156
-
157
- # Analysis button
158
- analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")
159
 
160
- # Hidden state component to store image info
161
- image_state = gr.State([])
162
- filename_state = gr.State([])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- # CSV download component
165
- csv_download = gr.File(label="CSV Results")
 
 
 
 
 
166
 
167
- # Right column: Display area
168
  with gr.Column(scale=2):
169
- with gr.Column(elem_classes="image-container"):
 
170
  current_image = gr.Image(
171
- label="Current Image",
172
- height=600, # Set the maximum desired height
173
- type="filepath",
174
- show_fullscreen_button=True,
175
- show_download_button=False,
176
- show_share_button=False
177
  )
 
 
 
 
 
 
178
 
179
- # Navigation row
180
- with gr.Row():
181
- prev_button = gr.Button(" Previous", size="sm")
182
- image_counter = gr.Markdown("", elem_id="image-counter")
183
- next_button = gr.Button("Next →", size="sm")
184
-
185
- # Alt-text heading and output
186
- gr.Markdown("### Generated Alt-text")
187
-
188
- # Alt-text
189
- analysis_text = gr.Textbox(
190
- label="",
191
- value="Please analyze images to see results",
192
- lines=6,
193
- max_lines=10,
194
- interactive=False,
195
- show_label=False
196
- )
197
-
198
- # Hidden state for gallery navigation
199
- current_index = gr.State(0)
200
- all_images = gr.State([])
201
- all_results = gr.State([])
 
 
 
 
 
 
 
 
 
202
 
203
- # Handle file uploads - store files for use during analysis
204
  def handle_upload(files):
205
  file_paths = []
206
  file_names = []
207
  for file in files:
208
  file_paths.append(file.name)
209
- # Extract filename without path or extension for later use
210
  file_names.append(get_base_filename(file.name))
211
  return file_paths, file_paths, file_names
212
 
@@ -216,12 +375,10 @@ def create_demo():
216
  outputs=[input_gallery, image_state, filename_state]
217
  )
218
 
219
- # Function to analyze images
220
- # Modify the analyze_images function in your code:
221
-
222
  def analyze_images(image_paths, model_choice, length_choice, filenames):
223
  if not image_paths:
224
- return [], [], 0, "", "No images", "", ""
225
 
226
  # Get system prompt based on length selection
227
  sys_prompt = get_sys_prompt(length_choice)
@@ -303,34 +460,39 @@ def create_demo():
303
  initial_image = image_paths[0]
304
  initial_counter = f"{1} of {len(image_paths)}"
305
  initial_text = image_results[0]["content"]
 
306
  else:
307
  initial_image = ""
308
  initial_text = "No images analyzed"
309
  initial_counter = "0 of 0"
 
310
 
311
  return (image_paths, image_results, 0, initial_image, initial_counter,
312
- initial_text, csv_path)
313
-
314
 
315
  # Function to navigate to previous image
316
  def go_to_prev(current_idx, images, results):
317
  if not images or len(images) == 0:
318
- return current_idx, "", "0 of 0", ""
319
 
320
  new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
321
  counter_html = f"{new_idx + 1} of {len(images)}"
 
 
322
 
323
- return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
324
 
325
  # Function to navigate to next image
326
  def go_to_next(current_idx, images, results):
327
  if not images or len(images) == 0:
328
- return current_idx, "", "0 of 0", ""
329
 
330
  new_idx = (current_idx + 1) % len(images)
331
  counter_html = f"{new_idx + 1} of {len(images)}"
 
 
332
 
333
- return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
334
 
335
  # Connect the analyze button
336
  analyze_button.click(
@@ -338,7 +500,7 @@ def create_demo():
338
  inputs=[image_state, model_choice, length_choice, filename_state],
339
  outputs=[
340
  all_images, all_results, current_index, current_image, image_counter,
341
- analysis_text, csv_download
342
  ]
343
  )
344
 
@@ -346,31 +508,42 @@ def create_demo():
346
  prev_button.click(
347
  fn=go_to_prev,
348
  inputs=[current_index, all_images, all_results],
349
- outputs=[current_index, current_image, image_counter, analysis_text]
350
  )
351
 
352
  next_button.click(
353
  fn=go_to_next,
354
  inputs=[current_index, all_images, all_results],
355
- outputs=[current_index, current_image, image_counter, analysis_text]
356
  )
357
 
358
- # Optional: Add additional information
359
- with gr.Accordion("About", open=False):
360
  gr.Markdown("""
361
- ## About this demo
 
 
 
 
 
 
 
 
 
 
 
362
 
363
- This demo generates alternative text for images.
 
 
 
 
364
 
365
- - Upload one or more images using the upload button
366
- - Choose a model and response length for generation
367
- - Navigate through the images with the Previous and Next buttons
368
- - Download CSV with all results
369
 
370
- Developed by the Natural History Museum in Partnership with National Museums Liverpool.
371
 
372
- If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
373
- chris.addis@nhm.ac.uk
374
  """)
375
 
376
  return demo
 
13
  import datetime
14
 
15
  # Load environment variables from .env file if it exists (for local development)
 
16
  if os.path.exists(".env"):
17
  load_dotenv()
18
 
 
44
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
45
  return dev_prompt
46
 
 
 
47
  def create_csv_file_simple(results):
48
  """Create a CSV file from the results and return the path"""
49
  # Create a temporary file
 
72
  filename = os.path.splitext(basename)[0]
73
  return filename
74
 
75
# Define custom CSS for the new design.
# Injected via gr.Blocks(css=custom_css); the class names below are attached
# to components through their elem_classes / elem_id arguments (e.g.
# "app-header", "card", "control-panel", "upload-area", "primary-btn",
# "result-card", "nav-buttons", "image-preview", "footer").
custom_css = """
:root {
    --primary-color: #1e2a78;
    --secondary-color: #33a1fd;
    --accent-color: #f5f5f5;
    --text-color: #333;
    --background-color: #fff;
    --card-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    --border-radius: 8px;
}

.container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}

.app-header {
    background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
    color: white;
    padding: 20px;
    border-radius: var(--border-radius);
    margin-bottom: 20px;
}

.app-title {
    font-size: 28px;
    font-weight: bold;
    margin: 0;
}

.app-subtitle {
    font-size: 16px;
    margin-top: 5px;
    opacity: 0.9;
}

.card {
    background-color: var(--background-color);
    border-radius: var(--border-radius);
    box-shadow: var(--card-shadow);
    padding: 20px;
    margin-bottom: 20px;
}

.control-panel {
    background-color: #f8f9fa;
    border-radius: var(--border-radius);
    padding: 15px;
}

.museum-logos {
    display: flex;
    align-items: center;
    justify-content: flex-end;
}

.upload-area {
    border: 2px dashed var(--secondary-color);
    border-radius: var(--border-radius);
    padding: 30px;
    text-align: center;
    transition: all 0.3s;
}

.upload-area:hover {
    background-color: rgba(51, 161, 253, 0.1);
}

.primary-btn {
    background-color: var(--primary-color);
    color: white;
    border: none;
    padding: 10px 20px;
    border-radius: 20px;
    font-weight: bold;
    transition: all 0.3s;
}

.primary-btn:hover {
    background-color: var(--secondary-color);
    transform: translateY(-2px);
}

.result-card {
    border-left: 4px solid var(--secondary-color);
}

.nav-buttons {
    display: flex;
    justify-content: space-between;
    margin: 15px 0;
}

.image-preview {
    border-radius: var(--border-radius);
    overflow: hidden;
    box-shadow: var(--card-shadow);
}

.footer {
    text-align: center;
    padding: 20px;
    color: var(--text-color);
    opacity: 0.7;
    font-size: 14px;
}

@media (max-width: 768px) {
    .app-header {
        text-align: center;
    }

    .museum-logos {
        justify-content: center;
        margin-top: 15px;
    }
}
"""
195
+
196
+ # Define the Gradio interface with new design
197
  def create_demo():
198
+ # Custom theme
199
+ theme = gr.themes.Base(
200
+ primary_hue="blue",
201
+ secondary_hue="indigo",
202
+ neutral_hue="gray",
203
+ radius_size=gr.themes.sizes.radius_sm,
204
+ text_size=gr.themes.sizes.text_md
205
+ ).set(
206
+ button_primary_background_fill="*primary_500",
207
+ button_primary_background_fill_hover="*primary_600",
208
+ button_secondary_background_fill="*neutral_100",
209
+ button_secondary_background_fill_hover="*neutral_200",
210
+ checkbox_background_color="*neutral_100",
211
+ checkbox_background_color_selected="*primary_500",
212
+ checkbox_border_color="*neutral_300",
213
+ checkbox_border_color_focus="*primary_500",
214
+ checkbox_border_color_hover="*neutral_400",
215
+ checkbox_label_background_fill="white",
216
+ block_title_text_color="*neutral_700",
217
+ block_title_background_fill="*neutral_50",
218
+ slider_color="*primary_500",
219
+ slider_color_dark="*primary_600"
220
+ )
221
+
222
+ with gr.Blocks(theme=theme, css=custom_css) as demo:
223
+ # Header section
224
+ with gr.Row(elem_classes="app-header"):
225
  with gr.Column(scale=3):
226
+ gr.Markdown("# AI Museum Alt-text Generator", elem_classes="app-title")
227
+ gr.Markdown("Generating accessible image descriptions for museum objects - WCAG 2.1 compliant",
228
+ elem_classes="app-subtitle")
229
+ with gr.Column(scale=1, elem_classes="museum-logos"):
230
  with gr.Row():
231
+ gr.Image("images/nhm_logo.png", show_label=False, height=80,
 
232
  interactive=False, show_download_button=False,
233
+ show_share_button=False, show_fullscreen_button=False)
234
+ gr.Image("images/nml_logo.png", show_label=False, height=80,
 
235
  interactive=False, show_download_button=False,
236
+ show_share_button=False, show_fullscreen_button=False)
 
 
237
 
238
+ # Main content area with two-column layout
239
  with gr.Row():
240
+ # Left column: Controls and upload
241
  with gr.Column(scale=1):
242
+ with gr.Box(elem_classes="card control-panel"):
243
+ gr.Markdown("### Configuration")
244
+
245
+ # Model selection dropdown with new styling
246
+ model_choices = [
247
+ ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
248
+ ("GPT-4.1 Nano", "gpt-4.1-nano"),
249
+ ("GPT-4.1 Mini", "gpt-4.1-mini"),
250
+ ("GPT-4.1", "gpt-4.1"),
251
+ ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
252
+ ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
253
+ ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
254
+ ("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
255
+ ("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
256
+ ]
257
+
258
+ default_model_value = "google/gemini-2.0-flash-001"
259
+
260
+ model_choice = gr.Dropdown(
261
+ choices=model_choices,
262
+ label="AI Model",
263
+ value=default_model_value,
264
+ info="Select the AI model to generate alt-text"
265
+ )
266
+
267
+ # Length selection with visual indicators
268
+ with gr.Row():
269
+ length_choice = gr.Radio(
270
+ choices=["short", "medium", "long"],
271
+ label="Description Length",
272
+ value="medium",
273
+ info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
274
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
+ # Upload area with instruction
277
+ with gr.Box(elem_classes="card upload-area"):
278
+ gr.Markdown("### Upload Images")
279
+ gr.Markdown("Select one or more museum object images")
280
+
281
+ upload_button = gr.UploadButton(
282
+ "📁 Select Images",
283
+ file_types=["image"],
284
+ file_count="multiple",
285
+ elem_classes="primary-btn"
286
+ )
287
+
288
+ # Preview gallery for uploaded images
289
+ input_gallery = gr.Gallery(
290
+ label="Uploaded Images",
291
+ columns=3,
292
+ height=200,
293
+ object_fit="contain",
294
+ elem_id="upload-preview"
295
+ )
296
+
297
+ # Analyze button
298
+ analyze_button = gr.Button("🔍 Generate Alt-text",
299
+ variant="primary",
300
+ size="lg",
301
+ elem_classes="primary-btn")
302
 
303
+ # CSV download section
304
+ with gr.Box(elem_classes="card"):
305
+ gr.Markdown("### Export Results")
306
+ csv_download = gr.File(
307
+ label="Download CSV with all descriptions",
308
+ elem_id="csv-download"
309
+ )
310
 
311
+ # Right column: Results display
312
  with gr.Column(scale=2):
313
+ with gr.Box(elem_classes="card image-preview"):
314
+ # Current image display
315
  current_image = gr.Image(
316
+ label="Current Image",
317
+ height=450,
318
+ type="filepath",
319
+ show_fullscreen_button=True,
320
+ show_download_button=False,
321
+ show_share_button=False
322
  )
323
+
324
+ # Navigation controls
325
+ with gr.Row(elem_classes="nav-buttons"):
326
+ prev_button = gr.Button("← Previous", size="sm")
327
+ image_counter = gr.Markdown("0 of 0", elem_id="image-counter")
328
+ next_button = gr.Button("Next →", size="sm")
329
 
330
+ # Results display with card styling
331
+ with gr.Box(elem_classes="card result-card"):
332
+ gr.Markdown("### Generated Alt-text")
333
+
334
+ analysis_text = gr.Textbox(
335
+ label="",
336
+ value="Please upload and analyze images to see results",
337
+ lines=8,
338
+ max_lines=12,
339
+ interactive=False,
340
+ show_label=False
341
+ )
342
+
343
+ # Character count display
344
+ char_count = gr.Markdown("*Character count: 0*", elem_id="char-count")
345
+
346
+ # Hidden state components
347
+ image_state = gr.State([])
348
+ filename_state = gr.State([])
349
+ current_index = gr.State(0)
350
+ all_images = gr.State([])
351
+ all_results = gr.State([])
352
+
353
+ # Footer with attribution
354
+ gr.Markdown(
355
+ """
356
+ ---
357
+ Developed by the Natural History Museum in Partnership with National Museums Liverpool.
358
+ Funded by the DCMS Pilot Scheme | Contact: chris.addis@nhm.ac.uk
359
+ """,
360
+ elem_classes="footer"
361
+ )
362
 
363
+ # Handle file uploads
364
  def handle_upload(files):
365
  file_paths = []
366
  file_names = []
367
  for file in files:
368
  file_paths.append(file.name)
 
369
  file_names.append(get_base_filename(file.name))
370
  return file_paths, file_paths, file_names
371
 
 
375
  outputs=[input_gallery, image_state, filename_state]
376
  )
377
 
378
+ # Function to analyze images with failover mechanism for Gemini
 
 
379
  def analyze_images(image_paths, model_choice, length_choice, filenames):
380
  if not image_paths:
381
+ return [], [], 0, "", "No images", "", "", "*Character count: 0*"
382
 
383
  # Get system prompt based on length selection
384
  sys_prompt = get_sys_prompt(length_choice)
 
460
  initial_image = image_paths[0]
461
  initial_counter = f"{1} of {len(image_paths)}"
462
  initial_text = image_results[0]["content"]
463
+ char_count_text = f"*Character count: {len(initial_text)}*"
464
  else:
465
  initial_image = ""
466
  initial_text = "No images analyzed"
467
  initial_counter = "0 of 0"
468
+ char_count_text = "*Character count: 0*"
469
 
470
  return (image_paths, image_results, 0, initial_image, initial_counter,
471
+ initial_text, csv_path, char_count_text)
 
472
 
473
  # Function to navigate to previous image
474
  def go_to_prev(current_idx, images, results):
475
  if not images or len(images) == 0:
476
+ return current_idx, "", "0 of 0", "", "*Character count: 0*"
477
 
478
  new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
479
  counter_html = f"{new_idx + 1} of {len(images)}"
480
+ result_text = results[new_idx]["content"]
481
+ char_count_text = f"*Character count: {len(result_text)}*"
482
 
483
+ return (new_idx, images[new_idx], counter_html, result_text, char_count_text)
484
 
485
  # Function to navigate to next image
486
  def go_to_next(current_idx, images, results):
487
  if not images or len(images) == 0:
488
+ return current_idx, "", "0 of 0", "", "*Character count: 0*"
489
 
490
  new_idx = (current_idx + 1) % len(images)
491
  counter_html = f"{new_idx + 1} of {len(images)}"
492
+ result_text = results[new_idx]["content"]
493
+ char_count_text = f"*Character count: {len(result_text)}*"
494
 
495
+ return (new_idx, images[new_idx], counter_html, result_text, char_count_text)
496
 
497
  # Connect the analyze button
498
  analyze_button.click(
 
500
  inputs=[image_state, model_choice, length_choice, filename_state],
501
  outputs=[
502
  all_images, all_results, current_index, current_image, image_counter,
503
+ analysis_text, csv_download, char_count
504
  ]
505
  )
506
 
 
508
  prev_button.click(
509
  fn=go_to_prev,
510
  inputs=[current_index, all_images, all_results],
511
+ outputs=[current_index, current_image, image_counter, analysis_text, char_count]
512
  )
513
 
514
  next_button.click(
515
  fn=go_to_next,
516
  inputs=[current_index, all_images, all_results],
517
+ outputs=[current_index, current_image, image_counter, analysis_text, char_count]
518
  )
519
 
520
+ # Add collapsible info section
521
+ with gr.Accordion("About this Tool", open=False):
522
  gr.Markdown("""
523
+ ## AI Museum Alt-text Generator
524
+
525
+ This tool helps museum professionals create high-quality alternative text descriptions for museum objects, designed to meet WCAG 2.1 accessibility guidelines. The generated descriptions are crafted to provide meaningful context for visitors with visual impairments.
526
+
527
+ ### Features
528
+
529
+ - **Multiple AI Models**: Choose from various AI vision models to generate descriptions
530
+ - **Customizable Length**: Select short, medium, or long descriptions based on your needs
531
+ - **Batch Processing**: Upload multiple images at once and navigate through results
532
+ - **Export**: Download all results as CSV for easy integration with your collection management system
533
+
534
+ ### How to Use
535
 
536
+ 1. Select your preferred AI model and description length
537
+ 2. Upload one or more images using the upload button
538
+ 3. Click "Generate Alt-text" to process the images
539
+ 4. Navigate through results with Previous/Next buttons
540
+ 5. Download the CSV with all generated descriptions
541
 
542
+ ### About the Project
 
 
 
543
 
544
+ Developed by the Natural History Museum in Partnership with National Museums Liverpool as part of the DCMS Pilot Scheme for enhancing accessibility in cultural institutions.
545
 
546
+ For feedback, suggestions, or support: chris.addis@nhm.ac.uk
 
547
  """)
548
 
549
  return demo