Chris Addis committed on
Commit
3e18454
·
1 Parent(s): 46d47e4
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -1,27 +1,380 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def process_image(image):
4
- # Simply return the image as is for display
5
- return image
 
6
 
7
- # Create the Gradio interface
8
- with gr.Blocks() as demo:
9
- gr.Markdown("# Image Uploader and Viewer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- with gr.Row():
12
- with gr.Column():
13
- input_image = gr.Image(type="pil", label="Upload an image")
14
- upload_button = gr.Button("Display Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- with gr.Column():
17
- output_image = gr.Image(label="Displayed Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- upload_button.click(
20
- fn=process_image,
21
- inputs=input_image,
22
- outputs=output_image
23
- )
24
 
25
  # Launch the app
26
  if __name__ == "__main__":
27
- demo.launch()
 
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import io
5
+ import os
6
+ import requests
7
+ import json
8
+ from dotenv import load_dotenv
9
+ import openai
10
+ import base64
11
+ import csv
12
+ import tempfile
13
+ import datetime
14
 
15
+ # Load environment variables from .env file if it exists (for local development)
16
+ # On Hugging Face Spaces, the secrets are automatically available as environment variables
17
+ if os.path.exists(".env"):
18
+ load_dotenv()
19
 
20
+ from io import BytesIO
21
+ import numpy as np
22
+ import requests
23
+ from PIL import Image
24
+
25
+ # import libraries
26
+ from library.utils_model import *
27
+ from library.utils_html import *
28
+ from library.utils_prompt import *
29
+
30
+ OR = OpenRouterAPI()
31
+ gemini = OpenRouterAPI(api_key = os.getenv("GEMINI_API_KEY"),base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
32
+
33
+ # Path for storing user preferences
34
+ PREFERENCES_FILE = "data/user_preferences.csv"
35
+
36
+ # Ensure directory exists
37
+ os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
+
39
def get_sys_prompt(length="medium"):
    """Return the system/developer prompt for the requested response length.

    The three prompts differ only in the output type (alt-text vs long
    description) and the character budget stated to the model.

    Args:
        length: "short", "medium", or anything else (the UI passes "long";
            unrecognised values fall through to the long prompt).

    Returns:
        str: The system prompt to send to the captioning model.
    """
    if length == "short":
        # Alt-text proper: hard cap of 130 characters.
        dev_prompt = """You are a museum curator tasked with generating alt-text (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 130 characters."""
    elif length == "medium":
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be between 250-300 characters in length."""
    else:
        # "long" (and any other value): 450-character cap.
        # Fixed typos from the original prompts ("Repsonses", "maxium") so the
        # instruction text the model sees is well-formed English.
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 450 characters."""
    return dev_prompt
47
+
48
+ # This function is no longer needed since we removed A/B testing
49
+
50
def create_csv_file_simple(results):
    """Write analysis results to a temporary CSV file and return its path.

    Args:
        results: Iterable of dicts with optional "image_id" and "content"
            keys; missing keys are written as empty strings.

    Returns:
        str: Path of the temporary ``.csv`` file. The caller (Gradio's
        ``gr.File`` download component) is responsible for cleanup.
    """
    # Create a temporary file; mkstemp gives us an open fd plus the path.
    fd, path = tempfile.mkstemp(suffix='.csv')

    try:
        # newline='' per the csv module docs; explicit UTF-8 so generated
        # alt-text containing non-ASCII characters (e.g. accented names)
        # round-trips regardless of the platform's locale encoding.
        with os.fdopen(fd, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            # Write header
            writer.writerow(['image_id', 'content'])
            # Write data
            for result in results:
                writer.writerow([
                    result.get('image_id', ''),
                    result.get('content', ''),
                ])
    except Exception:
        # Don't leak the temp file if writing fails part-way through.
        os.unlink(path)
        raise

    return path
67
+
68
+ # Extract original filename without path or extension
69
def get_base_filename(filepath):
    """Return the bare filename of *filepath*, without directory or extension.

    Empty or falsy input (e.g. "" or None) yields "".
    """
    if not filepath:
        return ""
    # basename strips the directory; splitext drops the final extension.
    stem, _extension = os.path.splitext(os.path.basename(filepath))
    return stem
77
+
78
+ # Define the Gradio interface
79
def create_demo():
    """Build and return the Gradio Blocks app for the alt-text generator.

    Layout: a header row (title + partner logos), then a two-column body —
    upload/model/length controls on the left, the current image with its
    generated alt-text on the right. All event wiring (upload, analyze,
    prev/next navigation) is configured inside this function.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio app.
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        # Header row: title/credits (3/4 width) and the two partner logos.
        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("# AI Alt-text Generator")
                gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
                gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
            with gr.Column(scale=1):
                with gr.Row():
                    # Use gr.Image with all interactive features disabled so the
                    # logos render as static images, not upload widgets.
                    gr.Image("images/nhm_logo.png", show_label=False, height=120,
                             interactive=False, show_download_button=False,
                             show_share_button=False, show_fullscreen_button=False,
                             container=False)
                    gr.Image("images/nml_logo.png", show_label=False, height=120,
                             interactive=False, show_download_button=False,
                             show_share_button=False, show_fullscreen_button=False,
                             container=False)

        with gr.Row():
            # Left column: controls and uploads
            with gr.Column(scale=1):
                # Upload interface (multiple images allowed)
                upload_button = gr.UploadButton(
                    "Click to Upload Images",
                    file_types=["image"],
                    file_count="multiple"
                )

                # Define choices as a list of tuples: (Display Name, Internal Value)
                model_choices = [
                    # Gemini
                    ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
                    # GPT-4.1 Series
                    ("GPT-4.1 Nano", "gpt-4.1-nano"),
                    ("GPT-4.1 Mini", "gpt-4.1-mini"),
                    ("GPT-4.1", "gpt-4.1"),
                    ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
                    # Other Models
                    ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
                    ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
                    # Experimental Models
                    ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
                    ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
                ]

                # Internal value of the default choice (first entry above)
                default_model_internal_value = "google/gemini-2.0-flash-001"

                # Model selection dropdown — the component's value is the
                # internal model id, not the display name.
                model_choice = gr.Dropdown(
                    choices=model_choices,
                    label="Select Model",
                    value=default_model_internal_value,  # Use the internal value for the default
                    # info="Choose the language model to use."  # Optional: Add extra info tooltip
                    visible=True
                )

                # Response length selection; mapped to a system prompt by
                # get_sys_prompt() at analysis time.
                length_choice = gr.Radio(
                    choices=["short", "medium", "long"],
                    label="Response Length",
                    value="medium",
                    info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
                )

                # Preview gallery for uploaded images
                gr.Markdown("### Uploaded Images")
                input_gallery = gr.Gallery(
                    label="",
                    columns=3,
                    height=150,
                    object_fit="contain"
                )

                # Analysis button
                analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")

                # Hidden state components storing uploaded file paths and
                # their base filenames (used as image ids in the CSV).
                image_state = gr.State([])
                filename_state = gr.State([])

                # CSV download component (populated after analysis)
                csv_download = gr.File(label="CSV Results")

            # Right column: display area
            with gr.Column(scale=2):
                with gr.Column(elem_classes="image-container"):
                    current_image = gr.Image(
                        label="Current Image",
                        height=600,  # Set the maximum desired height
                        type="filepath",
                        show_fullscreen_button=True,
                        show_download_button=False,
                        show_share_button=False
                    )

                # Navigation row: previous / counter ("n of m") / next
                with gr.Row():
                    prev_button = gr.Button("← Previous", size="sm")
                    image_counter = gr.Markdown("", elem_id="image-counter")
                    next_button = gr.Button("Next →", size="sm")

                # Alt-text heading and output
                gr.Markdown("### Generated Alt-text")

                # Read-only textbox showing the alt-text for the current image
                analysis_text = gr.Textbox(
                    label="",
                    value="Please analyze images to see results",
                    lines=6,
                    max_lines=10,
                    interactive=False,
                    show_label=False
                )

        # Hidden state for gallery navigation: index of the image currently
        # shown, plus the full lists of paths and per-image results.
        current_index = gr.State(0)
        all_images = gr.State([])
        all_results = gr.State([])

        # Handle file uploads - store files for use during analysis
        def handle_upload(files):
            """Collect uploaded file paths and base filenames.

            Returns the path list twice: once for the preview gallery and
            once for the hidden image_state consumed by analyze_images.
            """
            file_paths = []
            file_names = []
            for file in files:
                file_paths.append(file.name)
                # Extract filename without path or extension for later use
                file_names.append(get_base_filename(file.name))
            return file_paths, file_paths, file_names

        upload_button.upload(
            fn=handle_upload,
            inputs=[upload_button],
            outputs=[input_gallery, image_state, filename_state]
        )

        # Main analysis callback
        def analyze_images(image_paths, model_choice, length_choice, filenames):
            """Caption every uploaded image and prepare the initial display.

            Returns a 7-tuple matching the click outputs: (all image paths,
            per-image result dicts, reset index 0, first image path, counter
            text, first caption, CSV download path).
            """
            if not image_paths:
                # Must return one value per output component.
                return [], [], 0, "", "No images", "", ""

            # Get system prompt based on length selection
            sys_prompt = get_sys_prompt(length_choice)

            image_results = []

            for i, image_path in enumerate(image_paths):
                # Use original filename as image_id if available
                if i < len(filenames) and filenames[i]:
                    image_id = filenames[i]
                else:
                    image_id = f"Image {i+1}"

                try:
                    # Open the image file for analysis
                    img = Image.open(image_path)
                    prompt0 = prompt_new()  # user prompt from library.utils_prompt

                    # Extract the actual model name (remove any labels like "(default)")
                    # NOTE(review): dropdown values are internal ids without
                    # " (", so this split is a defensive fallback.
                    if " (" in model_choice:
                        model_name = model_choice.split(" (")[0]
                    else:
                        model_name = model_choice

                    # Experimental Gemini models go through the dedicated
                    # Google endpoint first.
                    is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name

                    if is_gemini_model:
                        try:
                            # First try using the dedicated gemini client
                            # (generate_caption is defined in
                            # library.utils_model — presumably returns the
                            # caption string; confirm there).
                            result = gemini.generate_caption(
                                img,
                                model=model_name,
                                max_image_size=512,
                                prompt=prompt0,
                                prompt_dev=sys_prompt,
                                temperature=1
                            )
                        except Exception as gemini_error:
                            # If gemini client fails, fall back to standard OR client
                            result = OR.generate_caption(
                                img,
                                model=model_name,
                                max_image_size=512,
                                prompt=prompt0,
                                prompt_dev=sys_prompt,
                                temperature=1
                            )
                    else:
                        # For all other models, use OR client directly
                        result = OR.generate_caption(
                            img,
                            model=model_name,
                            max_image_size=512,
                            prompt=prompt0,
                            prompt_dev=sys_prompt,
                            temperature=1
                        )

                    # Add to results
                    image_results.append({
                        "image_id": image_id,
                        "content": result
                    })

                except Exception as e:
                    # Per-image failure is recorded as the caption so one bad
                    # file doesn't abort the whole batch.
                    error_message = f"Error: {str(e)}"
                    image_results.append({
                        "image_id": image_id,
                        "content": error_message
                    })

            # Create a CSV file for download
            csv_path = create_csv_file_simple(image_results)

            # Set up initial display with first image
            if len(image_paths) > 0:
                initial_image = image_paths[0]
                initial_counter = f"{1} of {len(image_paths)}"
                initial_text = image_results[0]["content"]
            else:
                initial_image = ""
                initial_text = "No images analyzed"
                initial_counter = "0 of 0"

            return (image_paths, image_results, 0, initial_image, initial_counter,
                    initial_text, csv_path)

        # Function to navigate to previous image (wraps around at index 0)
        def go_to_prev(current_idx, images, results):
            """Step backwards through the analyzed images."""
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", ""

            new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
            counter_html = f"{new_idx + 1} of {len(images)}"

            return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])

        # Function to navigate to next image (wraps around at the end)
        def go_to_next(current_idx, images, results):
            """Step forwards through the analyzed images."""
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", ""

            new_idx = (current_idx + 1) % len(images)
            counter_html = f"{new_idx + 1} of {len(images)}"

            return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])

        # Connect the analyze button
        analyze_button.click(
            fn=analyze_images,
            inputs=[image_state, model_choice, length_choice, filename_state],
            outputs=[
                all_images, all_results, current_index, current_image, image_counter,
                analysis_text, csv_download
            ]
        )

        # Connect navigation buttons
        prev_button.click(
            fn=go_to_prev,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text]
        )

        next_button.click(
            fn=go_to_next,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text]
        )

        # Optional: collapsible help/about panel
        with gr.Accordion("About", open=False):
            gr.Markdown("""
            ## About this demo

            This demo generates alternative text for images.

            - Upload one or more images using the upload button
            - Choose a model and response length for generation
            - Navigate through the images with the Previous and Next buttons
            - Download CSV with all results

            Developed by the Natural History Museum in Partnership with National Museums Liverpool.

            If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
            chris.addis@nhm.ac.uk
            """)

    return demo
 
 
 
 
376
 
377
# Launch the app only when run as a script (not when imported, e.g. by tests
# or a Hugging Face Spaces wrapper).
if __name__ == "__main__":
    app = create_demo()
    app.launch()
.ipynb_checkpoints/app2-checkpoint.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
def process_image(image):
    """Identity pass-through: echo the uploaded image back for display."""
    return image
6
+
7
# Create the Gradio interface: a two-column layout where the left column
# takes the upload and the right column echoes it back via process_image.
with gr.Blocks() as demo:
    gr.Markdown("# Image Uploader and Viewer")

    with gr.Row():
        with gr.Column():
            # Left: upload widget plus the trigger button.
            input_image = gr.Image(type="pil", label="Upload an image")
            upload_button = gr.Button("Display Image")

        with gr.Column():
            # Right: display panel for the echoed image.
            output_image = gr.Image(label="Displayed Image")

    # Clicking the button pipes the uploaded image through process_image
    # (an identity function) into the output panel.
    upload_button.click(
        fn=process_image,
        inputs=input_image,
        outputs=output_image
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
app-Copy1.py DELETED
@@ -1,380 +0,0 @@
1
- import gradio as gr
2
- import numpy as np
3
- from PIL import Image
4
- import io
5
- import os
6
- import requests
7
- import json
8
- from dotenv import load_dotenv
9
- import openai
10
- import base64
11
- import csv
12
- import tempfile
13
- import datetime
14
-
15
- # Load environment variables from .env file if it exists (for local development)
16
- # On Hugging Face Spaces, the secrets are automatically available as environment variables
17
- if os.path.exists(".env"):
18
- load_dotenv()
19
-
20
- from io import BytesIO
21
- import numpy as np
22
- import requests
23
- from PIL import Image
24
-
25
- # import libraries
26
- from library.utils_model import *
27
- from library.utils_html import *
28
- from library.utils_prompt import *
29
-
30
- OR = OpenRouterAPI()
31
- gemini = OpenRouterAPI(api_key = os.getenv("GEMINI_API_KEY"),base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
32
-
33
- # Path for storing user preferences
34
- PREFERENCES_FILE = "data/user_preferences.csv"
35
-
36
- # Ensure directory exists
37
- os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
-
39
- def get_sys_prompt(length="medium"):
40
- if length == "short":
41
- dev_prompt = """You are a museum curator tasked with generating alt-text (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maximum of 130 characters."""
42
- elif length == "medium":
43
- dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be between 250-300 characters in length."""
44
- else:
45
- dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
46
- return dev_prompt
47
-
48
- # This function is no longer needed since we removed A/B testing
49
-
50
- def create_csv_file_simple(results):
51
- """Create a CSV file from the results and return the path"""
52
- # Create a temporary file
53
- fd, path = tempfile.mkstemp(suffix='.csv')
54
-
55
- with os.fdopen(fd, 'w', newline='') as f:
56
- writer = csv.writer(f)
57
- # Write header
58
- writer.writerow(['image_id', 'content'])
59
- # Write data
60
- for result in results:
61
- writer.writerow([
62
- result.get('image_id', ''),
63
- result.get('content', '')
64
- ])
65
-
66
- return path
67
-
68
- # Extract original filename without path or extension
69
- def get_base_filename(filepath):
70
- if not filepath:
71
- return ""
72
- # Get the basename (filename with extension)
73
- basename = os.path.basename(filepath)
74
- # Remove extension
75
- filename = os.path.splitext(basename)[0]
76
- return filename
77
-
78
- # Define the Gradio interface
79
- def create_demo():
80
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
81
- # Replace the existing logo code section:
82
- with gr.Row():
83
- with gr.Column(scale=3):
84
- gr.Markdown("# AI Alt-text Generator")
85
- gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
86
- gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
87
- with gr.Column(scale=1):
88
- with gr.Row():
89
- # Use gr.Image with all interactive features disabled
90
- gr.Image("images/nhm_logo.png", show_label=False, height=120,
91
- interactive=False, show_download_button=False,
92
- show_share_button=False, show_fullscreen_button=False,
93
- container=False)
94
- gr.Image("images/nml_logo.png", show_label=False, height=120,
95
- interactive=False, show_download_button=False,
96
- show_share_button=False, show_fullscreen_button=False,
97
- container=False)
98
-
99
-
100
- with gr.Row():
101
- # Left column: Controls and uploads
102
- with gr.Column(scale=1):
103
- # Upload interface
104
- upload_button = gr.UploadButton(
105
- "Click to Upload Images",
106
- file_types=["image"],
107
- file_count="multiple"
108
- )
109
-
110
- # Define choices as a list of tuples: (Display Name, Internal Value)
111
- model_choices = [
112
- # Gemini
113
- ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
114
- # GPT-4.1 Series
115
- ("GPT-4.1 Nano", "gpt-4.1-nano"),
116
- ("GPT-4.1 Mini", "gpt-4.1-mini"),
117
- ("GPT-4.1", "gpt-4.1"),
118
- ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
119
- # Other Models
120
- ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
121
- ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
122
- # Experimental Models
123
- ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
124
- ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
125
- ]
126
-
127
- # Find the internal value of the default choice
128
- default_model_internal_value = "google/gemini-2.0-flash-001"
129
-
130
- # Add model selection dropdown
131
- model_choice = gr.Dropdown(
132
- choices=model_choices,
133
- label="Select Model",
134
- value=default_model_internal_value, # Use the internal value for the default
135
- # info="Choose the language model to use." # Optional: Add extra info tooltip
136
- visible=True
137
- )
138
-
139
-
140
- # Add response length selection
141
- length_choice = gr.Radio(
142
- choices=["short", "medium", "long"],
143
- label="Response Length",
144
- value="medium",
145
- info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
146
- )
147
-
148
- # Preview gallery for uploaded images
149
- gr.Markdown("### Uploaded Images")
150
- input_gallery = gr.Gallery(
151
- label="",
152
- columns=3,
153
- height=150,
154
- object_fit="contain"
155
- )
156
-
157
- # Analysis button
158
- analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")
159
-
160
- # Hidden state component to store image info
161
- image_state = gr.State([])
162
- filename_state = gr.State([])
163
-
164
- # CSV download component
165
- csv_download = gr.File(label="CSV Results")
166
-
167
- # Right column: Display area
168
- with gr.Column(scale=2):
169
- with gr.Column(elem_classes="image-container"):
170
- current_image = gr.Image(
171
- label="Current Image",
172
- height=600, # Set the maximum desired height
173
- type="filepath",
174
- show_fullscreen_button=True,
175
- show_download_button=False,
176
- show_share_button=False
177
- )
178
-
179
- # Navigation row
180
- with gr.Row():
181
- prev_button = gr.Button("← Previous", size="sm")
182
- image_counter = gr.Markdown("", elem_id="image-counter")
183
- next_button = gr.Button("Next →", size="sm")
184
-
185
- # Alt-text heading and output
186
- gr.Markdown("### Generated Alt-text")
187
-
188
- # Alt-text
189
- analysis_text = gr.Textbox(
190
- label="",
191
- value="Please analyze images to see results",
192
- lines=6,
193
- max_lines=10,
194
- interactive=False,
195
- show_label=False
196
- )
197
-
198
- # Hidden state for gallery navigation
199
- current_index = gr.State(0)
200
- all_images = gr.State([])
201
- all_results = gr.State([])
202
-
203
- # Handle file uploads - store files for use during analysis
204
- def handle_upload(files):
205
- file_paths = []
206
- file_names = []
207
- for file in files:
208
- file_paths.append(file.name)
209
- # Extract filename without path or extension for later use
210
- file_names.append(get_base_filename(file.name))
211
- return file_paths, file_paths, file_names
212
-
213
- upload_button.upload(
214
- fn=handle_upload,
215
- inputs=[upload_button],
216
- outputs=[input_gallery, image_state, filename_state]
217
- )
218
-
219
- # Function to analyze images
220
- # Modify the analyze_images function in your code:
221
-
222
- def analyze_images(image_paths, model_choice, length_choice, filenames):
223
- if not image_paths:
224
- return [], [], 0, "", "No images", "", ""
225
-
226
- # Get system prompt based on length selection
227
- sys_prompt = get_sys_prompt(length_choice)
228
-
229
- image_results = []
230
-
231
- for i, image_path in enumerate(image_paths):
232
- # Use original filename as image_id if available
233
- if i < len(filenames) and filenames[i]:
234
- image_id = filenames[i]
235
- else:
236
- image_id = f"Image {i+1}"
237
-
238
- try:
239
- # Open the image file for analysis
240
- img = Image.open(image_path)
241
- prompt0 = prompt_new() # Using the new prompt function
242
-
243
- # Extract the actual model name (remove any labels like "(default)")
244
- if " (" in model_choice:
245
- model_name = model_choice.split(" (")[0]
246
- else:
247
- model_name = model_choice
248
-
249
- # Check if this is one of the Gemini models that needs special handling
250
- is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
251
-
252
- if is_gemini_model:
253
- try:
254
- # First try using the dedicated gemini client
255
- result = gemini.generate_caption(
256
- img,
257
- model=model_name,
258
- max_image_size=512,
259
- prompt=prompt0,
260
- prompt_dev=sys_prompt,
261
- temperature=1
262
- )
263
- except Exception as gemini_error:
264
- # If gemini client fails, fall back to standard OR client
265
- result = OR.generate_caption(
266
- img,
267
- model=model_name,
268
- max_image_size=512,
269
- prompt=prompt0,
270
- prompt_dev=sys_prompt,
271
- temperature=1
272
- )
273
- else:
274
- # For all other models, use OR client directly
275
- result = OR.generate_caption(
276
- img,
277
- model=model_name,
278
- max_image_size=512,
279
- prompt=prompt0,
280
- prompt_dev=sys_prompt,
281
- temperature=1
282
- )
283
-
284
- # Add to results
285
- image_results.append({
286
- "image_id": image_id,
287
- "content": result
288
- })
289
-
290
- except Exception as e:
291
- error_message = f"Error: {str(e)}"
292
- image_results.append({
293
- "image_id": image_id,
294
- "content": error_message
295
- })
296
-
297
- # Create a CSV file for download
298
- csv_path = create_csv_file_simple(image_results)
299
-
300
- # Set up initial display with first image
301
- if len(image_paths) > 0:
302
- initial_image = image_paths[0]
303
- initial_counter = f"{1} of {len(image_paths)}"
304
- initial_text = image_results[0]["content"]
305
- else:
306
- initial_image = ""
307
- initial_text = "No images analyzed"
308
- initial_counter = "0 of 0"
309
-
310
- return (image_paths, image_results, 0, initial_image, initial_counter,
311
- initial_text, csv_path)
312
-
313
-
314
- # Function to navigate to previous image
315
- def go_to_prev(current_idx, images, results):
316
- if not images or len(images) == 0:
317
- return current_idx, "", "0 of 0", ""
318
-
319
- new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
320
- counter_html = f"{new_idx + 1} of {len(images)}"
321
-
322
- return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
323
-
324
- # Function to navigate to next image
325
- def go_to_next(current_idx, images, results):
326
- if not images or len(images) == 0:
327
- return current_idx, "", "0 of 0", ""
328
-
329
- new_idx = (current_idx + 1) % len(images)
330
- counter_html = f"{new_idx + 1} of {len(images)}"
331
-
332
- return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
333
-
334
- # Connect the analyze button
335
- analyze_button.click(
336
- fn=analyze_images,
337
- inputs=[image_state, model_choice, length_choice, filename_state],
338
- outputs=[
339
- all_images, all_results, current_index, current_image, image_counter,
340
- analysis_text, csv_download
341
- ]
342
- )
343
-
344
- # Connect navigation buttons
345
- prev_button.click(
346
- fn=go_to_prev,
347
- inputs=[current_index, all_images, all_results],
348
- outputs=[current_index, current_image, image_counter, analysis_text]
349
- )
350
-
351
- next_button.click(
352
- fn=go_to_next,
353
- inputs=[current_index, all_images, all_results],
354
- outputs=[current_index, current_image, image_counter, analysis_text]
355
- )
356
-
357
- # Optional: Add additional information
358
- with gr.Accordion("About", open=False):
359
- gr.Markdown("""
360
- ## About this demo
361
-
362
- This demo generates alternative text for images.
363
-
364
- - Upload one or more images using the upload button
365
- - Choose a model and response length for generation
366
- - Navigate through the images with the Previous and Next buttons
367
- - Download CSV with all results
368
-
369
- Developed by the Natural History Museum in Partnership with National Museums Liverpool.
370
-
371
- If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
372
- chris.addis@nhm.ac.uk
373
- """)
374
-
375
- return demo
376
-
377
- # Launch the app
378
- if __name__ == "__main__":
379
- app = create_demo()
380
- app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -45,6 +45,8 @@ def get_sys_prompt(length="medium"):
45
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
46
  return dev_prompt
47
 
 
 
48
  def create_csv_file_simple(results):
49
  """Create a CSV file from the results and return the path"""
50
  # Create a temporary file
@@ -73,264 +75,153 @@ def get_base_filename(filepath):
73
  filename = os.path.splitext(basename)[0]
74
  return filename
75
 
76
- # Define custom CSS for the application
77
- custom_css = """
78
- .container {
79
- max-width: 1200px;
80
- margin: 0 auto;
81
- }
82
- .header {
83
- text-align: center;
84
- margin-bottom: 20px;
85
- border-bottom: 2px solid #eee;
86
- padding-bottom: 15px;
87
- }
88
- .model-card {
89
- border: 1px solid #e0e0e0;
90
- border-radius: 8px;
91
- padding: 15px;
92
- background-color: #f9f9f9;
93
- margin-bottom: 15px;
94
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
95
- }
96
- .upload-box {
97
- border: 2px dashed #ccc;
98
- border-radius: 8px;
99
- padding: 20px;
100
- text-align: center;
101
- margin-bottom: 15px;
102
- background-color: #f7f7f7;
103
- transition: all 0.3s ease;
104
- }
105
- .upload-box:hover {
106
- border-color: #2196F3;
107
- background-color: #f0f8ff;
108
- }
109
- .gallery-container {
110
- background-color: #f5f5f5;
111
- border-radius: 8px;
112
- padding: 10px;
113
- margin-bottom: 15px;
114
- }
115
- .result-container {
116
- border: 1px solid #e0e0e0;
117
- border-radius: 8px;
118
- padding: 15px;
119
- margin-top: 20px;
120
- background-color: white;
121
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
122
- }
123
- .nav-buttons {
124
- display: flex;
125
- justify-content: space-between;
126
- align-items: center;
127
- margin: 10px 0;
128
- }
129
- .footer {
130
- text-align: center;
131
- margin-top: 30px;
132
- padding-top: 15px;
133
- border-top: 1px solid #eee;
134
- color: #666;
135
- font-size: 0.9em;
136
- }
137
- .logo-container {
138
- display: flex;
139
- justify-content: center;
140
- align-items: center;
141
- gap: 20px;
142
- margin-bottom: 10px;
143
- }
144
- .length-selector {
145
- display: flex;
146
- gap: 10px;
147
- margin-bottom: 15px;
148
- }
149
- .progress-indicator {
150
- height: 4px;
151
- background-color: #f0f0f0;
152
- border-radius: 2px;
153
- overflow: hidden;
154
- margin-bottom: 15px;
155
- }
156
- .progress-bar {
157
- height: 100%;
158
- background-color: #4CAF50;
159
- width: 0%;
160
- transition: width 0.3s ease;
161
- }
162
- """
163
-
164
- # Define the Gradio interface with the new design
165
  def create_demo():
166
- # Use the Monochrome theme with custom CSS for better compatibility
167
- theme = gr.themes.Monochrome()
168
-
169
- with gr.Blocks(theme=theme, css=custom_css) as demo:
170
- # Header section
171
- with gr.Row(elem_classes="header"):
172
  with gr.Column(scale=3):
173
  gr.Markdown("# AI Alt-text Generator")
174
- gr.Markdown("Upload images to generate accessible alternative text that meets WCAG 2.1 Guidelines")
175
-
176
- with gr.Column(scale=1, elem_classes="logo-container"):
177
- gr.Image("images/nhm_logo.png", show_label=False, height=80,
178
- interactive=False, show_download_button=False,
179
- show_share_button=False, show_fullscreen_button=False)
180
- gr.Image("images/nml_logo.png", show_label=False, height=80,
181
- interactive=False, show_download_button=False,
182
- show_share_button=False, show_fullscreen_button=False)
 
 
 
 
183
 
184
- # Main content area
185
  with gr.Row():
186
- # Left panel - Controls
187
- with gr.Column(scale=1, elem_classes="control-panel"):
188
- # Upload area with styling
189
- with gr.Column(elem_classes="upload-box"):
190
- upload_button = gr.UploadButton(
191
- "📷 Upload Images",
192
- file_types=["image"],
193
- file_count="multiple",
194
- size="lg"
195
- )
196
- gr.Markdown("*Drag and drop or click to upload multiple images*")
197
 
198
- # Options card
199
- with gr.Column(elem_classes="model-card"):
200
- gr.Markdown("### Model Settings")
201
-
202
- # Model selection dropdown
203
- model_choices = [
204
- # Gemini
205
- ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
206
- # GPT-4.1 Series
207
- ("GPT-4.1 Nano", "gpt-4.1-nano"),
208
- ("GPT-4.1 Mini", "gpt-4.1-mini"),
209
- ("GPT-4.1", "gpt-4.1"),
210
- ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
211
- # Other Models
212
- ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
213
- ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
214
- # Experimental Models
215
- ("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
216
- ("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
217
- ]
218
-
219
- default_model_internal_value = "google/gemini-2.0-flash-001"
220
-
221
- model_choice = gr.Dropdown(
222
- choices=model_choices,
223
- label="AI Model",
224
- value=default_model_internal_value,
225
- info="Select the AI model for generating descriptions",
226
- visible=True
227
- )
228
-
229
- # Length selector with visual indicators
230
- gr.Markdown("### Response Length")
231
- with gr.Row(elem_classes="length-selector"):
232
- length_choice = gr.Radio(
233
- choices=["short", "medium", "long"],
234
- label="Response Length",
235
- value="medium",
236
- info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
237
- )
238
 
239
- # Image preview gallery
240
- with gr.Column(elem_classes="gallery-container"):
241
- gr.Markdown("### Uploaded Images")
242
- input_gallery = gr.Gallery(
243
- label="",
244
- columns=3,
245
- height=180,
246
- object_fit="contain"
247
- )
248
-
249
- # Analysis button
250
- analyze_button = gr.Button("🔍 Analyze Images", variant="primary", size="lg")
251
 
252
- # CSV Download section
253
- with gr.Column(visible=False, elem_id="download-section", elem_classes="model-card") as download_section:
254
- gr.Markdown("### Download Results")
255
- csv_download = gr.File(label="CSV Results", elem_id="csv-download")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
- # Hidden state components
 
 
 
258
  image_state = gr.State([])
259
  filename_state = gr.State([])
260
 
261
- # Right panel - Results display
262
- with gr.Column(scale=2, elem_classes="results-panel"):
263
- with gr.Column(elem_classes="result-container"):
264
- # Progress indicator
265
- with gr.Row(elem_id="progress-container", visible=False) as progress_container:
266
- with gr.Column():
267
- gr.HTML('<div class="progress-indicator"><div class="progress-bar" id="progress-bar"></div></div>')
268
- progress_text = gr.Markdown("Processing...", elem_id="progress-text")
269
-
270
- # Image display
271
  current_image = gr.Image(
272
- label="Image Preview",
273
- height=400,
274
- type="filepath",
275
- show_fullscreen_button=True,
276
- show_download_button=False,
277
- show_share_button=False,
278
- elem_classes="current-image"
279
- )
280
-
281
- # Navigation controls
282
- with gr.Row(elem_classes="nav-buttons"):
283
- prev_button = gr.Button("← Previous", size="sm", variant="secondary")
284
- image_counter = gr.Markdown("", elem_id="image-counter")
285
- next_button = gr.Button("Next →", size="sm", variant="secondary")
286
-
287
- # Alt-text results
288
- gr.Markdown("### Generated Alt-text", elem_id="result-heading")
289
- analysis_text = gr.Textbox(
290
- label="",
291
- value="Images will appear here after analysis. Please upload and analyze images to see results.",
292
- lines=6,
293
- max_lines=10,
294
- interactive=False,
295
- show_label=False,
296
- elem_classes="result-text"
297
  )
298
 
299
- # Hidden states for navigation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  current_index = gr.State(0)
301
  all_images = gr.State([])
302
  all_results = gr.State([])
303
 
304
- # Footer section
305
- with gr.Row(elem_classes="footer"):
306
- gr.Markdown("""
307
- Developed by the Natural History Museum in Partnership with National Museums Liverpool.
308
- Funded by the DCMS Pilot Scheme. For support, contact: chris.addis@nhm.ac.uk
309
- """)
310
-
311
- # Handle file uploads
312
  def handle_upload(files):
313
  file_paths = []
314
  file_names = []
315
  for file in files:
316
  file_paths.append(file.name)
 
317
  file_names.append(get_base_filename(file.name))
318
-
319
- # Show a message about the number of files uploaded
320
- upload_message = f"✅ {len(files)} image{'s' if len(files) != 1 else ''} uploaded successfully!"
321
-
322
- return file_paths, file_paths, file_names, upload_message
323
 
324
  upload_button.upload(
325
  fn=handle_upload,
326
  inputs=[upload_button],
327
- outputs=[input_gallery, image_state, filename_state, progress_text]
328
  )
329
 
330
- # Function to analyze images with visual feedback
 
 
331
  def analyze_images(image_paths, model_choice, length_choice, filenames):
332
  if not image_paths:
333
- return [], [], 0, "", "No images uploaded", "", gr.update(visible=False)
334
 
335
  # Get system prompt based on length selection
336
  sys_prompt = get_sys_prompt(length_choice)
@@ -347,12 +238,15 @@ def create_demo():
347
  try:
348
  # Open the image file for analysis
349
  img = Image.open(image_path)
350
- prompt0 = prompt_new()
351
 
352
- # Use model_choice directly since it's the internal value
353
- model_name = model_choice
 
 
 
354
 
355
- # Check if this is one of the Gemini models
356
  is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
357
 
358
  if is_gemini_model:
@@ -413,11 +307,9 @@ def create_demo():
413
  initial_text = "No images analyzed"
414
  initial_counter = "0 of 0"
415
 
416
- # Make the download section visible now that we have results
417
- download_visible = gr.update(visible=True)
418
-
419
  return (image_paths, image_results, 0, initial_image, initial_counter,
420
- initial_text, csv_path, download_visible)
 
421
 
422
  # Function to navigate to previous image
423
  def go_to_prev(current_idx, images, results):
@@ -439,24 +331,14 @@ def create_demo():
439
 
440
  return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
441
 
442
- # Show progress indicator during analysis
443
  analyze_button.click(
444
- fn=lambda: (gr.update(visible=True), "Processing images... Please wait"),
445
- inputs=[],
446
- outputs=[progress_container, progress_text],
447
- queue=False
448
- ).then(
449
  fn=analyze_images,
450
  inputs=[image_state, model_choice, length_choice, filename_state],
451
  outputs=[
452
  all_images, all_results, current_index, current_image, image_counter,
453
- analysis_text, csv_download, download_section
454
  ]
455
- ).then(
456
- fn=lambda: (gr.update(visible=False), "Analysis complete!"),
457
- inputs=[],
458
- outputs=[progress_container, progress_text],
459
- queue=False
460
  )
461
 
462
  # Connect navigation buttons
@@ -472,33 +354,22 @@ def create_demo():
472
  outputs=[current_index, current_image, image_counter, analysis_text]
473
  )
474
 
475
- # Additional information accordion
476
- with gr.Accordion("About this Tool", open=False):
477
  gr.Markdown("""
478
- ## About the AI Alt-text Generator
479
-
480
- This tool uses advanced AI models to automatically generate alternative text descriptions for images,
481
- helping museums and cultural institutions make their digital content more accessible for visually impaired users.
482
-
483
- ### Features:
484
-
485
- - **Multiple AI Models**: Choose from various AI models including Gemini, GPT-4.1, Claude, and others
486
- - **Customizable Length**: Select short, medium, or long descriptions based on your needs
487
- - **Batch Processing**: Upload and process multiple images at once
488
- - **CSV Export**: Download all generated descriptions in a single file
489
 
490
- ### How to Use:
491
 
492
- 1. Upload one or more images using the upload button
493
- 2. Select your preferred AI model and description length
494
- 3. Click "Analyze Images" to generate descriptions
495
- 4. Navigate through results with the Previous and Next buttons
496
- 5. Download all results as a CSV file
497
 
498
  Developed by the Natural History Museum in Partnership with National Museums Liverpool.
499
- Funded by the DCMS Pilot Scheme.
500
 
501
- For support, feedback, or suggestions, please contact: chris.addis@nhm.ac.uk
 
502
  """)
503
 
504
  return demo
 
45
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
46
  return dev_prompt
47
 
48
+ # This function is no longer needed since we removed A/B testing
49
+
50
  def create_csv_file_simple(results):
51
  """Create a CSV file from the results and return the path"""
52
  # Create a temporary file
 
75
  filename = os.path.splitext(basename)[0]
76
  return filename
77
 
78
+ # Define the Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def create_demo():
80
+ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
81
+ # Replace the existing logo code section:
82
+ with gr.Row():
 
 
 
83
  with gr.Column(scale=3):
84
  gr.Markdown("# AI Alt-text Generator")
85
+ gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
86
+ gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
87
+ with gr.Column(scale=1):
88
+ with gr.Row():
89
+ # Use gr.Image with all interactive features disabled
90
+ gr.Image("images/nhm_logo.png", show_label=False, height=120,
91
+ interactive=False, show_download_button=False,
92
+ show_share_button=False, show_fullscreen_button=False,
93
+ container=False)
94
+ gr.Image("images/nml_logo.png", show_label=False, height=120,
95
+ interactive=False, show_download_button=False,
96
+ show_share_button=False, show_fullscreen_button=False,
97
+ container=False)
98
 
99
+
100
  with gr.Row():
101
+ # Left column: Controls and uploads
102
+ with gr.Column(scale=1):
103
+ # Upload interface
104
+ upload_button = gr.UploadButton(
105
+ "Click to Upload Images",
106
+ file_types=["image"],
107
+ file_count="multiple"
108
+ )
 
 
 
109
 
110
+ # Define choices as a list of tuples: (Display Name, Internal Value)
111
+ model_choices = [
112
+ # Gemini
113
+ ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
114
+ # GPT-4.1 Series
115
+ ("GPT-4.1 Nano", "gpt-4.1-nano"),
116
+ ("GPT-4.1 Mini", "gpt-4.1-mini"),
117
+ ("GPT-4.1", "gpt-4.1"),
118
+ ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
119
+ # Other Models
120
+ ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
121
+ ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
122
+ # Experimental Models
123
+ ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
124
+ ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
125
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
+ # Find the internal value of the default choice
128
+ default_model_internal_value = "google/gemini-2.0-flash-001"
 
 
 
 
 
 
 
 
 
 
129
 
130
+ # Add model selection dropdown
131
+ model_choice = gr.Dropdown(
132
+ choices=model_choices,
133
+ label="Select Model",
134
+ value=default_model_internal_value, # Use the internal value for the default
135
+ # info="Choose the language model to use." # Optional: Add extra info tooltip
136
+ visible=True
137
+ )
138
+
139
+
140
+ # Add response length selection
141
+ length_choice = gr.Radio(
142
+ choices=["short", "medium", "long"],
143
+ label="Response Length",
144
+ value="medium",
145
+ info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
146
+ )
147
+
148
+ # Preview gallery for uploaded images
149
+ gr.Markdown("### Uploaded Images")
150
+ input_gallery = gr.Gallery(
151
+ label="",
152
+ columns=3,
153
+ height=150,
154
+ object_fit="contain"
155
+ )
156
 
157
+ # Analysis button
158
+ analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")
159
+
160
+ # Hidden state component to store image info
161
  image_state = gr.State([])
162
  filename_state = gr.State([])
163
 
164
+ # CSV download component
165
+ csv_download = gr.File(label="CSV Results")
166
+
167
+ # Right column: Display area
168
+ with gr.Column(scale=2):
169
+ with gr.Column(elem_classes="image-container"):
 
 
 
 
170
  current_image = gr.Image(
171
+ label="Current Image",
172
+ height=600, # Set the maximum desired height
173
+ type="filepath",
174
+ show_fullscreen_button=True,
175
+ show_download_button=False,
176
+ show_share_button=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  )
178
 
179
+ # Navigation row
180
+ with gr.Row():
181
+ prev_button = gr.Button("← Previous", size="sm")
182
+ image_counter = gr.Markdown("", elem_id="image-counter")
183
+ next_button = gr.Button("Next →", size="sm")
184
+
185
+ # Alt-text heading and output
186
+ gr.Markdown("### Generated Alt-text")
187
+
188
+ # Alt-text
189
+ analysis_text = gr.Textbox(
190
+ label="",
191
+ value="Please analyze images to see results",
192
+ lines=6,
193
+ max_lines=10,
194
+ interactive=False,
195
+ show_label=False
196
+ )
197
+
198
+ # Hidden state for gallery navigation
199
  current_index = gr.State(0)
200
  all_images = gr.State([])
201
  all_results = gr.State([])
202
 
203
+ # Handle file uploads - store files for use during analysis
 
 
 
 
 
 
 
204
  def handle_upload(files):
205
  file_paths = []
206
  file_names = []
207
  for file in files:
208
  file_paths.append(file.name)
209
+ # Extract filename without path or extension for later use
210
  file_names.append(get_base_filename(file.name))
211
+ return file_paths, file_paths, file_names
 
 
 
 
212
 
213
  upload_button.upload(
214
  fn=handle_upload,
215
  inputs=[upload_button],
216
+ outputs=[input_gallery, image_state, filename_state]
217
  )
218
 
219
+ # Function to analyze images
220
+ # Modify the analyze_images function in your code:
221
+
222
  def analyze_images(image_paths, model_choice, length_choice, filenames):
223
  if not image_paths:
224
+ return [], [], 0, "", "No images", "", ""
225
 
226
  # Get system prompt based on length selection
227
  sys_prompt = get_sys_prompt(length_choice)
 
238
  try:
239
  # Open the image file for analysis
240
  img = Image.open(image_path)
241
+ prompt0 = prompt_new() # Using the new prompt function
242
 
243
+ # Extract the actual model name (remove any labels like "(default)")
244
+ if " (" in model_choice:
245
+ model_name = model_choice.split(" (")[0]
246
+ else:
247
+ model_name = model_choice
248
 
249
+ # Check if this is one of the Gemini models that needs special handling
250
  is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
251
 
252
  if is_gemini_model:
 
307
  initial_text = "No images analyzed"
308
  initial_counter = "0 of 0"
309
 
 
 
 
310
  return (image_paths, image_results, 0, initial_image, initial_counter,
311
+ initial_text, csv_path)
312
+
313
 
314
  # Function to navigate to previous image
315
  def go_to_prev(current_idx, images, results):
 
331
 
332
  return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
333
 
334
+ # Connect the analyze button
335
  analyze_button.click(
 
 
 
 
 
336
  fn=analyze_images,
337
  inputs=[image_state, model_choice, length_choice, filename_state],
338
  outputs=[
339
  all_images, all_results, current_index, current_image, image_counter,
340
+ analysis_text, csv_download
341
  ]
 
 
 
 
 
342
  )
343
 
344
  # Connect navigation buttons
 
354
  outputs=[current_index, current_image, image_counter, analysis_text]
355
  )
356
 
357
+ # Optional: Add additional information
358
+ with gr.Accordion("About", open=False):
359
  gr.Markdown("""
360
+ ## About this demo
 
 
 
 
 
 
 
 
 
 
361
 
362
+ This demo generates alternative text for images.
363
 
364
+ - Upload one or more images using the upload button
365
+ - Choose a model and response length for generation
366
+ - Navigate through the images with the Previous and Next buttons
367
+ - Download CSV with all results
 
368
 
369
  Developed by the Natural History Museum in Partnership with National Museums Liverpool.
 
370
 
371
+ If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
372
+ chris.addis@nhm.ac.uk
373
  """)
374
 
375
  return demo
.ipynb_checkpoints/app-Copy1-checkpoint.py → app2.py RENAMED
@@ -45,8 +45,6 @@ def get_sys_prompt(length="medium"):
45
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
46
  return dev_prompt
47
 
48
- # This function is no longer needed since we removed A/B testing
49
-
50
  def create_csv_file_simple(results):
51
  """Create a CSV file from the results and return the path"""
52
  # Create a temporary file
@@ -75,153 +73,264 @@ def get_base_filename(filepath):
75
  filename = os.path.splitext(basename)[0]
76
  return filename
77
 
78
- # Define the Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def create_demo():
80
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
81
- # Replace the existing logo code section:
82
- with gr.Row():
 
 
 
83
  with gr.Column(scale=3):
84
  gr.Markdown("# AI Alt-text Generator")
85
- gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
86
- gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
87
- with gr.Column(scale=1):
88
- with gr.Row():
89
- # Use gr.Image with all interactive features disabled
90
- gr.Image("images/nhm_logo.png", show_label=False, height=120,
91
- interactive=False, show_download_button=False,
92
- show_share_button=False, show_fullscreen_button=False,
93
- container=False)
94
- gr.Image("images/nml_logo.png", show_label=False, height=120,
95
- interactive=False, show_download_button=False,
96
- show_share_button=False, show_fullscreen_button=False,
97
- container=False)
98
 
99
-
100
  with gr.Row():
101
- # Left column: Controls and uploads
102
- with gr.Column(scale=1):
103
- # Upload interface
104
- upload_button = gr.UploadButton(
105
- "Click to Upload Images",
106
- file_types=["image"],
107
- file_count="multiple"
108
- )
109
-
110
- # Define choices as a list of tuples: (Display Name, Internal Value)
111
- model_choices = [
112
- # Gemini
113
- ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
114
- # GPT-4.1 Series
115
- ("GPT-4.1 Nano", "gpt-4.1-nano"),
116
- ("GPT-4.1 Mini", "gpt-4.1-mini"),
117
- ("GPT-4.1", "gpt-4.1"),
118
- ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
119
- # Other Models
120
- ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
121
- ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
122
- # Experimental Models
123
- ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
124
- ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
125
- ]
126
-
127
- # Find the internal value of the default choice
128
- default_model_internal_value = "google/gemini-2.0-flash-001"
129
-
130
- # Add model selection dropdown
131
- model_choice = gr.Dropdown(
132
- choices=model_choices,
133
- label="Select Model",
134
- value=default_model_internal_value, # Use the internal value for the default
135
- # info="Choose the language model to use." # Optional: Add extra info tooltip
136
- visible=True
137
- )
138
-
139
 
140
- # Add response length selection
141
- length_choice = gr.Radio(
142
- choices=["short", "medium", "long"],
143
- label="Response Length",
144
- value="medium",
145
- info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
146
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # Preview gallery for uploaded images
149
- gr.Markdown("### Uploaded Images")
150
- input_gallery = gr.Gallery(
151
- label="",
152
- columns=3,
153
- height=150,
154
- object_fit="contain"
155
- )
 
 
 
 
156
 
157
- # Analysis button
158
- analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")
 
 
159
 
160
- # Hidden state component to store image info
161
  image_state = gr.State([])
162
  filename_state = gr.State([])
163
 
164
- # CSV download component
165
- csv_download = gr.File(label="CSV Results")
166
-
167
- # Right column: Display area
168
- with gr.Column(scale=2):
169
- with gr.Column(elem_classes="image-container"):
 
 
 
 
170
  current_image = gr.Image(
171
- label="Current Image",
172
- height=600, # Set the maximum desired height
173
- type="filepath",
174
- show_fullscreen_button=True,
175
- show_download_button=False,
176
- show_share_button=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  )
178
 
179
- # Navigation row
180
- with gr.Row():
181
- prev_button = gr.Button("← Previous", size="sm")
182
- image_counter = gr.Markdown("", elem_id="image-counter")
183
- next_button = gr.Button("Next →", size="sm")
184
-
185
- # Alt-text heading and output
186
- gr.Markdown("### Generated Alt-text")
187
-
188
- # Alt-text
189
- analysis_text = gr.Textbox(
190
- label="",
191
- value="Please analyze images to see results",
192
- lines=6,
193
- max_lines=10,
194
- interactive=False,
195
- show_label=False
196
- )
197
-
198
- # Hidden state for gallery navigation
199
  current_index = gr.State(0)
200
  all_images = gr.State([])
201
  all_results = gr.State([])
202
 
203
- # Handle file uploads - store files for use during analysis
 
 
 
 
 
 
 
204
  def handle_upload(files):
205
  file_paths = []
206
  file_names = []
207
  for file in files:
208
  file_paths.append(file.name)
209
- # Extract filename without path or extension for later use
210
  file_names.append(get_base_filename(file.name))
211
- return file_paths, file_paths, file_names
 
 
 
 
212
 
213
  upload_button.upload(
214
  fn=handle_upload,
215
  inputs=[upload_button],
216
- outputs=[input_gallery, image_state, filename_state]
217
  )
218
 
219
- # Function to analyze images
220
- # Modify the analyze_images function in your code:
221
-
222
  def analyze_images(image_paths, model_choice, length_choice, filenames):
223
  if not image_paths:
224
- return [], [], 0, "", "No images", "", ""
225
 
226
  # Get system prompt based on length selection
227
  sys_prompt = get_sys_prompt(length_choice)
@@ -238,15 +347,12 @@ def create_demo():
238
  try:
239
  # Open the image file for analysis
240
  img = Image.open(image_path)
241
- prompt0 = prompt_new() # Using the new prompt function
242
 
243
- # Extract the actual model name (remove any labels like "(default)")
244
- if " (" in model_choice:
245
- model_name = model_choice.split(" (")[0]
246
- else:
247
- model_name = model_choice
248
 
249
- # Check if this is one of the Gemini models that needs special handling
250
  is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
251
 
252
  if is_gemini_model:
@@ -307,9 +413,11 @@ def create_demo():
307
  initial_text = "No images analyzed"
308
  initial_counter = "0 of 0"
309
 
 
 
 
310
  return (image_paths, image_results, 0, initial_image, initial_counter,
311
- initial_text, csv_path)
312
-
313
 
314
  # Function to navigate to previous image
315
  def go_to_prev(current_idx, images, results):
@@ -331,14 +439,24 @@ def create_demo():
331
 
332
  return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
333
 
334
- # Connect the analyze button
335
  analyze_button.click(
 
 
 
 
 
336
  fn=analyze_images,
337
  inputs=[image_state, model_choice, length_choice, filename_state],
338
  outputs=[
339
  all_images, all_results, current_index, current_image, image_counter,
340
- analysis_text, csv_download
341
  ]
 
 
 
 
 
342
  )
343
 
344
  # Connect navigation buttons
@@ -354,22 +472,33 @@ def create_demo():
354
  outputs=[current_index, current_image, image_counter, analysis_text]
355
  )
356
 
357
- # Optional: Add additional information
358
- with gr.Accordion("About", open=False):
359
  gr.Markdown("""
360
- ## About this demo
 
 
 
 
 
 
 
 
 
 
361
 
362
- This demo generates alternative text for images.
363
 
364
- - Upload one or more images using the upload button
365
- - Choose a model and response length for generation
366
- - Navigate through the images with the Previous and Next buttons
367
- - Download CSV with all results
 
368
 
369
  Developed by the Natural History Museum in Partnership with National Museums Liverpool.
 
370
 
371
- If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
372
- chris.addis@nhm.ac.uk
373
  """)
374
 
375
  return demo
 
45
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 450 characters."""
46
  return dev_prompt
47
 
 
 
48
  def create_csv_file_simple(results):
49
  """Create a CSV file from the results and return the path"""
50
  # Create a temporary file
 
73
  filename = os.path.splitext(basename)[0]
74
  return filename
75
 
76
+ # Define custom CSS for the application
77
+ custom_css = """
78
+ .container {
79
+ max-width: 1200px;
80
+ margin: 0 auto;
81
+ }
82
+ .header {
83
+ text-align: center;
84
+ margin-bottom: 20px;
85
+ border-bottom: 2px solid #eee;
86
+ padding-bottom: 15px;
87
+ }
88
+ .model-card {
89
+ border: 1px solid #e0e0e0;
90
+ border-radius: 8px;
91
+ padding: 15px;
92
+ background-color: #f9f9f9;
93
+ margin-bottom: 15px;
94
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05);
95
+ }
96
+ .upload-box {
97
+ border: 2px dashed #ccc;
98
+ border-radius: 8px;
99
+ padding: 20px;
100
+ text-align: center;
101
+ margin-bottom: 15px;
102
+ background-color: #f7f7f7;
103
+ transition: all 0.3s ease;
104
+ }
105
+ .upload-box:hover {
106
+ border-color: #2196F3;
107
+ background-color: #f0f8ff;
108
+ }
109
+ .gallery-container {
110
+ background-color: #f5f5f5;
111
+ border-radius: 8px;
112
+ padding: 10px;
113
+ margin-bottom: 15px;
114
+ }
115
+ .result-container {
116
+ border: 1px solid #e0e0e0;
117
+ border-radius: 8px;
118
+ padding: 15px;
119
+ margin-top: 20px;
120
+ background-color: white;
121
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05);
122
+ }
123
+ .nav-buttons {
124
+ display: flex;
125
+ justify-content: space-between;
126
+ align-items: center;
127
+ margin: 10px 0;
128
+ }
129
+ .footer {
130
+ text-align: center;
131
+ margin-top: 30px;
132
+ padding-top: 15px;
133
+ border-top: 1px solid #eee;
134
+ color: #666;
135
+ font-size: 0.9em;
136
+ }
137
+ .logo-container {
138
+ display: flex;
139
+ justify-content: center;
140
+ align-items: center;
141
+ gap: 20px;
142
+ margin-bottom: 10px;
143
+ }
144
+ .length-selector {
145
+ display: flex;
146
+ gap: 10px;
147
+ margin-bottom: 15px;
148
+ }
149
+ .progress-indicator {
150
+ height: 4px;
151
+ background-color: #f0f0f0;
152
+ border-radius: 2px;
153
+ overflow: hidden;
154
+ margin-bottom: 15px;
155
+ }
156
+ .progress-bar {
157
+ height: 100%;
158
+ background-color: #4CAF50;
159
+ width: 0%;
160
+ transition: width 0.3s ease;
161
+ }
162
+ """
163
+
164
+ # Define the Gradio interface with the new design
165
  def create_demo():
166
+ # Use the Monochrome theme with custom CSS for better compatibility
167
+ theme = gr.themes.Monochrome()
168
+
169
+ with gr.Blocks(theme=theme, css=custom_css) as demo:
170
+ # Header section
171
+ with gr.Row(elem_classes="header"):
172
  with gr.Column(scale=3):
173
  gr.Markdown("# AI Alt-text Generator")
174
+ gr.Markdown("Upload images to generate accessible alternative text that meets WCAG 2.1 Guidelines")
175
+
176
+ with gr.Column(scale=1, elem_classes="logo-container"):
177
+ gr.Image("images/nhm_logo.png", show_label=False, height=80,
178
+ interactive=False, show_download_button=False,
179
+ show_share_button=False, show_fullscreen_button=False)
180
+ gr.Image("images/nml_logo.png", show_label=False, height=80,
181
+ interactive=False, show_download_button=False,
182
+ show_share_button=False, show_fullscreen_button=False)
 
 
 
 
183
 
184
+ # Main content area
185
  with gr.Row():
186
+ # Left panel - Controls
187
+ with gr.Column(scale=1, elem_classes="control-panel"):
188
+ # Upload area with styling
189
+ with gr.Column(elem_classes="upload-box"):
190
+ upload_button = gr.UploadButton(
191
+ "📷 Upload Images",
192
+ file_types=["image"],
193
+ file_count="multiple",
194
+ size="lg"
195
+ )
196
+ gr.Markdown("*Drag and drop or click to upload multiple images*")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
+ # Options card
199
+ with gr.Column(elem_classes="model-card"):
200
+ gr.Markdown("### Model Settings")
201
+
202
+ # Model selection dropdown
203
+ model_choices = [
204
+ # Gemini
205
+ ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
206
+ # GPT-4.1 Series
207
+ ("GPT-4.1 Nano", "gpt-4.1-nano"),
208
+ ("GPT-4.1 Mini", "gpt-4.1-mini"),
209
+ ("GPT-4.1", "gpt-4.1"),
210
+ ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
211
+ # Other Models
212
+ ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
213
+ ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
214
+ # Experimental Models
215
+ ("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
216
+ ("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
217
+ ]
218
+
219
+ default_model_internal_value = "google/gemini-2.0-flash-001"
220
+
221
+ model_choice = gr.Dropdown(
222
+ choices=model_choices,
223
+ label="AI Model",
224
+ value=default_model_internal_value,
225
+ info="Select the AI model for generating descriptions",
226
+ visible=True
227
+ )
228
+
229
+ # Length selector with visual indicators
230
+ gr.Markdown("### Response Length")
231
+ with gr.Row(elem_classes="length-selector"):
232
+ length_choice = gr.Radio(
233
+ choices=["short", "medium", "long"],
234
+ label="Response Length",
235
+ value="medium",
236
+ info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
237
+ )
238
 
239
+ # Image preview gallery
240
+ with gr.Column(elem_classes="gallery-container"):
241
+ gr.Markdown("### Uploaded Images")
242
+ input_gallery = gr.Gallery(
243
+ label="",
244
+ columns=3,
245
+ height=180,
246
+ object_fit="contain"
247
+ )
248
+
249
+ # Analysis button
250
+ analyze_button = gr.Button("🔍 Analyze Images", variant="primary", size="lg")
251
 
252
+ # CSV Download section
253
+ with gr.Column(visible=False, elem_id="download-section", elem_classes="model-card") as download_section:
254
+ gr.Markdown("### Download Results")
255
+ csv_download = gr.File(label="CSV Results", elem_id="csv-download")
256
 
257
+ # Hidden state components
258
  image_state = gr.State([])
259
  filename_state = gr.State([])
260
 
261
+ # Right panel - Results display
262
+ with gr.Column(scale=2, elem_classes="results-panel"):
263
+ with gr.Column(elem_classes="result-container"):
264
+ # Progress indicator
265
+ with gr.Row(elem_id="progress-container", visible=False) as progress_container:
266
+ with gr.Column():
267
+ gr.HTML('<div class="progress-indicator"><div class="progress-bar" id="progress-bar"></div></div>')
268
+ progress_text = gr.Markdown("Processing...", elem_id="progress-text")
269
+
270
+ # Image display
271
  current_image = gr.Image(
272
+ label="Image Preview",
273
+ height=400,
274
+ type="filepath",
275
+ show_fullscreen_button=True,
276
+ show_download_button=False,
277
+ show_share_button=False,
278
+ elem_classes="current-image"
279
+ )
280
+
281
+ # Navigation controls
282
+ with gr.Row(elem_classes="nav-buttons"):
283
+ prev_button = gr.Button("← Previous", size="sm", variant="secondary")
284
+ image_counter = gr.Markdown("", elem_id="image-counter")
285
+ next_button = gr.Button("Next →", size="sm", variant="secondary")
286
+
287
+ # Alt-text results
288
+ gr.Markdown("### Generated Alt-text", elem_id="result-heading")
289
+ analysis_text = gr.Textbox(
290
+ label="",
291
+ value="Images will appear here after analysis. Please upload and analyze images to see results.",
292
+ lines=6,
293
+ max_lines=10,
294
+ interactive=False,
295
+ show_label=False,
296
+ elem_classes="result-text"
297
  )
298
 
299
+ # Hidden states for navigation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  current_index = gr.State(0)
301
  all_images = gr.State([])
302
  all_results = gr.State([])
303
 
304
+ # Footer section
305
+ with gr.Row(elem_classes="footer"):
306
+ gr.Markdown("""
307
+ Developed by the Natural History Museum in Partnership with National Museums Liverpool.
308
+ Funded by the DCMS Pilot Scheme. For support, contact: chris.addis@nhm.ac.uk
309
+ """)
310
+
311
+ # Handle file uploads
312
  def handle_upload(files):
313
  file_paths = []
314
  file_names = []
315
  for file in files:
316
  file_paths.append(file.name)
 
317
  file_names.append(get_base_filename(file.name))
318
+
319
+ # Show a message about the number of files uploaded
320
+ upload_message = f"✅ {len(files)} image{'s' if len(files) != 1 else ''} uploaded successfully!"
321
+
322
+ return file_paths, file_paths, file_names, upload_message
323
 
324
  upload_button.upload(
325
  fn=handle_upload,
326
  inputs=[upload_button],
327
+ outputs=[input_gallery, image_state, filename_state, progress_text]
328
  )
329
 
330
+ # Function to analyze images with visual feedback
 
 
331
  def analyze_images(image_paths, model_choice, length_choice, filenames):
332
  if not image_paths:
333
+ return [], [], 0, "", "No images uploaded", "", gr.update(visible=False)
334
 
335
  # Get system prompt based on length selection
336
  sys_prompt = get_sys_prompt(length_choice)
 
347
  try:
348
  # Open the image file for analysis
349
  img = Image.open(image_path)
350
+ prompt0 = prompt_new()
351
 
352
+ # Use model_choice directly since it's the internal value
353
+ model_name = model_choice
 
 
 
354
 
355
+ # Check if this is one of the Gemini models
356
  is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
357
 
358
  if is_gemini_model:
 
413
  initial_text = "No images analyzed"
414
  initial_counter = "0 of 0"
415
 
416
+ # Make the download section visible now that we have results
417
+ download_visible = gr.update(visible=True)
418
+
419
  return (image_paths, image_results, 0, initial_image, initial_counter,
420
+ initial_text, csv_path, download_visible)
 
421
 
422
  # Function to navigate to previous image
423
  def go_to_prev(current_idx, images, results):
 
439
 
440
  return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
441
 
442
+ # Show progress indicator during analysis
443
  analyze_button.click(
444
+ fn=lambda: (gr.update(visible=True), "Processing images... Please wait"),
445
+ inputs=[],
446
+ outputs=[progress_container, progress_text],
447
+ queue=False
448
+ ).then(
449
  fn=analyze_images,
450
  inputs=[image_state, model_choice, length_choice, filename_state],
451
  outputs=[
452
  all_images, all_results, current_index, current_image, image_counter,
453
+ analysis_text, csv_download, download_section
454
  ]
455
+ ).then(
456
+ fn=lambda: (gr.update(visible=False), "Analysis complete!"),
457
+ inputs=[],
458
+ outputs=[progress_container, progress_text],
459
+ queue=False
460
  )
461
 
462
  # Connect navigation buttons
 
472
  outputs=[current_index, current_image, image_counter, analysis_text]
473
  )
474
 
475
+ # Additional information accordion
476
+ with gr.Accordion("About this Tool", open=False):
477
  gr.Markdown("""
478
+ ## About the AI Alt-text Generator
479
+
480
+ This tool uses advanced AI models to automatically generate alternative text descriptions for images,
481
+ helping museums and cultural institutions make their digital content more accessible for visually impaired users.
482
+
483
+ ### Features:
484
+
485
+ - **Multiple AI Models**: Choose from various AI models including Gemini, GPT-4.1, Claude, and others
486
+ - **Customizable Length**: Select short, medium, or long descriptions based on your needs
487
+ - **Batch Processing**: Upload and process multiple images at once
488
+ - **CSV Export**: Download all generated descriptions in a single file
489
 
490
+ ### How to Use:
491
 
492
+ 1. Upload one or more images using the upload button
493
+ 2. Select your preferred AI model and description length
494
+ 3. Click "Analyze Images" to generate descriptions
495
+ 4. Navigate through results with the Previous and Next buttons
496
+ 5. Download all results as a CSV file
497
 
498
  Developed by the Natural History Museum in Partnership with National Museums Liverpool.
499
+ Funded by the DCMS Pilot Scheme.
500
 
501
+ For support, feedback, or suggestions, please contact: chris.addis@nhm.ac.uk
 
502
  """)
503
 
504
  return demo