Chris Addis committed on
Commit
4bc8fd1
·
1 Parent(s): 8a877e4

different design

Browse files
Files changed (2) hide show
  1. .ipynb_checkpoints/app-Copy1-checkpoint.py +380 -0
  2. app.py +239 -254
.ipynb_checkpoints/app-Copy1-checkpoint.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import numpy as np
from PIL import Image
from io import BytesIO
import io
import os
import requests
import json
from dotenv import load_dotenv
import openai
import base64
import csv
import tempfile
import datetime

# Load environment variables from .env file if it exists (for local development).
# On Hugging Face Spaces, the secrets are automatically available as environment variables.
if os.path.exists(".env"):
    load_dotenv()

# Project-local helpers: model client wrappers, HTML utilities, prompt builders.
from library.utils_model import *
from library.utils_html import *
from library.utils_prompt import *

# Default client routes requests through OpenRouter; a second client is pointed
# at Google's OpenAI-compatible Gemini endpoint for the experimental models.
OR = OpenRouterAPI()
gemini = OpenRouterAPI(
    api_key=os.getenv("GEMINI_API_KEY"),
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)

# Path for storing user preferences
PREFERENCES_FILE = "data/user_preferences.csv"

# Ensure the data directory exists before anything tries to write to it.
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
+
39
def get_sys_prompt(length="medium"):
    """Return the system (developer) prompt for the requested description length.

    Args:
        length: One of "short", "medium" or "long". Any unrecognised value
            falls through to the long-form prompt, matching previous behaviour.

    Returns:
        The system prompt string instructing the model on tone, style and
        length constraints. Fixes the "Repsonses"/"maxium" typos that were
        previously sent to the model verbatim.
    """
    # The three prompts differ only in the output kind and the length limit,
    # so build them from one template instead of three near-duplicate literals.
    template = (
        "You are a museum curator tasked with generating {kind} (as defined in "
        "WCAG 2.1) of museum objects for visually impaired and blind users from "
        "images. Use British English and follow museum accessibility best "
        "practices. Do not start with phrases like 'The image shows' or 'This "
        "is an image of'. Be precise, concise and avoid filler and subjective "
        "statements. Responses should be {limit}."
    )
    if length == "short":
        return template.format(kind="alt-text",
                               limit="a maximum of 130 characters")
    if length == "medium":
        return template.format(kind="long descriptions",
                               limit="between 250-300 characters in length")
    return template.format(kind="long descriptions",
                           limit="a maximum of 450 characters")
47
+
48
+ # This function is no longer needed since we removed A/B testing
49
+
50
def create_csv_file_simple(results):
    """Write analysis results to a temporary CSV file and return its path.

    Args:
        results: Iterable of dicts with optional "image_id" and "content"
            keys; missing keys are written as empty strings.

    Returns:
        Filesystem path of a new ``.csv`` file with header ``image_id,content``.
        The file is not deleted here — the caller (Gradio's File component)
        owns its lifetime.
    """
    # mkstemp gives us a unique, already-open file descriptor.
    fd, path = tempfile.mkstemp(suffix='.csv')

    # encoding='utf-8' is explicit because descriptions may contain non-ASCII
    # characters and the platform default (e.g. cp1252 on Windows) could raise
    # UnicodeEncodeError. newline='' is required by the csv module so it can
    # control line endings itself.
    with os.fdopen(fd, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Write header
        writer.writerow(['image_id', 'content'])
        # Write data
        for result in results:
            writer.writerow([
                result.get('image_id', ''),
                result.get('content', '')
            ])

    return path
67
+
68
def get_base_filename(filepath):
    """Return the filename portion of *filepath*, stripped of directory and extension.

    Falsy inputs (``None``, ``""``) yield an empty string.
    """
    if not filepath:
        return ""
    stem, _extension = os.path.splitext(os.path.basename(filepath))
    return stem
77
+
78
# Define the Gradio interface
def create_demo():
    """Build and return the full Gradio Blocks application.

    Layout: a header row (title + partner logos), a two-column main area
    (left: upload/model/length controls and CSV export; right: the currently
    selected image with prev/next navigation and its generated alt-text),
    and an "About" accordion. All event handlers are defined as closures so
    they can capture the module-level API clients (OR, gemini).
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        # Header: title/description on the left, museum logos on the right.
        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("# AI Alt-text Generator")
                gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
                gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
            with gr.Column(scale=1):
                with gr.Row():
                    # Use gr.Image with all interactive features disabled so the
                    # logos behave as static decoration, not inputs.
                    gr.Image("images/nhm_logo.png", show_label=False, height=120,
                             interactive=False, show_download_button=False,
                             show_share_button=False, show_fullscreen_button=False,
                             container=False)
                    gr.Image("images/nml_logo.png", show_label=False, height=120,
                             interactive=False, show_download_button=False,
                             show_share_button=False, show_fullscreen_button=False,
                             container=False)

        with gr.Row():
            # Left column: controls and uploads.
            with gr.Column(scale=1):
                # Upload interface
                upload_button = gr.UploadButton(
                    "Click to Upload Images",
                    file_types=["image"],
                    file_count="multiple"
                )

                # Choices as (Display Name, Internal Value) tuples; the internal
                # value is what gets passed to analyze_images.
                model_choices = [
                    # Gemini
                    ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
                    # GPT-4.1 Series
                    ("GPT-4.1 Nano", "gpt-4.1-nano"),
                    ("GPT-4.1 Mini", "gpt-4.1-mini"),
                    ("GPT-4.1", "gpt-4.1"),
                    ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
                    # Other Models
                    ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
                    ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
                    # Experimental Models (routed to the dedicated Gemini client below)
                    ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
                    ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
                ]

                # Internal value of the default choice.
                default_model_internal_value = "google/gemini-2.0-flash-001"

                # Model selection dropdown.
                model_choice = gr.Dropdown(
                    choices=model_choices,
                    label="Select Model",
                    value=default_model_internal_value,  # Use the internal value for the default
                    # info="Choose the language model to use."  # Optional: Add extra info tooltip
                    visible=True
                )

                # Response length selection; drives get_sys_prompt().
                length_choice = gr.Radio(
                    choices=["short", "medium", "long"],
                    label="Response Length",
                    value="medium",
                    info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
                )

                # Preview gallery for uploaded images.
                gr.Markdown("### Uploaded Images")
                input_gallery = gr.Gallery(
                    label="",
                    columns=3,
                    height=150,
                    object_fit="contain"
                )

                # Analysis button.
                analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")

                # Hidden state: uploaded file paths and their base filenames,
                # populated by handle_upload and consumed by analyze_images.
                image_state = gr.State([])
                filename_state = gr.State([])

                # CSV download component; receives the temp file path from
                # create_csv_file_simple after analysis.
                csv_download = gr.File(label="CSV Results")

            # Right column: display area for the current image and its result.
            with gr.Column(scale=2):
                with gr.Column(elem_classes="image-container"):
                    current_image = gr.Image(
                        label="Current Image",
                        height=600,  # Set the maximum desired height
                        type="filepath",
                        show_fullscreen_button=True,
                        show_download_button=False,
                        show_share_button=False
                    )

                # Navigation row: previous / counter / next.
                with gr.Row():
                    prev_button = gr.Button("← Previous", size="sm")
                    image_counter = gr.Markdown("", elem_id="image-counter")
                    next_button = gr.Button("Next →", size="sm")

                # Alt-text heading and output.
                gr.Markdown("### Generated Alt-text")

                analysis_text = gr.Textbox(
                    label="",
                    value="Please analyze images to see results",
                    lines=6,
                    max_lines=10,
                    interactive=False,
                    show_label=False
                )

                # Hidden state for gallery navigation: index into all_images,
                # plus parallel lists of image paths and result dicts.
                current_index = gr.State(0)
                all_images = gr.State([])
                all_results = gr.State([])

        # Handle file uploads: store paths/names in state for later analysis.
        def handle_upload(files):
            """Collect uploaded file paths and base filenames.

            Returns (gallery items, image_state, filename_state) — the paths
            list is intentionally returned twice (gallery preview + state).
            """
            file_paths = []
            file_names = []
            for file in files:
                file_paths.append(file.name)
                # Extract filename without path or extension for later use
                # as the image_id in results/CSV.
                file_names.append(get_base_filename(file.name))
            return file_paths, file_paths, file_names

        upload_button.upload(
            fn=handle_upload,
            inputs=[upload_button],
            outputs=[input_gallery, image_state, filename_state]
        )

        def analyze_images(image_paths, model_choice, length_choice, filenames):
            """Generate a caption for every uploaded image.

            Returns a 7-tuple matching analyze_button's outputs:
            (all image paths, result dicts, initial index 0, first image,
            counter text, first caption, CSV file path). Per-image failures
            are captured as "Error: ..." entries rather than aborting the batch.
            """
            if not image_paths:
                return [], [], 0, "", "No images", "", ""

            # Get system prompt based on length selection.
            sys_prompt = get_sys_prompt(length_choice)

            image_results = []

            for i, image_path in enumerate(image_paths):
                # Use original filename as image_id if available.
                if i < len(filenames) and filenames[i]:
                    image_id = filenames[i]
                else:
                    image_id = f"Image {i+1}"

                try:
                    # Open the image file for analysis.
                    img = Image.open(image_path)
                    prompt0 = prompt_new()  # Using the new prompt function

                    # Defensive: strip any display-label suffix like " (default)".
                    # NOTE(review): the dropdown already supplies internal values,
                    # so this branch is likely a no-op — confirm before removing.
                    if " (" in model_choice:
                        model_name = model_choice.split(" (")[0]
                    else:
                        model_name = model_choice

                    # Experimental Gemini models go through the dedicated client.
                    is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name

                    if is_gemini_model:
                        try:
                            # First try using the dedicated gemini client.
                            result = gemini.generate_caption(
                                img,
                                model=model_name,
                                max_image_size=512,
                                prompt=prompt0,
                                prompt_dev=sys_prompt,
                                temperature=1
                            )
                        except Exception as gemini_error:
                            # If the gemini client fails (quota, availability),
                            # fall back to the standard OpenRouter client.
                            result = OR.generate_caption(
                                img,
                                model=model_name,
                                max_image_size=512,
                                prompt=prompt0,
                                prompt_dev=sys_prompt,
                                temperature=1
                            )
                    else:
                        # For all other models, use the OpenRouter client directly.
                        result = OR.generate_caption(
                            img,
                            model=model_name,
                            max_image_size=512,
                            prompt=prompt0,
                            prompt_dev=sys_prompt,
                            temperature=1
                        )

                    # Add to results.
                    image_results.append({
                        "image_id": image_id,
                        "content": result
                    })

                except Exception as e:
                    # Record the failure so the batch (and CSV) stays aligned
                    # with the uploaded images.
                    error_message = f"Error: {str(e)}"
                    image_results.append({
                        "image_id": image_id,
                        "content": error_message
                    })

            # Create a CSV file for download.
            csv_path = create_csv_file_simple(image_results)

            # Set up the initial display with the first image.
            if len(image_paths) > 0:
                initial_image = image_paths[0]
                initial_counter = f"{1} of {len(image_paths)}"
                initial_text = image_results[0]["content"]
            else:
                initial_image = ""
                initial_text = "No images analyzed"
                initial_counter = "0 of 0"

            return (image_paths, image_results, 0, initial_image, initial_counter,
                    initial_text, csv_path)

        # Navigate to the previous image (wraps around to the last one).
        def go_to_prev(current_idx, images, results):
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", ""

            new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
            counter_html = f"{new_idx + 1} of {len(images)}"

            return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])

        # Navigate to the next image (wraps around to the first one).
        def go_to_next(current_idx, images, results):
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", ""

            new_idx = (current_idx + 1) % len(images)
            counter_html = f"{new_idx + 1} of {len(images)}"

            return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])

        # Connect the analyze button.
        analyze_button.click(
            fn=analyze_images,
            inputs=[image_state, model_choice, length_choice, filename_state],
            outputs=[
                all_images, all_results, current_index, current_image, image_counter,
                analysis_text, csv_download
            ]
        )

        # Connect navigation buttons.
        prev_button.click(
            fn=go_to_prev,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text]
        )

        next_button.click(
            fn=go_to_next,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text]
        )

        # Collapsible information section.
        with gr.Accordion("About", open=False):
            gr.Markdown("""
            ## About this demo

            This demo generates alternative text for images.

            - Upload one or more images using the upload button
            - Choose a model and response length for generation
            - Navigate through the images with the Previous and Next buttons
            - Download CSV with all results

            Developed by the Natural History Museum in Partnership with National Museums Liverpool.

            If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
            chris.addis@nhm.ac.uk
            """)

    return demo
376
+
377
# Launch the app
if __name__ == "__main__":
    # Build the Blocks UI and start the Gradio server (blocking call).
    app = create_demo()
    app.launch()
app.py CHANGED
@@ -13,6 +13,7 @@ import tempfile
13
  import datetime
14
 
15
  # Load environment variables from .env file if it exists (for local development)
 
16
  if os.path.exists(".env"):
17
  load_dotenv()
18
 
@@ -72,293 +73,263 @@ def get_base_filename(filepath):
72
  filename = os.path.splitext(basename)[0]
73
  return filename
74
 
75
- # Define custom CSS for the new design
76
  custom_css = """
77
- :root {
78
- --primary-color: #1e2a78;
79
- --secondary-color: #33a1fd;
80
- --accent-color: #f5f5f5;
81
- --text-color: #333;
82
- --background-color: #fff;
83
- --card-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
84
- --border-radius: 8px;
85
- }
86
-
87
  .container {
88
  max-width: 1200px;
89
  margin: 0 auto;
90
- padding: 20px;
91
  }
92
-
93
- .app-header {
94
- background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
95
- color: white;
96
- padding: 20px;
97
- border-radius: var(--border-radius);
98
  margin-bottom: 20px;
 
 
99
  }
100
-
101
- .app-title {
102
- font-size: 28px;
103
- font-weight: bold;
104
- margin: 0;
105
- }
106
-
107
- .app-subtitle {
108
- font-size: 16px;
109
- margin-top: 5px;
110
- opacity: 0.9;
111
  }
112
-
113
- .card {
114
- background-color: var(--background-color);
115
- border-radius: var(--border-radius);
116
- box-shadow: var(--card-shadow);
117
  padding: 20px;
118
- margin-bottom: 20px;
 
 
 
119
  }
120
-
121
- .control-panel {
122
- background-color: #f8f9fa;
123
- border-radius: var(--border-radius);
 
 
 
 
 
 
 
 
 
124
  padding: 15px;
 
 
 
125
  }
126
-
127
- .museum-logos {
128
  display: flex;
 
129
  align-items: center;
130
- justify-content: flex-end;
131
  }
132
-
133
- .upload-area {
134
- border: 2px dashed var(--secondary-color);
135
- border-radius: var(--border-radius);
136
- padding: 30px;
137
  text-align: center;
138
- transition: all 0.3s;
139
- }
140
-
141
- .upload-area:hover {
142
- background-color: rgba(51, 161, 253, 0.1);
143
- }
144
-
145
- .primary-btn {
146
- background-color: var(--primary-color);
147
- color: white;
148
- border: none;
149
- padding: 10px 20px;
150
- border-radius: 20px;
151
- font-weight: bold;
152
- transition: all 0.3s;
153
  }
154
-
155
- .primary-btn:hover {
156
- background-color: var(--secondary-color);
157
- transform: translateY(-2px);
158
- }
159
-
160
- .result-card {
161
- border-left: 4px solid var(--secondary-color);
162
  }
163
-
164
- .nav-buttons {
165
  display: flex;
166
- justify-content: space-between;
167
- margin: 15px 0;
168
  }
169
-
170
- .image-preview {
171
- border-radius: var(--border-radius);
 
172
  overflow: hidden;
173
- box-shadow: var(--card-shadow);
174
  }
175
-
176
- .footer {
177
- text-align: center;
178
- padding: 20px;
179
- color: var(--text-color);
180
- opacity: 0.7;
181
- font-size: 14px;
182
- }
183
-
184
- @media (max-width: 768px) {
185
- .app-header {
186
- text-align: center;
187
- }
188
-
189
- .museum-logos {
190
- justify-content: center;
191
- margin-top: 15px;
192
- }
193
  }
194
  """
195
 
196
- # Define the Gradio interface with new design
197
  def create_demo():
198
- # Custom theme
199
  theme = gr.themes.Base(
200
  primary_hue="blue",
201
- secondary_hue="indigo",
202
  neutral_hue="gray",
203
- radius_size=gr.themes.sizes.radius_sm,
204
- text_size=gr.themes.sizes.text_md
205
  ).set(
206
- button_primary_background_fill="*primary_500",
207
- button_primary_background_fill_hover="*primary_600",
208
- button_secondary_background_fill="*neutral_100",
209
- button_secondary_background_fill_hover="*neutral_200",
210
- checkbox_background_color="*neutral_100",
211
- checkbox_background_color_selected="*primary_500",
212
- checkbox_border_color="*neutral_300",
213
- checkbox_border_color_focus="*primary_500",
214
- checkbox_border_color_hover="*neutral_400",
215
- checkbox_label_background_fill="white",
216
- block_title_text_color="*neutral_700",
217
- block_title_background_fill="*neutral_50",
218
- slider_color="*primary_500",
219
- slider_color_dark="*primary_600"
220
  )
221
-
222
  with gr.Blocks(theme=theme, css=custom_css) as demo:
223
  # Header section
224
- with gr.Row(elem_classes="app-header"):
225
  with gr.Column(scale=3):
226
- gr.Markdown("# AI Museum Alt-text Generator", elem_classes="app-title")
227
- gr.Markdown("Generating accessible image descriptions for museum objects - WCAG 2.1 compliant",
228
- elem_classes="app-subtitle")
229
- with gr.Column(scale=1, elem_classes="museum-logos"):
230
- with gr.Row():
231
- gr.Image("images/nhm_logo.png", show_label=False, height=80,
232
- interactive=False, show_download_button=False,
233
- show_share_button=False, show_fullscreen_button=False)
234
- gr.Image("images/nml_logo.png", show_label=False, height=80,
235
- interactive=False, show_download_button=False,
236
- show_share_button=False, show_fullscreen_button=False)
237
-
238
- # Main content area with two-column layout
239
  with gr.Row():
240
- # Left column: Controls and upload
241
- with gr.Column(scale=1):
242
- with gr.Box(elem_classes="card control-panel"):
243
- gr.Markdown("### Configuration")
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- # Model selection dropdown with new styling
246
  model_choices = [
 
247
  ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
 
248
  ("GPT-4.1 Nano", "gpt-4.1-nano"),
249
  ("GPT-4.1 Mini", "gpt-4.1-mini"),
250
  ("GPT-4.1", "gpt-4.1"),
251
  ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
 
252
  ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
253
  ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
 
254
  ("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
255
  ("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
256
  ]
257
 
258
- default_model_value = "google/gemini-2.0-flash-001"
259
 
260
  model_choice = gr.Dropdown(
261
  choices=model_choices,
262
  label="AI Model",
263
- value=default_model_value,
264
- info="Select the AI model to generate alt-text"
 
265
  )
266
 
267
- # Length selection with visual indicators
268
- with gr.Row():
 
269
  length_choice = gr.Radio(
270
- choices=["short", "medium", "long"],
271
- label="Description Length",
 
 
 
 
272
  value="medium",
273
- info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
274
  )
275
 
276
- # Upload area with instruction
277
- with gr.Box(elem_classes="card upload-area"):
278
- gr.Markdown("### Upload Images")
279
- gr.Markdown("Select one or more museum object images")
280
-
281
- upload_button = gr.UploadButton(
282
- "📁 Select Images",
283
- file_types=["image"],
284
- file_count="multiple",
285
- elem_classes="primary-btn"
286
- )
287
-
288
- # Preview gallery for uploaded images
289
  input_gallery = gr.Gallery(
290
- label="Uploaded Images",
291
  columns=3,
292
- height=200,
293
  object_fit="contain",
294
- elem_id="upload-preview"
295
  )
296
 
297
- # Analyze button
298
- analyze_button = gr.Button("🔍 Generate Alt-text",
299
- variant="primary",
300
- size="lg",
301
- elem_classes="primary-btn")
302
 
303
- # CSV download section
304
- with gr.Box(elem_classes="card"):
305
- gr.Markdown("### Export Results")
306
- csv_download = gr.File(
307
- label="Download CSV with all descriptions",
308
- elem_id="csv-download"
309
- )
310
-
311
- # Right column: Results display
312
- with gr.Column(scale=2):
313
- with gr.Box(elem_classes="card image-preview"):
314
- # Current image display
 
 
 
 
 
 
 
315
  current_image = gr.Image(
316
- label="Current Image",
317
- height=450,
318
  type="filepath",
319
  show_fullscreen_button=True,
320
  show_download_button=False,
321
- show_share_button=False
 
322
  )
323
 
324
  # Navigation controls
325
  with gr.Row(elem_classes="nav-buttons"):
326
- prev_button = gr.Button("← Previous", size="sm")
327
- image_counter = gr.Markdown("0 of 0", elem_id="image-counter")
328
- next_button = gr.Button("Next →", size="sm")
329
-
330
- # Results display with card styling
331
- with gr.Box(elem_classes="card result-card"):
332
- gr.Markdown("### Generated Alt-text")
333
 
 
 
334
  analysis_text = gr.Textbox(
335
  label="",
336
- value="Please upload and analyze images to see results",
337
- lines=8,
338
- max_lines=12,
339
  interactive=False,
340
- show_label=False
 
341
  )
342
-
343
- # Character count display
344
- char_count = gr.Markdown("*Character count: 0*", elem_id="char-count")
345
-
346
- # Hidden state components
347
- image_state = gr.State([])
348
- filename_state = gr.State([])
349
- current_index = gr.State(0)
350
- all_images = gr.State([])
351
- all_results = gr.State([])
352
 
353
- # Footer with attribution
354
- gr.Markdown(
355
- """
356
- ---
357
- Developed by the Natural History Museum in Partnership with National Museums Liverpool.
358
- Funded by the DCMS Pilot Scheme | Contact: chris.addis@nhm.ac.uk
359
- """,
360
- elem_classes="footer"
361
- )
362
 
363
  # Handle file uploads
364
  def handle_upload(files):
@@ -367,21 +338,30 @@ def create_demo():
367
  for file in files:
368
  file_paths.append(file.name)
369
  file_names.append(get_base_filename(file.name))
370
- return file_paths, file_paths, file_names
 
 
 
 
371
 
372
  upload_button.upload(
373
  fn=handle_upload,
374
  inputs=[upload_button],
375
- outputs=[input_gallery, image_state, filename_state]
376
  )
377
 
378
- # Function to analyze images with failover mechanism for Gemini
379
  def analyze_images(image_paths, model_choice, length_choice, filenames):
380
  if not image_paths:
381
- return [], [], 0, "", "No images", "", "", "*Character count: 0*"
382
 
383
  # Get system prompt based on length selection
384
- sys_prompt = get_sys_prompt(length_choice)
 
 
 
 
 
385
 
386
  image_results = []
387
 
@@ -395,15 +375,14 @@ def create_demo():
395
  try:
396
  # Open the image file for analysis
397
  img = Image.open(image_path)
398
- prompt0 = prompt_new() # Using the new prompt function
399
 
400
- # Extract the actual model name (remove any labels like "(default)")
401
- if " (" in model_choice:
402
- model_name = model_choice.split(" (")[0]
403
- else:
404
- model_name = model_choice
405
 
406
- # Check if this is one of the Gemini models that needs special handling
407
  is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
408
 
409
  if is_gemini_model:
@@ -418,7 +397,6 @@ def create_demo():
418
  temperature=1
419
  )
420
  except Exception as gemini_error:
421
- print(f"Gemini API failed: {str(gemini_error)}. Falling back to OpenRouter.")
422
  # If gemini client fails, fall back to standard OR client
423
  result = OR.generate_caption(
424
  img,
@@ -460,90 +438,97 @@ def create_demo():
460
  initial_image = image_paths[0]
461
  initial_counter = f"{1} of {len(image_paths)}"
462
  initial_text = image_results[0]["content"]
463
- char_count_text = f"*Character count: {len(initial_text)}*"
464
  else:
465
  initial_image = ""
466
  initial_text = "No images analyzed"
467
  initial_counter = "0 of 0"
468
- char_count_text = "*Character count: 0*"
 
 
469
 
470
  return (image_paths, image_results, 0, initial_image, initial_counter,
471
- initial_text, csv_path, char_count_text)
472
 
473
  # Function to navigate to previous image
474
  def go_to_prev(current_idx, images, results):
475
  if not images or len(images) == 0:
476
- return current_idx, "", "0 of 0", "", "*Character count: 0*"
477
 
478
  new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
479
  counter_html = f"{new_idx + 1} of {len(images)}"
480
- result_text = results[new_idx]["content"]
481
- char_count_text = f"*Character count: {len(result_text)}*"
482
 
483
- return (new_idx, images[new_idx], counter_html, result_text, char_count_text)
484
 
485
  # Function to navigate to next image
486
  def go_to_next(current_idx, images, results):
487
  if not images or len(images) == 0:
488
- return current_idx, "", "0 of 0", "", "*Character count: 0*"
489
 
490
  new_idx = (current_idx + 1) % len(images)
491
  counter_html = f"{new_idx + 1} of {len(images)}"
492
- result_text = results[new_idx]["content"]
493
- char_count_text = f"*Character count: {len(result_text)}*"
494
 
495
- return (new_idx, images[new_idx], counter_html, result_text, char_count_text)
496
 
497
- # Connect the analyze button
498
  analyze_button.click(
 
 
 
 
 
499
  fn=analyze_images,
500
  inputs=[image_state, model_choice, length_choice, filename_state],
501
  outputs=[
502
  all_images, all_results, current_index, current_image, image_counter,
503
- analysis_text, csv_download, char_count
504
  ]
 
 
 
 
 
505
  )
506
 
507
  # Connect navigation buttons
508
  prev_button.click(
509
  fn=go_to_prev,
510
  inputs=[current_index, all_images, all_results],
511
- outputs=[current_index, current_image, image_counter, analysis_text, char_count]
512
  )
513
 
514
  next_button.click(
515
  fn=go_to_next,
516
  inputs=[current_index, all_images, all_results],
517
- outputs=[current_index, current_image, image_counter, analysis_text, char_count]
518
  )
519
 
520
- # Add collapsible info section
521
  with gr.Accordion("About this Tool", open=False):
522
  gr.Markdown("""
523
- ## AI Museum Alt-text Generator
524
 
525
- This tool helps museum professionals create high-quality alternative text descriptions for museum objects, designed to meet WCAG 2.1 accessibility guidelines. The generated descriptions are crafted to provide meaningful context for visitors with visual impairments.
 
526
 
527
- ### Features
528
 
529
- - **Multiple AI Models**: Choose from various AI vision models to generate descriptions
530
  - **Customizable Length**: Select short, medium, or long descriptions based on your needs
531
- - **Batch Processing**: Upload multiple images at once and navigate through results
532
- - **Export**: Download all results as CSV for easy integration with your collection management system
533
-
534
- ### How to Use
535
 
536
- 1. Select your preferred AI model and description length
537
- 2. Upload one or more images using the upload button
538
- 3. Click "Generate Alt-text" to process the images
539
- 4. Navigate through results with Previous/Next buttons
540
- 5. Download the CSV with all generated descriptions
541
 
542
- ### About the Project
 
 
 
 
543
 
544
- Developed by the Natural History Museum in Partnership with National Museums Liverpool as part of the DCMS Pilot Scheme for enhancing accessibility in cultural institutions.
 
545
 
546
- For feedback, suggestions, or support: chris.addis@nhm.ac.uk
547
  """)
548
 
549
  return demo
 
13
  import datetime
14
 
15
  # Load environment variables from .env file if it exists (for local development)
16
+ # On Hugging Face Spaces, the secrets are automatically available as environment variables
17
  if os.path.exists(".env"):
18
  load_dotenv()
19
 
 
73
  filename = os.path.splitext(basename)[0]
74
  return filename
75
 
76
+ # Define custom CSS for the application
77
  custom_css = """
 
 
 
 
 
 
 
 
 
 
78
  .container {
79
  max-width: 1200px;
80
  margin: 0 auto;
 
81
  }
82
+ .header {
83
+ text-align: center;
 
 
 
 
84
  margin-bottom: 20px;
85
+ border-bottom: 2px solid #eee;
86
+ padding-bottom: 15px;
87
  }
88
+ .model-card {
89
+ border: 1px solid #e0e0e0;
90
+ border-radius: 8px;
91
+ padding: 15px;
92
+ background-color: #f9f9f9;
93
+ margin-bottom: 15px;
94
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05);
 
 
 
 
95
  }
96
+ .upload-box {
97
+ border: 2px dashed #ccc;
98
+ border-radius: 8px;
 
 
99
  padding: 20px;
100
+ text-align: center;
101
+ margin-bottom: 15px;
102
+ background-color: #f7f7f7;
103
+ transition: all 0.3s ease;
104
  }
105
+ .upload-box:hover {
106
+ border-color: #2196F3;
107
+ background-color: #f0f8ff;
108
+ }
109
+ .gallery-container {
110
+ background-color: #f5f5f5;
111
+ border-radius: 8px;
112
+ padding: 10px;
113
+ margin-bottom: 15px;
114
+ }
115
+ .result-container {
116
+ border: 1px solid #e0e0e0;
117
+ border-radius: 8px;
118
  padding: 15px;
119
+ margin-top: 20px;
120
+ background-color: white;
121
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05);
122
  }
123
+ .nav-buttons {
 
124
  display: flex;
125
+ justify-content: space-between;
126
  align-items: center;
127
+ margin: 10px 0;
128
  }
129
+ .footer {
 
 
 
 
130
  text-align: center;
131
+ margin-top: 30px;
132
+ padding-top: 15px;
133
+ border-top: 1px solid #eee;
134
+ color: #666;
135
+ font-size: 0.9em;
 
 
 
 
 
 
 
 
 
 
136
  }
137
+ .logo-container {
138
+ display: flex;
139
+ justify-content: center;
140
+ align-items: center;
141
+ gap: 20px;
142
+ margin-bottom: 10px;
 
 
143
  }
144
+ .length-selector {
 
145
  display: flex;
146
+ gap: 10px;
147
+ margin-bottom: 15px;
148
  }
149
+ .progress-indicator {
150
+ height: 4px;
151
+ background-color: #f0f0f0;
152
+ border-radius: 2px;
153
  overflow: hidden;
154
+ margin-bottom: 15px;
155
  }
156
+ .progress-bar {
157
+ height: 100%;
158
+ background-color: #4CAF50;
159
+ width: 0%;
160
+ transition: width 0.3s ease;
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  }
162
  """
163
 
164
+ # Define the Gradio interface with the new design
165
  def create_demo():
166
+ # Create a custom theme
167
  theme = gr.themes.Base(
168
  primary_hue="blue",
169
+ secondary_hue="teal",
170
  neutral_hue="gray",
171
+ font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
 
172
  ).set(
173
+ button_primary_background_fill="#2196F3",
174
+ button_primary_background_fill_hover="#1976D2",
175
+ button_primary_text_color="white",
176
+ button_secondary_background_fill="#f5f5f5",
177
+ button_secondary_background_fill_hover="#e0e0e0",
178
+ button_secondary_text_color="#333",
179
+ background_fill="#ffffff",
180
+ block_background_fill="#fafafa",
181
+ block_border_width="1px",
182
+ block_border_color="#e0e0e0",
183
+ block_radius="8px",
184
+ block_title_text_weight="600",
 
 
185
  )
186
+
187
  with gr.Blocks(theme=theme, css=custom_css) as demo:
188
  # Header section
189
+ with gr.Row(elem_classes="header"):
190
  with gr.Column(scale=3):
191
+ gr.Markdown("# AI Alt-text Generator")
192
+ gr.Markdown("Upload images to generate accessible alternative text that meets WCAG 2.1 Guidelines")
193
+
194
+ with gr.Column(scale=1, elem_classes="logo-container"):
195
+ gr.Image("images/nhm_logo.png", show_label=False, height=80,
196
+ interactive=False, show_download_button=False,
197
+ show_share_button=False, show_fullscreen_button=False)
198
+ gr.Image("images/nml_logo.png", show_label=False, height=80,
199
+ interactive=False, show_download_button=False,
200
+ show_share_button=False, show_fullscreen_button=False)
201
+
202
+ # Main content area
 
203
  with gr.Row():
204
+ # Left panel - Controls
205
+ with gr.Column(scale=1, elem_classes="control-panel"):
206
+ # Upload area with styling
207
+ with gr.Box(elem_classes="upload-box"):
208
+ upload_button = gr.UploadButton(
209
+ "📷 Upload Images",
210
+ file_types=["image"],
211
+ file_count="multiple",
212
+ size="lg"
213
+ )
214
+ gr.Markdown("*Drag and drop or click to upload multiple images*")
215
+
216
+ # Options card
217
+ with gr.Box(elem_classes="model-card"):
218
+ gr.Markdown("### Model Settings")
219
 
220
+ # Model selection dropdown
221
  model_choices = [
222
+ # Gemini
223
  ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
224
+ # GPT-4.1 Series
225
  ("GPT-4.1 Nano", "gpt-4.1-nano"),
226
  ("GPT-4.1 Mini", "gpt-4.1-mini"),
227
  ("GPT-4.1", "gpt-4.1"),
228
  ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
229
+ # Other Models
230
  ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
231
  ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
232
+ # Experimental Models
233
  ("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
234
  ("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
235
  ]
236
 
237
+ default_model_internal_value = "google/gemini-2.0-flash-001"
238
 
239
  model_choice = gr.Dropdown(
240
  choices=model_choices,
241
  label="AI Model",
242
+ value=default_model_internal_value,
243
+ info="Select the AI model for generating descriptions",
244
+ visible=True
245
  )
246
 
247
+ # Length selector with visual indicators
248
+ gr.Markdown("### Response Length")
249
+ with gr.Row(elem_classes="length-selector"):
250
  length_choice = gr.Radio(
251
+ choices=[
252
+ ("Short (max 130 chars)", "short"),
253
+ ("Medium (250-300 chars)", "medium"),
254
+ ("Long (max 450 chars)", "long")
255
+ ],
256
+ label="",
257
  value="medium",
258
+ show_label=False
259
  )
260
 
261
+ # Image preview gallery
262
+ with gr.Box(elem_classes="gallery-container"):
263
+ gr.Markdown("### Uploaded Images")
 
 
 
 
 
 
 
 
 
 
264
  input_gallery = gr.Gallery(
265
+ label="",
266
  columns=3,
267
+ height=180,
268
  object_fit="contain",
269
+ allow_preview=True
270
  )
271
 
272
+ # Analysis button
273
+ analyze_button = gr.Button("🔍 Analyze Images", variant="primary", size="lg")
 
 
 
274
 
275
+ # CSV Download section
276
+ with gr.Box(visible=False, elem_id="download-section", elem_classes="model-card") as download_section:
277
+ gr.Markdown("### Download Results")
278
+ csv_download = gr.File(label="CSV Results", elem_id="csv-download")
279
+
280
+ # Hidden state components
281
+ image_state = gr.State([])
282
+ filename_state = gr.State([])
283
+
284
+ # Right panel - Results display
285
+ with gr.Column(scale=2, elem_classes="results-panel"):
286
+ with gr.Box(elem_classes="result-container"):
287
+ # Progress indicator
288
+ with gr.Row(elem_id="progress-container", visible=False) as progress_container:
289
+ with gr.Column():
290
+ gr.HTML('<div class="progress-indicator"><div class="progress-bar" id="progress-bar"></div></div>')
291
+ progress_text = gr.Markdown("Processing...", elem_id="progress-text")
292
+
293
+ # Image display
294
  current_image = gr.Image(
295
+ label="Image Preview",
296
+ height=400,
297
  type="filepath",
298
  show_fullscreen_button=True,
299
  show_download_button=False,
300
+ show_share_button=False,
301
+ elem_classes="current-image"
302
  )
303
 
304
  # Navigation controls
305
  with gr.Row(elem_classes="nav-buttons"):
306
+ prev_button = gr.Button("← Previous", size="sm", variant="secondary")
307
+ image_counter = gr.Markdown("", elem_id="image-counter")
308
+ next_button = gr.Button("Next →", size="sm", variant="secondary")
 
 
 
 
309
 
310
+ # Alt-text results
311
+ gr.Markdown("### Generated Alt-text", elem_id="result-heading")
312
  analysis_text = gr.Textbox(
313
  label="",
314
+ value="Images will appear here after analysis. Please upload and analyze images to see results.",
315
+ lines=6,
316
+ max_lines=10,
317
  interactive=False,
318
+ show_label=False,
319
+ elem_classes="result-text"
320
  )
321
+
322
+ # Hidden states for navigation
323
+ current_index = gr.State(0)
324
+ all_images = gr.State([])
325
+ all_results = gr.State([])
 
 
 
 
 
326
 
327
+ # Footer section
328
+ with gr.Row(elem_classes="footer"):
329
+ gr.Markdown("""
330
+ Developed by the Natural History Museum in Partnership with National Museums Liverpool.
331
+ Funded by the DCMS Pilot Scheme. For support, contact: chris.addis@nhm.ac.uk
332
+ """)
 
 
 
333
 
334
  # Handle file uploads
335
  def handle_upload(files):
 
338
  for file in files:
339
  file_paths.append(file.name)
340
  file_names.append(get_base_filename(file.name))
341
+
342
+ # Show a message about the number of files uploaded
343
+ upload_message = f"✅ {len(files)} image{'s' if len(files) != 1 else ''} uploaded successfully!"
344
+
345
+ return file_paths, file_paths, file_names, upload_message
346
 
347
  upload_button.upload(
348
  fn=handle_upload,
349
  inputs=[upload_button],
350
+ outputs=[input_gallery, image_state, filename_state, progress_text]
351
  )
352
 
353
+ # Function to analyze images with visual feedback
354
  def analyze_images(image_paths, model_choice, length_choice, filenames):
355
  if not image_paths:
356
+ return [], [], 0, "", "No images uploaded", "", gr.update(visible=False)
357
 
358
  # Get system prompt based on length selection
359
+ if isinstance(length_choice, tuple):
360
+ length_value = length_choice[1] # Extract the value part
361
+ else:
362
+ length_value = length_choice
363
+
364
+ sys_prompt = get_sys_prompt(length_value)
365
 
366
  image_results = []
367
 
 
375
  try:
376
  # Open the image file for analysis
377
  img = Image.open(image_path)
378
+ prompt0 = prompt_new()
379
 
380
+ # Extract model name
381
+ model_name = model_choice
382
+ if isinstance(model_choice, tuple):
383
+ model_name = model_choice[1]
 
384
 
385
+ # Check if this is one of the Gemini models
386
  is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
387
 
388
  if is_gemini_model:
 
397
  temperature=1
398
  )
399
  except Exception as gemini_error:
 
400
  # If gemini client fails, fall back to standard OR client
401
  result = OR.generate_caption(
402
  img,
 
438
  initial_image = image_paths[0]
439
  initial_counter = f"{1} of {len(image_paths)}"
440
  initial_text = image_results[0]["content"]
 
441
  else:
442
  initial_image = ""
443
  initial_text = "No images analyzed"
444
  initial_counter = "0 of 0"
445
+
446
+ # Make the download section visible now that we have results
447
+ download_visible = gr.update(visible=True)
448
 
449
  return (image_paths, image_results, 0, initial_image, initial_counter,
450
+ initial_text, csv_path, download_visible)
451
 
452
  # Function to navigate to previous image
453
  def go_to_prev(current_idx, images, results):
454
  if not images or len(images) == 0:
455
+ return current_idx, "", "0 of 0", ""
456
 
457
  new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
458
  counter_html = f"{new_idx + 1} of {len(images)}"
 
 
459
 
460
+ return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
461
 
462
  # Function to navigate to next image
463
  def go_to_next(current_idx, images, results):
464
  if not images or len(images) == 0:
465
+ return current_idx, "", "0 of 0", ""
466
 
467
  new_idx = (current_idx + 1) % len(images)
468
  counter_html = f"{new_idx + 1} of {len(images)}"
 
 
469
 
470
+ return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
471
 
472
+ # Show progress indicator during analysis
473
  analyze_button.click(
474
+ fn=lambda: (gr.update(visible=True), "Processing images... Please wait"),
475
+ inputs=[],
476
+ outputs=[progress_container, progress_text],
477
+ queue=False
478
+ ).then(
479
  fn=analyze_images,
480
  inputs=[image_state, model_choice, length_choice, filename_state],
481
  outputs=[
482
  all_images, all_results, current_index, current_image, image_counter,
483
+ analysis_text, csv_download, download_section
484
  ]
485
+ ).then(
486
+ fn=lambda: (gr.update(visible=False), "Analysis complete!"),
487
+ inputs=[],
488
+ outputs=[progress_container, progress_text],
489
+ queue=False
490
  )
491
 
492
  # Connect navigation buttons
493
  prev_button.click(
494
  fn=go_to_prev,
495
  inputs=[current_index, all_images, all_results],
496
+ outputs=[current_index, current_image, image_counter, analysis_text]
497
  )
498
 
499
  next_button.click(
500
  fn=go_to_next,
501
  inputs=[current_index, all_images, all_results],
502
+ outputs=[current_index, current_image, image_counter, analysis_text]
503
  )
504
 
505
+ # Additional information accordion
506
  with gr.Accordion("About this Tool", open=False):
507
  gr.Markdown("""
508
+ ## About the AI Alt-text Generator
509
 
510
+ This tool uses advanced AI models to automatically generate alternative text descriptions for images,
511
+ helping museums and cultural institutions make their digital content more accessible for visually impaired users.
512
 
513
+ ### Features:
514
 
515
+ - **Multiple AI Models**: Choose from various AI models including Gemini, GPT-4.1, Claude, and others
516
  - **Customizable Length**: Select short, medium, or long descriptions based on your needs
517
+ - **Batch Processing**: Upload and process multiple images at once
518
+ - **CSV Export**: Download all generated descriptions in a single file
 
 
519
 
520
+ ### How to Use:
 
 
 
 
521
 
522
+ 1. Upload one or more images using the upload button
523
+ 2. Select your preferred AI model and description length
524
+ 3. Click "Analyze Images" to generate descriptions
525
+ 4. Navigate through results with the Previous and Next buttons
526
+ 5. Download all results as a CSV file
527
 
528
+ Developed by the Natural History Museum in Partnership with National Museums Liverpool.
529
+ Funded by the DCMS Pilot Scheme.
530
 
531
+ For support, feedback, or suggestions, please contact: chris.addis@nhm.ac.uk
532
  """)
533
 
534
  return demo