Chris Addis committed on
Commit
9793f9c
·
1 Parent(s): 3e18454
Files changed (3) hide show
  1. .ipynb_checkpoints/app2-checkpoint.py +0 -27
  2. app.py +1 -1
  3. app2.py +0 -509
.ipynb_checkpoints/app2-checkpoint.py DELETED
@@ -1,27 +0,0 @@
1
- import gradio as gr
2
-
3
- def process_image(image):
4
- # Simply return the image as is for display
5
- return image
6
-
7
- # Create the Gradio interface
8
- with gr.Blocks() as demo:
9
- gr.Markdown("# Image Uploader and Viewer")
10
-
11
- with gr.Row():
12
- with gr.Column():
13
- input_image = gr.Image(type="pil", label="Upload an image")
14
- upload_button = gr.Button("Display Image")
15
-
16
- with gr.Column():
17
- output_image = gr.Image(label="Displayed Image")
18
-
19
- upload_button.click(
20
- fn=process_image,
21
- inputs=input_image,
22
- outputs=output_image
23
- )
24
-
25
- # Launch the app
26
- if __name__ == "__main__":
27
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -82,7 +82,7 @@ def create_demo():
82
  with gr.Row():
83
  with gr.Column(scale=3):
84
  gr.Markdown("# AI Alt-text Generator")
85
- gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
86
  gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
87
  with gr.Column(scale=1):
88
  with gr.Row():
 
82
  with gr.Row():
83
  with gr.Column(scale=3):
84
  gr.Markdown("# AI Alt-text Generator")
85
+ gr.Markdown("Upload one or more images to generate accessible alternative text (designed to meet WCAG 2.1 Guidelines)")
86
  gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
87
  with gr.Column(scale=1):
88
  with gr.Row():
app2.py DELETED
@@ -1,509 +0,0 @@
1
- import gradio as gr
2
- import numpy as np
3
- from PIL import Image
4
- import io
5
- import os
6
- import requests
7
- import json
8
- from dotenv import load_dotenv
9
- import openai
10
- import base64
11
- import csv
12
- import tempfile
13
- import datetime
14
-
15
- # Load environment variables from .env file if it exists (for local development)
16
- # On Hugging Face Spaces, the secrets are automatically available as environment variables
17
- if os.path.exists(".env"):
18
- load_dotenv()
19
-
20
- from io import BytesIO
21
- import numpy as np
22
- import requests
23
- from PIL import Image
24
-
25
- # import libraries
26
- from library.utils_model import *
27
- from library.utils_html import *
28
- from library.utils_prompt import *
29
-
30
- OR = OpenRouterAPI()
31
- gemini = OpenRouterAPI(api_key = os.getenv("GEMINI_API_KEY"),base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
32
-
33
- # Path for storing user preferences
34
- PREFERENCES_FILE = "data/user_preferences.csv"
35
-
36
- # Ensure directory exists
37
- os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
-
39
- def get_sys_prompt(length="medium"):
40
- if length == "short":
41
- dev_prompt = """You are a museum curator tasked with generating alt-text (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 130 characters."""
42
- elif length == "medium":
43
- dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be between 250-300 characters in length."""
44
- else:
45
- dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 450 characters."""
46
- return dev_prompt
47
-
48
- def create_csv_file_simple(results):
49
- """Create a CSV file from the results and return the path"""
50
- # Create a temporary file
51
- fd, path = tempfile.mkstemp(suffix='.csv')
52
-
53
- with os.fdopen(fd, 'w', newline='') as f:
54
- writer = csv.writer(f)
55
- # Write header
56
- writer.writerow(['image_id', 'content'])
57
- # Write data
58
- for result in results:
59
- writer.writerow([
60
- result.get('image_id', ''),
61
- result.get('content', '')
62
- ])
63
-
64
- return path
65
-
66
- # Extract original filename without path or extension
67
- def get_base_filename(filepath):
68
- if not filepath:
69
- return ""
70
- # Get the basename (filename with extension)
71
- basename = os.path.basename(filepath)
72
- # Remove extension
73
- filename = os.path.splitext(basename)[0]
74
- return filename
75
-
76
- # Define custom CSS for the application
77
- custom_css = """
78
- .container {
79
- max-width: 1200px;
80
- margin: 0 auto;
81
- }
82
- .header {
83
- text-align: center;
84
- margin-bottom: 20px;
85
- border-bottom: 2px solid #eee;
86
- padding-bottom: 15px;
87
- }
88
- .model-card {
89
- border: 1px solid #e0e0e0;
90
- border-radius: 8px;
91
- padding: 15px;
92
- background-color: #f9f9f9;
93
- margin-bottom: 15px;
94
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
95
- }
96
- .upload-box {
97
- border: 2px dashed #ccc;
98
- border-radius: 8px;
99
- padding: 20px;
100
- text-align: center;
101
- margin-bottom: 15px;
102
- background-color: #f7f7f7;
103
- transition: all 0.3s ease;
104
- }
105
- .upload-box:hover {
106
- border-color: #2196F3;
107
- background-color: #f0f8ff;
108
- }
109
- .gallery-container {
110
- background-color: #f5f5f5;
111
- border-radius: 8px;
112
- padding: 10px;
113
- margin-bottom: 15px;
114
- }
115
- .result-container {
116
- border: 1px solid #e0e0e0;
117
- border-radius: 8px;
118
- padding: 15px;
119
- margin-top: 20px;
120
- background-color: white;
121
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
122
- }
123
- .nav-buttons {
124
- display: flex;
125
- justify-content: space-between;
126
- align-items: center;
127
- margin: 10px 0;
128
- }
129
- .footer {
130
- text-align: center;
131
- margin-top: 30px;
132
- padding-top: 15px;
133
- border-top: 1px solid #eee;
134
- color: #666;
135
- font-size: 0.9em;
136
- }
137
- .logo-container {
138
- display: flex;
139
- justify-content: center;
140
- align-items: center;
141
- gap: 20px;
142
- margin-bottom: 10px;
143
- }
144
- .length-selector {
145
- display: flex;
146
- gap: 10px;
147
- margin-bottom: 15px;
148
- }
149
- .progress-indicator {
150
- height: 4px;
151
- background-color: #f0f0f0;
152
- border-radius: 2px;
153
- overflow: hidden;
154
- margin-bottom: 15px;
155
- }
156
- .progress-bar {
157
- height: 100%;
158
- background-color: #4CAF50;
159
- width: 0%;
160
- transition: width 0.3s ease;
161
- }
162
- """
163
-
164
- # Define the Gradio interface with the new design
165
- def create_demo():
166
- # Use the Monochrome theme with custom CSS for better compatibility
167
- theme = gr.themes.Monochrome()
168
-
169
- with gr.Blocks(theme=theme, css=custom_css) as demo:
170
- # Header section
171
- with gr.Row(elem_classes="header"):
172
- with gr.Column(scale=3):
173
- gr.Markdown("# AI Alt-text Generator")
174
- gr.Markdown("Upload images to generate accessible alternative text that meets WCAG 2.1 Guidelines")
175
-
176
- with gr.Column(scale=1, elem_classes="logo-container"):
177
- gr.Image("images/nhm_logo.png", show_label=False, height=80,
178
- interactive=False, show_download_button=False,
179
- show_share_button=False, show_fullscreen_button=False)
180
- gr.Image("images/nml_logo.png", show_label=False, height=80,
181
- interactive=False, show_download_button=False,
182
- show_share_button=False, show_fullscreen_button=False)
183
-
184
- # Main content area
185
- with gr.Row():
186
- # Left panel - Controls
187
- with gr.Column(scale=1, elem_classes="control-panel"):
188
- # Upload area with styling
189
- with gr.Column(elem_classes="upload-box"):
190
- upload_button = gr.UploadButton(
191
- "📷 Upload Images",
192
- file_types=["image"],
193
- file_count="multiple",
194
- size="lg"
195
- )
196
- gr.Markdown("*Drag and drop or click to upload multiple images*")
197
-
198
- # Options card
199
- with gr.Column(elem_classes="model-card"):
200
- gr.Markdown("### Model Settings")
201
-
202
- # Model selection dropdown
203
- model_choices = [
204
- # Gemini
205
- ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
206
- # GPT-4.1 Series
207
- ("GPT-4.1 Nano", "gpt-4.1-nano"),
208
- ("GPT-4.1 Mini", "gpt-4.1-mini"),
209
- ("GPT-4.1", "gpt-4.1"),
210
- ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
211
- # Other Models
212
- ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
213
- ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
214
- # Experimental Models
215
- ("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
216
- ("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
217
- ]
218
-
219
- default_model_internal_value = "google/gemini-2.0-flash-001"
220
-
221
- model_choice = gr.Dropdown(
222
- choices=model_choices,
223
- label="AI Model",
224
- value=default_model_internal_value,
225
- info="Select the AI model for generating descriptions",
226
- visible=True
227
- )
228
-
229
- # Length selector with visual indicators
230
- gr.Markdown("### Response Length")
231
- with gr.Row(elem_classes="length-selector"):
232
- length_choice = gr.Radio(
233
- choices=["short", "medium", "long"],
234
- label="Response Length",
235
- value="medium",
236
- info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
237
- )
238
-
239
- # Image preview gallery
240
- with gr.Column(elem_classes="gallery-container"):
241
- gr.Markdown("### Uploaded Images")
242
- input_gallery = gr.Gallery(
243
- label="",
244
- columns=3,
245
- height=180,
246
- object_fit="contain"
247
- )
248
-
249
- # Analysis button
250
- analyze_button = gr.Button("🔍 Analyze Images", variant="primary", size="lg")
251
-
252
- # CSV Download section
253
- with gr.Column(visible=False, elem_id="download-section", elem_classes="model-card") as download_section:
254
- gr.Markdown("### Download Results")
255
- csv_download = gr.File(label="CSV Results", elem_id="csv-download")
256
-
257
- # Hidden state components
258
- image_state = gr.State([])
259
- filename_state = gr.State([])
260
-
261
- # Right panel - Results display
262
- with gr.Column(scale=2, elem_classes="results-panel"):
263
- with gr.Column(elem_classes="result-container"):
264
- # Progress indicator
265
- with gr.Row(elem_id="progress-container", visible=False) as progress_container:
266
- with gr.Column():
267
- gr.HTML('<div class="progress-indicator"><div class="progress-bar" id="progress-bar"></div></div>')
268
- progress_text = gr.Markdown("Processing...", elem_id="progress-text")
269
-
270
- # Image display
271
- current_image = gr.Image(
272
- label="Image Preview",
273
- height=400,
274
- type="filepath",
275
- show_fullscreen_button=True,
276
- show_download_button=False,
277
- show_share_button=False,
278
- elem_classes="current-image"
279
- )
280
-
281
- # Navigation controls
282
- with gr.Row(elem_classes="nav-buttons"):
283
- prev_button = gr.Button("← Previous", size="sm", variant="secondary")
284
- image_counter = gr.Markdown("", elem_id="image-counter")
285
- next_button = gr.Button("Next →", size="sm", variant="secondary")
286
-
287
- # Alt-text results
288
- gr.Markdown("### Generated Alt-text", elem_id="result-heading")
289
- analysis_text = gr.Textbox(
290
- label="",
291
- value="Images will appear here after analysis. Please upload and analyze images to see results.",
292
- lines=6,
293
- max_lines=10,
294
- interactive=False,
295
- show_label=False,
296
- elem_classes="result-text"
297
- )
298
-
299
- # Hidden states for navigation
300
- current_index = gr.State(0)
301
- all_images = gr.State([])
302
- all_results = gr.State([])
303
-
304
- # Footer section
305
- with gr.Row(elem_classes="footer"):
306
- gr.Markdown("""
307
- Developed by the Natural History Museum in Partnership with National Museums Liverpool.
308
- Funded by the DCMS Pilot Scheme. For support, contact: chris.addis@nhm.ac.uk
309
- """)
310
-
311
- # Handle file uploads
312
- def handle_upload(files):
313
- file_paths = []
314
- file_names = []
315
- for file in files:
316
- file_paths.append(file.name)
317
- file_names.append(get_base_filename(file.name))
318
-
319
- # Show a message about the number of files uploaded
320
- upload_message = f"✅ {len(files)} image{'s' if len(files) != 1 else ''} uploaded successfully!"
321
-
322
- return file_paths, file_paths, file_names, upload_message
323
-
324
- upload_button.upload(
325
- fn=handle_upload,
326
- inputs=[upload_button],
327
- outputs=[input_gallery, image_state, filename_state, progress_text]
328
- )
329
-
330
- # Function to analyze images with visual feedback
331
- def analyze_images(image_paths, model_choice, length_choice, filenames):
332
- if not image_paths:
333
- return [], [], 0, "", "No images uploaded", "", gr.update(visible=False)
334
-
335
- # Get system prompt based on length selection
336
- sys_prompt = get_sys_prompt(length_choice)
337
-
338
- image_results = []
339
-
340
- for i, image_path in enumerate(image_paths):
341
- # Use original filename as image_id if available
342
- if i < len(filenames) and filenames[i]:
343
- image_id = filenames[i]
344
- else:
345
- image_id = f"Image {i+1}"
346
-
347
- try:
348
- # Open the image file for analysis
349
- img = Image.open(image_path)
350
- prompt0 = prompt_new()
351
-
352
- # Use model_choice directly since it's the internal value
353
- model_name = model_choice
354
-
355
- # Check if this is one of the Gemini models
356
- is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
357
-
358
- if is_gemini_model:
359
- try:
360
- # First try using the dedicated gemini client
361
- result = gemini.generate_caption(
362
- img,
363
- model=model_name,
364
- max_image_size=512,
365
- prompt=prompt0,
366
- prompt_dev=sys_prompt,
367
- temperature=1
368
- )
369
- except Exception as gemini_error:
370
- # If gemini client fails, fall back to standard OR client
371
- result = OR.generate_caption(
372
- img,
373
- model=model_name,
374
- max_image_size=512,
375
- prompt=prompt0,
376
- prompt_dev=sys_prompt,
377
- temperature=1
378
- )
379
- else:
380
- # For all other models, use OR client directly
381
- result = OR.generate_caption(
382
- img,
383
- model=model_name,
384
- max_image_size=512,
385
- prompt=prompt0,
386
- prompt_dev=sys_prompt,
387
- temperature=1
388
- )
389
-
390
- # Add to results
391
- image_results.append({
392
- "image_id": image_id,
393
- "content": result
394
- })
395
-
396
- except Exception as e:
397
- error_message = f"Error: {str(e)}"
398
- image_results.append({
399
- "image_id": image_id,
400
- "content": error_message
401
- })
402
-
403
- # Create a CSV file for download
404
- csv_path = create_csv_file_simple(image_results)
405
-
406
- # Set up initial display with first image
407
- if len(image_paths) > 0:
408
- initial_image = image_paths[0]
409
- initial_counter = f"{1} of {len(image_paths)}"
410
- initial_text = image_results[0]["content"]
411
- else:
412
- initial_image = ""
413
- initial_text = "No images analyzed"
414
- initial_counter = "0 of 0"
415
-
416
- # Make the download section visible now that we have results
417
- download_visible = gr.update(visible=True)
418
-
419
- return (image_paths, image_results, 0, initial_image, initial_counter,
420
- initial_text, csv_path, download_visible)
421
-
422
- # Function to navigate to previous image
423
- def go_to_prev(current_idx, images, results):
424
- if not images or len(images) == 0:
425
- return current_idx, "", "0 of 0", ""
426
-
427
- new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
428
- counter_html = f"{new_idx + 1} of {len(images)}"
429
-
430
- return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
431
-
432
- # Function to navigate to next image
433
- def go_to_next(current_idx, images, results):
434
- if not images or len(images) == 0:
435
- return current_idx, "", "0 of 0", ""
436
-
437
- new_idx = (current_idx + 1) % len(images)
438
- counter_html = f"{new_idx + 1} of {len(images)}"
439
-
440
- return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
441
-
442
- # Show progress indicator during analysis
443
- analyze_button.click(
444
- fn=lambda: (gr.update(visible=True), "Processing images... Please wait"),
445
- inputs=[],
446
- outputs=[progress_container, progress_text],
447
- queue=False
448
- ).then(
449
- fn=analyze_images,
450
- inputs=[image_state, model_choice, length_choice, filename_state],
451
- outputs=[
452
- all_images, all_results, current_index, current_image, image_counter,
453
- analysis_text, csv_download, download_section
454
- ]
455
- ).then(
456
- fn=lambda: (gr.update(visible=False), "Analysis complete!"),
457
- inputs=[],
458
- outputs=[progress_container, progress_text],
459
- queue=False
460
- )
461
-
462
- # Connect navigation buttons
463
- prev_button.click(
464
- fn=go_to_prev,
465
- inputs=[current_index, all_images, all_results],
466
- outputs=[current_index, current_image, image_counter, analysis_text]
467
- )
468
-
469
- next_button.click(
470
- fn=go_to_next,
471
- inputs=[current_index, all_images, all_results],
472
- outputs=[current_index, current_image, image_counter, analysis_text]
473
- )
474
-
475
- # Additional information accordion
476
- with gr.Accordion("About this Tool", open=False):
477
- gr.Markdown("""
478
- ## About the AI Alt-text Generator
479
-
480
- This tool uses advanced AI models to automatically generate alternative text descriptions for images,
481
- helping museums and cultural institutions make their digital content more accessible for visually impaired users.
482
-
483
- ### Features:
484
-
485
- - **Multiple AI Models**: Choose from various AI models including Gemini, GPT-4.1, Claude, and others
486
- - **Customizable Length**: Select short, medium, or long descriptions based on your needs
487
- - **Batch Processing**: Upload and process multiple images at once
488
- - **CSV Export**: Download all generated descriptions in a single file
489
-
490
- ### How to Use:
491
-
492
- 1. Upload one or more images using the upload button
493
- 2. Select your preferred AI model and description length
494
- 3. Click "Analyze Images" to generate descriptions
495
- 4. Navigate through results with the Previous and Next buttons
496
- 5. Download all results as a CSV file
497
-
498
- Developed by the Natural History Museum in Partnership with National Museums Liverpool.
499
- Funded by the DCMS Pilot Scheme.
500
-
501
- For support, feedback, or suggestions, please contact: chris.addis@nhm.ac.uk
502
- """)
503
-
504
- return demo
505
-
506
- # Launch the app
507
- if __name__ == "__main__":
508
- app = create_demo()
509
- app.launch()