Chris Addis committed on
Commit ·
3e18454
1
Parent(s): 46d47e4
revert
Browse files- .ipynb_checkpoints/app-checkpoint.py +371 -18
- .ipynb_checkpoints/app2-checkpoint.py +27 -0
- app-Copy1.py +0 -380
- app.py +137 -266
- .ipynb_checkpoints/app-Copy1-checkpoint.py → app2.py +266 -137
.ipynb_checkpoints/app-checkpoint.py
CHANGED
|
@@ -1,27 +1,380 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
fn=process_image,
|
| 21 |
-
inputs=input_image,
|
| 22 |
-
outputs=output_image
|
| 23 |
-
)
|
| 24 |
|
| 25 |
# Launch the app
|
| 26 |
if __name__ == "__main__":
|
| 27 |
-
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
from PIL import Image
|
| 4 |
+
import io
|
| 5 |
+
import os
|
| 6 |
+
import requests
|
| 7 |
+
import json
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
import openai
|
| 10 |
+
import base64
|
| 11 |
+
import csv
|
| 12 |
+
import tempfile
|
| 13 |
+
import datetime
|
| 14 |
|
| 15 |
+
# Local development: pull settings from a .env file when one is present.
# On Hugging Face Spaces the secrets are already exposed as environment
# variables, so there is nothing to load.
if os.path.exists(".env"):
    load_dotenv()
|
| 19 |
|
| 20 |
+
from io import BytesIO
|
| 21 |
+
import numpy as np
|
| 22 |
+
import requests
|
| 23 |
+
from PIL import Image
|
| 24 |
+
|
| 25 |
+
# import libraries
|
| 26 |
+
from library.utils_model import *
|
| 27 |
+
from library.utils_html import *
|
| 28 |
+
from library.utils_prompt import *
|
| 29 |
+
|
| 30 |
+
# API clients: the default client, plus a second one keyed by GEMINI_API_KEY
# and pointed at the generativelanguage.googleapis.com endpoint.
OR = OpenRouterAPI()
gemini = OpenRouterAPI(
    api_key=os.getenv("GEMINI_API_KEY"),
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)

# Path for storing user preferences; its parent directory is created up front.
PREFERENCES_FILE = "data/user_preferences.csv"
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
|
| 38 |
+
|
| 39 |
+
def get_sys_prompt(length="medium"):
    """Return the system prompt for the requested response length.

    Args:
        length: "short" asks for alt-text of at most 130 characters,
            "medium" for a long description of 250-300 characters. Any
            other value (including "long") asks for a long description of
            at most 450 characters.

    Returns:
        The full prompt string to send as the system/developer message.
    """
    if length == "short":
        task = "alt-text"
        limit = "Responses should be a maximum of 130 characters."
    elif length == "medium":
        task = "long descriptions"
        limit = "Responses should be between 250-300 characters in length."
    else:
        task = "long descriptions"
        limit = "Responses should be a maximum of 450 characters."

    # The instructions are identical for every length; only the task name
    # and the trailing length limit differ.
    return (
        f"You are a museum curator tasked with generating {task} "
        "(as defined in WCAG 2.1) of museum objects for visually impaired "
        "and blind users from images. Use British English and follow museum "
        "accessibility best practices. Do not start with phrases like "
        "'The image shows' or 'This is an image of'. Be precise, concise and "
        f"avoid filler and subjective statements. {limit}"
    )
|
| 47 |
+
|
| 48 |
+
# NOTE: no A/B-testing helper remains here; create_csv_file_simple below is still used to build the CSV download.
|
| 49 |
+
|
| 50 |
+
def create_csv_file_simple(results):
    """Write *results* to a new temporary CSV file and return its path.

    Args:
        results: iterable of dicts; the 'image_id' and 'content' keys are
            written, and a missing key becomes an empty cell.

    Returns:
        Path of the created ``.csv`` file. The file is not deleted here;
        the caller owns it.
    """
    fd, path = tempfile.mkstemp(suffix='.csv')

    # Explicit UTF-8: generated descriptions can contain non-ASCII
    # characters, which the platform default encoding may be unable to write.
    with os.fdopen(fd, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['image_id', 'content'])
        writer.writerows(
            [result.get('image_id', ''), result.get('content', '')]
            for result in results
        )

    return path
|
| 67 |
+
|
| 68 |
+
# Extract original filename without path or extension
|
| 69 |
+
def get_base_filename(filepath):
    """Return the file name of *filepath* without directories or its last extension.

    A falsy *filepath* (empty string, None) yields an empty string.
    """
    if filepath:
        stem, _extension = os.path.splitext(os.path.basename(filepath))
        return stem
    return ""
|
| 77 |
+
|
| 78 |
+
# Define the Gradio interface
|
| 79 |
+
def create_demo():
    """Build and return the Gradio Blocks app for the alt-text generator.

    The left column holds the upload button, model and length selectors, a
    preview gallery, the analyse button and the CSV download. The right
    column shows one image at a time with Previous/Next navigation and the
    text generated for it.

    Relies on module-level names defined elsewhere in this file: the ``OR``
    and ``gemini`` clients, ``prompt_new``, ``get_sys_prompt``,
    ``get_base_filename`` and ``create_csv_file_simple``.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        # Header: title and credits on the left, partner logos on the right.
        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("# AI Alt-text Generator")
                gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
                gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
            with gr.Column(scale=1):
                with gr.Row():
                    # Logos are static, so every interactive feature is disabled.
                    for logo_path in ("images/nhm_logo.png", "images/nml_logo.png"):
                        gr.Image(
                            logo_path,
                            show_label=False,
                            height=120,
                            interactive=False,
                            show_download_button=False,
                            show_share_button=False,
                            show_fullscreen_button=False,
                            container=False,
                        )

        with gr.Row():
            # Left column: controls and uploads
            with gr.Column(scale=1):
                upload_button = gr.UploadButton(
                    "Click to Upload Images",
                    file_types=["image"],
                    file_count="multiple",
                )

                # Choices are (display name, internal value) pairs; callbacks
                # receive the internal value.
                model_choices = [
                    # Gemini
                    ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
                    # GPT-4.1 series
                    ("GPT-4.1 Nano", "gpt-4.1-nano"),
                    ("GPT-4.1 Mini", "gpt-4.1-mini"),
                    ("GPT-4.1", "gpt-4.1"),
                    ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
                    # Other models
                    ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
                    ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
                    # Experimental models
                    ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
                    ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21"),
                ]

                # Internal value of the default choice
                default_model_internal_value = "google/gemini-2.0-flash-001"

                model_choice = gr.Dropdown(
                    choices=model_choices,
                    label="Select Model",
                    value=default_model_internal_value,
                    visible=True,
                )

                length_choice = gr.Radio(
                    choices=["short", "medium", "long"],
                    label="Response Length",
                    value="medium",
                    info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars",
                )

                # Preview gallery for uploaded images
                gr.Markdown("### Uploaded Images")
                input_gallery = gr.Gallery(
                    label="",
                    columns=3,
                    height=150,
                    object_fit="contain",
                )

                analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")

                # Hidden state: uploaded file paths and their display names
                image_state = gr.State([])
                filename_state = gr.State([])

                # CSV download component
                csv_download = gr.File(label="CSV Results")

            # Right column: display area
            with gr.Column(scale=2):
                with gr.Column(elem_classes="image-container"):
                    current_image = gr.Image(
                        label="Current Image",
                        height=600,  # maximum desired height
                        type="filepath",
                        show_fullscreen_button=True,
                        show_download_button=False,
                        show_share_button=False,
                    )

                # Navigation row
                with gr.Row():
                    prev_button = gr.Button("← Previous", size="sm")
                    image_counter = gr.Markdown("", elem_id="image-counter")
                    next_button = gr.Button("Next →", size="sm")

                # Alt-text heading and output
                gr.Markdown("### Generated Alt-text")
                analysis_text = gr.Textbox(
                    label="",
                    value="Please analyze images to see results",
                    lines=6,
                    max_lines=10,
                    interactive=False,
                    show_label=False,
                )

        # Hidden state for navigating the analysed images
        current_index = gr.State(0)
        all_images = gr.State([])
        all_results = gr.State([])

        def handle_upload(files):
            """Return (gallery paths, stored paths, base names) for the uploaded files."""
            # Tolerate an empty/None upload instead of raising.
            file_paths = [file.name for file in (files or [])]
            file_names = [get_base_filename(file_path) for file_path in file_paths]
            return file_paths, file_paths, file_names

        upload_button.upload(
            fn=handle_upload,
            inputs=[upload_button],
            outputs=[input_gallery, image_state, filename_state],
        )

        def _generate_caption(img, model_choice, sys_prompt, prompt):
            """Caption one image, trying the gemini client first where required."""
            # Strip any display label such as " (default)" from the model name.
            model_name = model_choice.split(" (")[0]
            request = dict(
                model=model_name,
                max_image_size=512,
                prompt=prompt,
                prompt_dev=sys_prompt,
                temperature=1,
            )

            # These experimental models are tried on the dedicated gemini client first.
            uses_gemini_client = (
                "gemini-2.5-pro" in model_name
                or "gemini-2.0-flash-thinking" in model_name
            )
            if uses_gemini_client:
                try:
                    return gemini.generate_caption(img, **request)
                except Exception as gemini_error:
                    # Deliberate best-effort fallback, but leave a trace in the logs.
                    print(f"Gemini client failed for {model_name}: {gemini_error}; falling back to the standard client")

            return OR.generate_caption(img, **request)

        def analyze_images(image_paths, model_choice, length_choice, filenames):
            """Generate text for every uploaded image and prime the viewer.

            Returns a 7-tuple matching the analyse button's outputs: image
            paths, per-image results, current index, first image, counter
            text, first result text and the CSV path. A failure on one image
            is recorded as that image's content ("Error: ...") rather than
            aborting the whole batch.
            """
            if not image_paths:
                # None (not "") clears the image and file components.
                return [], [], 0, None, "No images", "", None

            sys_prompt = get_sys_prompt(length_choice)
            image_results = []

            for i, image_path in enumerate(image_paths):
                # Use the original file name as image_id if available.
                if i < len(filenames) and filenames[i]:
                    image_id = filenames[i]
                else:
                    image_id = f"Image {i+1}"

                try:
                    # Context manager closes the underlying file handle.
                    with Image.open(image_path) as img:
                        content = _generate_caption(
                            img, model_choice, sys_prompt, prompt_new()
                        )
                except Exception as e:
                    content = f"Error: {str(e)}"

                image_results.append({"image_id": image_id, "content": content})

            csv_path = create_csv_file_simple(image_results)

            # image_paths is non-empty here, so start the viewer on the first image.
            return (
                image_paths,
                image_results,
                0,
                image_paths[0],
                f"1 of {len(image_paths)}",
                image_results[0]["content"],
                csv_path,
            )

        def _navigate(current_idx, images, results, step):
            """Move *step* images forwards/backwards, wrapping at either end."""
            if not images:
                return current_idx, None, "0 of 0", ""

            new_idx = (current_idx + step) % len(images)
            counter_html = f"{new_idx + 1} of {len(images)}"
            return new_idx, images[new_idx], counter_html, results[new_idx]["content"]

        def go_to_prev(current_idx, images, results):
            """Show the previous image (wraps from the first to the last)."""
            return _navigate(current_idx, images, results, -1)

        def go_to_next(current_idx, images, results):
            """Show the next image (wraps from the last to the first)."""
            return _navigate(current_idx, images, results, 1)

        analyze_button.click(
            fn=analyze_images,
            inputs=[image_state, model_choice, length_choice, filename_state],
            outputs=[
                all_images, all_results, current_index, current_image, image_counter,
                analysis_text, csv_download,
            ],
        )

        prev_button.click(
            fn=go_to_prev,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text],
        )

        next_button.click(
            fn=go_to_next,
            inputs=[current_index, all_images, all_results],
            outputs=[current_index, current_image, image_counter, analysis_text],
        )

        # Optional: additional information
        with gr.Accordion("About", open=False):
            gr.Markdown("""
            ## About this demo

            This demo generates alternative text for images.

            - Upload one or more images using the upload button
            - Choose a model and response length for generation
            - Navigate through the images with the Previous and Next buttons
            - Download CSV with all results

            Developed by the Natural History Museum in Partnership with National Museums Liverpool.

            If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
            chris.addis@nhm.ac.uk
            """)

    return demo
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
| 377 |
# Launch the app
|
| 378 |
if __name__ == "__main__":
    # Build the interface only when run as a script, then start the server.
    app = create_demo()
    app.launch()
|
.ipynb_checkpoints/app2-checkpoint.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def process_image(image):
    """Return *image* unchanged so it can be shown in the output component."""
    return image
|
| 6 |
+
|
| 7 |
+
# Create the Gradio interface
|
| 8 |
+
with gr.Blocks() as demo:
    gr.Markdown("# Image Uploader and Viewer")

    # Two side-by-side columns: upload on the left, result on the right.
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload an image")
            upload_button = gr.Button("Display Image")

        with gr.Column():
            output_image = gr.Image(label="Displayed Image")

    # Clicking the button passes the uploaded image through process_image.
    upload_button.click(
        fn=process_image,
        inputs=input_image,
        outputs=output_image,
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
|
app-Copy1.py
DELETED
|
@@ -1,380 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import numpy as np
|
| 3 |
-
from PIL import Image
|
| 4 |
-
import io
|
| 5 |
-
import os
|
| 6 |
-
import requests
|
| 7 |
-
import json
|
| 8 |
-
from dotenv import load_dotenv
|
| 9 |
-
import openai
|
| 10 |
-
import base64
|
| 11 |
-
import csv
|
| 12 |
-
import tempfile
|
| 13 |
-
import datetime
|
| 14 |
-
|
| 15 |
-
# Local development: pull settings from a .env file when one is present.
# On Hugging Face Spaces the secrets are already exposed as environment
# variables, so there is nothing to load.
if os.path.exists(".env"):
    load_dotenv()
|
| 19 |
-
|
| 20 |
-
from io import BytesIO
|
| 21 |
-
import numpy as np
|
| 22 |
-
import requests
|
| 23 |
-
from PIL import Image
|
| 24 |
-
|
| 25 |
-
# import libraries
|
| 26 |
-
from library.utils_model import *
|
| 27 |
-
from library.utils_html import *
|
| 28 |
-
from library.utils_prompt import *
|
| 29 |
-
|
| 30 |
-
# API clients: the default client, plus a second one keyed by GEMINI_API_KEY
# and pointed at the generativelanguage.googleapis.com endpoint.
OR = OpenRouterAPI()
gemini = OpenRouterAPI(
    api_key=os.getenv("GEMINI_API_KEY"),
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)

# Path for storing user preferences; its parent directory is created up front.
PREFERENCES_FILE = "data/user_preferences.csv"
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
|
| 38 |
-
|
| 39 |
-
def get_sys_prompt(length="medium"):
    """Return the system prompt for the requested response length.

    Args:
        length: "short" asks for alt-text of at most 130 characters,
            "medium" for a long description of 250-300 characters. Any
            other value (including "long") asks for a long description of
            at most 450 characters.

    Returns:
        The full prompt string to send as the system/developer message.
    """
    if length == "short":
        task = "alt-text"
        limit = "Responses should be a maximum of 130 characters."
    elif length == "medium":
        task = "long descriptions"
        limit = "Responses should be between 250-300 characters in length."
    else:
        task = "long descriptions"
        limit = "Responses should be a maximum of 450 characters."

    # The instructions are identical for every length; only the task name
    # and the trailing length limit differ.
    return (
        f"You are a museum curator tasked with generating {task} "
        "(as defined in WCAG 2.1) of museum objects for visually impaired "
        "and blind users from images. Use British English and follow museum "
        "accessibility best practices. Do not start with phrases like "
        "'The image shows' or 'This is an image of'. Be precise, concise and "
        f"avoid filler and subjective statements. {limit}"
    )
|
| 47 |
-
|
| 48 |
-
# NOTE: no A/B-testing helper remains here; create_csv_file_simple below is still used to build the CSV download.
|
| 49 |
-
|
| 50 |
-
def create_csv_file_simple(results):
    """Write *results* to a new temporary CSV file and return its path.

    Args:
        results: iterable of dicts; the 'image_id' and 'content' keys are
            written, and a missing key becomes an empty cell.

    Returns:
        Path of the created ``.csv`` file. The file is not deleted here;
        the caller owns it.
    """
    fd, path = tempfile.mkstemp(suffix='.csv')

    # Explicit UTF-8: generated descriptions can contain non-ASCII
    # characters, which the platform default encoding may be unable to write.
    with os.fdopen(fd, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['image_id', 'content'])
        writer.writerows(
            [result.get('image_id', ''), result.get('content', '')]
            for result in results
        )

    return path
|
| 67 |
-
|
| 68 |
-
# Extract original filename without path or extension
|
| 69 |
-
def get_base_filename(filepath):
    """Return the file name of *filepath* without directories or its last extension.

    A falsy *filepath* (empty string, None) yields an empty string.
    """
    if filepath:
        stem, _extension = os.path.splitext(os.path.basename(filepath))
        return stem
    return ""
|
| 77 |
-
|
| 78 |
-
# Define the Gradio interface
|
| 79 |
-
def create_demo():
|
| 80 |
-
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
| 81 |
-
# Replace the existing logo code section:
|
| 82 |
-
with gr.Row():
|
| 83 |
-
with gr.Column(scale=3):
|
| 84 |
-
gr.Markdown("# AI Alt-text Generator")
|
| 85 |
-
gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
|
| 86 |
-
gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
|
| 87 |
-
with gr.Column(scale=1):
|
| 88 |
-
with gr.Row():
|
| 89 |
-
# Use gr.Image with all interactive features disabled
|
| 90 |
-
gr.Image("images/nhm_logo.png", show_label=False, height=120,
|
| 91 |
-
interactive=False, show_download_button=False,
|
| 92 |
-
show_share_button=False, show_fullscreen_button=False,
|
| 93 |
-
container=False)
|
| 94 |
-
gr.Image("images/nml_logo.png", show_label=False, height=120,
|
| 95 |
-
interactive=False, show_download_button=False,
|
| 96 |
-
show_share_button=False, show_fullscreen_button=False,
|
| 97 |
-
container=False)
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
with gr.Row():
|
| 101 |
-
# Left column: Controls and uploads
|
| 102 |
-
with gr.Column(scale=1):
|
| 103 |
-
# Upload interface
|
| 104 |
-
upload_button = gr.UploadButton(
|
| 105 |
-
"Click to Upload Images",
|
| 106 |
-
file_types=["image"],
|
| 107 |
-
file_count="multiple"
|
| 108 |
-
)
|
| 109 |
-
|
| 110 |
-
# Define choices as a list of tuples: (Display Name, Internal Value)
|
| 111 |
-
model_choices = [
|
| 112 |
-
# Gemini
|
| 113 |
-
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 114 |
-
# GPT-4.1 Series
|
| 115 |
-
("GPT-4.1 Nano", "gpt-4.1-nano"),
|
| 116 |
-
("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 117 |
-
("GPT-4.1", "gpt-4.1"),
|
| 118 |
-
("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
| 119 |
-
# Other Models
|
| 120 |
-
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 121 |
-
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 122 |
-
# Experimental Models
|
| 123 |
-
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 124 |
-
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 125 |
-
]
|
| 126 |
-
|
| 127 |
-
# Find the internal value of the default choice
|
| 128 |
-
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 129 |
-
|
| 130 |
-
# Add model selection dropdown
|
| 131 |
-
model_choice = gr.Dropdown(
|
| 132 |
-
choices=model_choices,
|
| 133 |
-
label="Select Model",
|
| 134 |
-
value=default_model_internal_value, # Use the internal value for the default
|
| 135 |
-
# info="Choose the language model to use." # Optional: Add extra info tooltip
|
| 136 |
-
visible=True
|
| 137 |
-
)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
# Add response length selection
|
| 141 |
-
length_choice = gr.Radio(
|
| 142 |
-
choices=["short", "medium", "long"],
|
| 143 |
-
label="Response Length",
|
| 144 |
-
value="medium",
|
| 145 |
-
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 146 |
-
)
|
| 147 |
-
|
| 148 |
-
# Preview gallery for uploaded images
|
| 149 |
-
gr.Markdown("### Uploaded Images")
|
| 150 |
-
input_gallery = gr.Gallery(
|
| 151 |
-
label="",
|
| 152 |
-
columns=3,
|
| 153 |
-
height=150,
|
| 154 |
-
object_fit="contain"
|
| 155 |
-
)
|
| 156 |
-
|
| 157 |
-
# Analysis button
|
| 158 |
-
analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")
|
| 159 |
-
|
| 160 |
-
# Hidden state component to store image info
|
| 161 |
-
image_state = gr.State([])
|
| 162 |
-
filename_state = gr.State([])
|
| 163 |
-
|
| 164 |
-
# CSV download component
|
| 165 |
-
csv_download = gr.File(label="CSV Results")
|
| 166 |
-
|
| 167 |
-
# Right column: Display area
|
| 168 |
-
with gr.Column(scale=2):
|
| 169 |
-
with gr.Column(elem_classes="image-container"):
|
| 170 |
-
current_image = gr.Image(
|
| 171 |
-
label="Current Image",
|
| 172 |
-
height=600, # Set the maximum desired height
|
| 173 |
-
type="filepath",
|
| 174 |
-
show_fullscreen_button=True,
|
| 175 |
-
show_download_button=False,
|
| 176 |
-
show_share_button=False
|
| 177 |
-
)
|
| 178 |
-
|
| 179 |
-
# Navigation row
|
| 180 |
-
with gr.Row():
|
| 181 |
-
prev_button = gr.Button("← Previous", size="sm")
|
| 182 |
-
image_counter = gr.Markdown("", elem_id="image-counter")
|
| 183 |
-
next_button = gr.Button("Next →", size="sm")
|
| 184 |
-
|
| 185 |
-
# Alt-text heading and output
|
| 186 |
-
gr.Markdown("### Generated Alt-text")
|
| 187 |
-
|
| 188 |
-
# Alt-text
|
| 189 |
-
analysis_text = gr.Textbox(
|
| 190 |
-
label="",
|
| 191 |
-
value="Please analyze images to see results",
|
| 192 |
-
lines=6,
|
| 193 |
-
max_lines=10,
|
| 194 |
-
interactive=False,
|
| 195 |
-
show_label=False
|
| 196 |
-
)
|
| 197 |
-
|
| 198 |
-
# Hidden state for gallery navigation
|
| 199 |
-
current_index = gr.State(0)
|
| 200 |
-
all_images = gr.State([])
|
| 201 |
-
all_results = gr.State([])
|
| 202 |
-
|
| 203 |
-
# Handle file uploads - store files for use during analysis
|
| 204 |
-
def handle_upload(files):
|
| 205 |
-
file_paths = []
|
| 206 |
-
file_names = []
|
| 207 |
-
for file in files:
|
| 208 |
-
file_paths.append(file.name)
|
| 209 |
-
# Extract filename without path or extension for later use
|
| 210 |
-
file_names.append(get_base_filename(file.name))
|
| 211 |
-
return file_paths, file_paths, file_names
|
| 212 |
-
|
| 213 |
-
upload_button.upload(
|
| 214 |
-
fn=handle_upload,
|
| 215 |
-
inputs=[upload_button],
|
| 216 |
-
outputs=[input_gallery, image_state, filename_state]
|
| 217 |
-
)
|
| 218 |
-
|
| 219 |
-
# Function to analyze images
|
| 220 |
-
# Modify the analyze_images function in your code:
|
| 221 |
-
|
| 222 |
-
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 223 |
-
if not image_paths:
|
| 224 |
-
return [], [], 0, "", "No images", "", ""
|
| 225 |
-
|
| 226 |
-
# Get system prompt based on length selection
|
| 227 |
-
sys_prompt = get_sys_prompt(length_choice)
|
| 228 |
-
|
| 229 |
-
image_results = []
|
| 230 |
-
|
| 231 |
-
for i, image_path in enumerate(image_paths):
|
| 232 |
-
# Use original filename as image_id if available
|
| 233 |
-
if i < len(filenames) and filenames[i]:
|
| 234 |
-
image_id = filenames[i]
|
| 235 |
-
else:
|
| 236 |
-
image_id = f"Image {i+1}"
|
| 237 |
-
|
| 238 |
-
try:
|
| 239 |
-
# Open the image file for analysis
|
| 240 |
-
img = Image.open(image_path)
|
| 241 |
-
prompt0 = prompt_new() # Using the new prompt function
|
| 242 |
-
|
| 243 |
-
# Extract the actual model name (remove any labels like "(default)")
|
| 244 |
-
if " (" in model_choice:
|
| 245 |
-
model_name = model_choice.split(" (")[0]
|
| 246 |
-
else:
|
| 247 |
-
model_name = model_choice
|
| 248 |
-
|
| 249 |
-
# Check if this is one of the Gemini models that needs special handling
|
| 250 |
-
is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
|
| 251 |
-
|
| 252 |
-
if is_gemini_model:
|
| 253 |
-
try:
|
| 254 |
-
# First try using the dedicated gemini client
|
| 255 |
-
result = gemini.generate_caption(
|
| 256 |
-
img,
|
| 257 |
-
model=model_name,
|
| 258 |
-
max_image_size=512,
|
| 259 |
-
prompt=prompt0,
|
| 260 |
-
prompt_dev=sys_prompt,
|
| 261 |
-
temperature=1
|
| 262 |
-
)
|
| 263 |
-
except Exception as gemini_error:
|
| 264 |
-
# If gemini client fails, fall back to standard OR client
|
| 265 |
-
result = OR.generate_caption(
|
| 266 |
-
img,
|
| 267 |
-
model=model_name,
|
| 268 |
-
max_image_size=512,
|
| 269 |
-
prompt=prompt0,
|
| 270 |
-
prompt_dev=sys_prompt,
|
| 271 |
-
temperature=1
|
| 272 |
-
)
|
| 273 |
-
else:
|
| 274 |
-
# For all other models, use OR client directly
|
| 275 |
-
result = OR.generate_caption(
|
| 276 |
-
img,
|
| 277 |
-
model=model_name,
|
| 278 |
-
max_image_size=512,
|
| 279 |
-
prompt=prompt0,
|
| 280 |
-
prompt_dev=sys_prompt,
|
| 281 |
-
temperature=1
|
| 282 |
-
)
|
| 283 |
-
|
| 284 |
-
# Add to results
|
| 285 |
-
image_results.append({
|
| 286 |
-
"image_id": image_id,
|
| 287 |
-
"content": result
|
| 288 |
-
})
|
| 289 |
-
|
| 290 |
-
except Exception as e:
|
| 291 |
-
error_message = f"Error: {str(e)}"
|
| 292 |
-
image_results.append({
|
| 293 |
-
"image_id": image_id,
|
| 294 |
-
"content": error_message
|
| 295 |
-
})
|
| 296 |
-
|
| 297 |
-
# Create a CSV file for download
|
| 298 |
-
csv_path = create_csv_file_simple(image_results)
|
| 299 |
-
|
| 300 |
-
# Set up initial display with first image
|
| 301 |
-
if len(image_paths) > 0:
|
| 302 |
-
initial_image = image_paths[0]
|
| 303 |
-
initial_counter = f"{1} of {len(image_paths)}"
|
| 304 |
-
initial_text = image_results[0]["content"]
|
| 305 |
-
else:
|
| 306 |
-
initial_image = ""
|
| 307 |
-
initial_text = "No images analyzed"
|
| 308 |
-
initial_counter = "0 of 0"
|
| 309 |
-
|
| 310 |
-
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 311 |
-
initial_text, csv_path)
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
# Function to navigate to previous image
|
| 315 |
-
def go_to_prev(current_idx, images, results):
|
| 316 |
-
if not images or len(images) == 0:
|
| 317 |
-
return current_idx, "", "0 of 0", ""
|
| 318 |
-
|
| 319 |
-
new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
|
| 320 |
-
counter_html = f"{new_idx + 1} of {len(images)}"
|
| 321 |
-
|
| 322 |
-
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 323 |
-
|
| 324 |
-
# Function to navigate to next image
|
| 325 |
-
def go_to_next(current_idx, images, results):
|
| 326 |
-
if not images or len(images) == 0:
|
| 327 |
-
return current_idx, "", "0 of 0", ""
|
| 328 |
-
|
| 329 |
-
new_idx = (current_idx + 1) % len(images)
|
| 330 |
-
counter_html = f"{new_idx + 1} of {len(images)}"
|
| 331 |
-
|
| 332 |
-
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 333 |
-
|
| 334 |
-
# Connect the analyze button
|
| 335 |
-
analyze_button.click(
|
| 336 |
-
fn=analyze_images,
|
| 337 |
-
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 338 |
-
outputs=[
|
| 339 |
-
all_images, all_results, current_index, current_image, image_counter,
|
| 340 |
-
analysis_text, csv_download
|
| 341 |
-
]
|
| 342 |
-
)
|
| 343 |
-
|
| 344 |
-
# Connect navigation buttons
|
| 345 |
-
prev_button.click(
|
| 346 |
-
fn=go_to_prev,
|
| 347 |
-
inputs=[current_index, all_images, all_results],
|
| 348 |
-
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 349 |
-
)
|
| 350 |
-
|
| 351 |
-
next_button.click(
|
| 352 |
-
fn=go_to_next,
|
| 353 |
-
inputs=[current_index, all_images, all_results],
|
| 354 |
-
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 355 |
-
)
|
| 356 |
-
|
| 357 |
-
# Optional: Add additional information
|
| 358 |
-
with gr.Accordion("About", open=False):
|
| 359 |
-
gr.Markdown("""
|
| 360 |
-
## About this demo
|
| 361 |
-
|
| 362 |
-
This demo generates alternative text for images.
|
| 363 |
-
|
| 364 |
-
- Upload one or more images using the upload button
|
| 365 |
-
- Choose a model and response length for generation
|
| 366 |
-
- Navigate through the images with the Previous and Next buttons
|
| 367 |
-
- Download CSV with all results
|
| 368 |
-
|
| 369 |
-
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
| 370 |
-
|
| 371 |
-
If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
|
| 372 |
-
chris.addis@nhm.ac.uk
|
| 373 |
-
""")
|
| 374 |
-
|
| 375 |
-
return demo
|
| 376 |
-
|
| 377 |
-
# Launch the app
|
| 378 |
-
if __name__ == "__main__":
|
| 379 |
-
app = create_demo()
|
| 380 |
-
app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -45,6 +45,8 @@ def get_sys_prompt(length="medium"):
|
|
| 45 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 46 |
return dev_prompt
|
| 47 |
|
|
|
|
|
|
|
| 48 |
def create_csv_file_simple(results):
|
| 49 |
"""Create a CSV file from the results and return the path"""
|
| 50 |
# Create a temporary file
|
|
@@ -73,264 +75,153 @@ def get_base_filename(filepath):
|
|
| 73 |
filename = os.path.splitext(basename)[0]
|
| 74 |
return filename
|
| 75 |
|
| 76 |
-
# Define
|
| 77 |
-
custom_css = """
|
| 78 |
-
.container {
|
| 79 |
-
max-width: 1200px;
|
| 80 |
-
margin: 0 auto;
|
| 81 |
-
}
|
| 82 |
-
.header {
|
| 83 |
-
text-align: center;
|
| 84 |
-
margin-bottom: 20px;
|
| 85 |
-
border-bottom: 2px solid #eee;
|
| 86 |
-
padding-bottom: 15px;
|
| 87 |
-
}
|
| 88 |
-
.model-card {
|
| 89 |
-
border: 1px solid #e0e0e0;
|
| 90 |
-
border-radius: 8px;
|
| 91 |
-
padding: 15px;
|
| 92 |
-
background-color: #f9f9f9;
|
| 93 |
-
margin-bottom: 15px;
|
| 94 |
-
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 95 |
-
}
|
| 96 |
-
.upload-box {
|
| 97 |
-
border: 2px dashed #ccc;
|
| 98 |
-
border-radius: 8px;
|
| 99 |
-
padding: 20px;
|
| 100 |
-
text-align: center;
|
| 101 |
-
margin-bottom: 15px;
|
| 102 |
-
background-color: #f7f7f7;
|
| 103 |
-
transition: all 0.3s ease;
|
| 104 |
-
}
|
| 105 |
-
.upload-box:hover {
|
| 106 |
-
border-color: #2196F3;
|
| 107 |
-
background-color: #f0f8ff;
|
| 108 |
-
}
|
| 109 |
-
.gallery-container {
|
| 110 |
-
background-color: #f5f5f5;
|
| 111 |
-
border-radius: 8px;
|
| 112 |
-
padding: 10px;
|
| 113 |
-
margin-bottom: 15px;
|
| 114 |
-
}
|
| 115 |
-
.result-container {
|
| 116 |
-
border: 1px solid #e0e0e0;
|
| 117 |
-
border-radius: 8px;
|
| 118 |
-
padding: 15px;
|
| 119 |
-
margin-top: 20px;
|
| 120 |
-
background-color: white;
|
| 121 |
-
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 122 |
-
}
|
| 123 |
-
.nav-buttons {
|
| 124 |
-
display: flex;
|
| 125 |
-
justify-content: space-between;
|
| 126 |
-
align-items: center;
|
| 127 |
-
margin: 10px 0;
|
| 128 |
-
}
|
| 129 |
-
.footer {
|
| 130 |
-
text-align: center;
|
| 131 |
-
margin-top: 30px;
|
| 132 |
-
padding-top: 15px;
|
| 133 |
-
border-top: 1px solid #eee;
|
| 134 |
-
color: #666;
|
| 135 |
-
font-size: 0.9em;
|
| 136 |
-
}
|
| 137 |
-
.logo-container {
|
| 138 |
-
display: flex;
|
| 139 |
-
justify-content: center;
|
| 140 |
-
align-items: center;
|
| 141 |
-
gap: 20px;
|
| 142 |
-
margin-bottom: 10px;
|
| 143 |
-
}
|
| 144 |
-
.length-selector {
|
| 145 |
-
display: flex;
|
| 146 |
-
gap: 10px;
|
| 147 |
-
margin-bottom: 15px;
|
| 148 |
-
}
|
| 149 |
-
.progress-indicator {
|
| 150 |
-
height: 4px;
|
| 151 |
-
background-color: #f0f0f0;
|
| 152 |
-
border-radius: 2px;
|
| 153 |
-
overflow: hidden;
|
| 154 |
-
margin-bottom: 15px;
|
| 155 |
-
}
|
| 156 |
-
.progress-bar {
|
| 157 |
-
height: 100%;
|
| 158 |
-
background-color: #4CAF50;
|
| 159 |
-
width: 0%;
|
| 160 |
-
transition: width 0.3s ease;
|
| 161 |
-
}
|
| 162 |
-
"""
|
| 163 |
-
|
| 164 |
-
# Define the Gradio interface with the new design
|
| 165 |
def create_demo():
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
with gr.Blocks(theme=theme, css=custom_css) as demo:
|
| 170 |
-
# Header section
|
| 171 |
-
with gr.Row(elem_classes="header"):
|
| 172 |
with gr.Column(scale=3):
|
| 173 |
gr.Markdown("# AI Alt-text Generator")
|
| 174 |
-
gr.Markdown("Upload images to generate
|
| 175 |
-
|
| 176 |
-
with gr.Column(scale=1
|
| 177 |
-
gr.
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
-
|
| 185 |
with gr.Row():
|
| 186 |
-
# Left
|
| 187 |
-
with gr.Column(scale=1
|
| 188 |
-
# Upload
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
size="lg"
|
| 195 |
-
)
|
| 196 |
-
gr.Markdown("*Drag and drop or click to upload multiple images*")
|
| 197 |
|
| 198 |
-
#
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
#
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
# Experimental Models
|
| 215 |
-
("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
|
| 216 |
-
("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 217 |
-
]
|
| 218 |
-
|
| 219 |
-
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 220 |
-
|
| 221 |
-
model_choice = gr.Dropdown(
|
| 222 |
-
choices=model_choices,
|
| 223 |
-
label="AI Model",
|
| 224 |
-
value=default_model_internal_value,
|
| 225 |
-
info="Select the AI model for generating descriptions",
|
| 226 |
-
visible=True
|
| 227 |
-
)
|
| 228 |
-
|
| 229 |
-
# Length selector with visual indicators
|
| 230 |
-
gr.Markdown("### Response Length")
|
| 231 |
-
with gr.Row(elem_classes="length-selector"):
|
| 232 |
-
length_choice = gr.Radio(
|
| 233 |
-
choices=["short", "medium", "long"],
|
| 234 |
-
label="Response Length",
|
| 235 |
-
value="medium",
|
| 236 |
-
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 237 |
-
)
|
| 238 |
|
| 239 |
-
#
|
| 240 |
-
|
| 241 |
-
gr.Markdown("### Uploaded Images")
|
| 242 |
-
input_gallery = gr.Gallery(
|
| 243 |
-
label="",
|
| 244 |
-
columns=3,
|
| 245 |
-
height=180,
|
| 246 |
-
object_fit="contain"
|
| 247 |
-
)
|
| 248 |
-
|
| 249 |
-
# Analysis button
|
| 250 |
-
analyze_button = gr.Button("🔍 Analyze Images", variant="primary", size="lg")
|
| 251 |
|
| 252 |
-
#
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
-
#
|
|
|
|
|
|
|
|
|
|
| 258 |
image_state = gr.State([])
|
| 259 |
filename_state = gr.State([])
|
| 260 |
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
gr.HTML('<div class="progress-indicator"><div class="progress-bar" id="progress-bar"></div></div>')
|
| 268 |
-
progress_text = gr.Markdown("Processing...", elem_id="progress-text")
|
| 269 |
-
|
| 270 |
-
# Image display
|
| 271 |
current_image = gr.Image(
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
elem_classes="current-image"
|
| 279 |
-
)
|
| 280 |
-
|
| 281 |
-
# Navigation controls
|
| 282 |
-
with gr.Row(elem_classes="nav-buttons"):
|
| 283 |
-
prev_button = gr.Button("← Previous", size="sm", variant="secondary")
|
| 284 |
-
image_counter = gr.Markdown("", elem_id="image-counter")
|
| 285 |
-
next_button = gr.Button("Next →", size="sm", variant="secondary")
|
| 286 |
-
|
| 287 |
-
# Alt-text results
|
| 288 |
-
gr.Markdown("### Generated Alt-text", elem_id="result-heading")
|
| 289 |
-
analysis_text = gr.Textbox(
|
| 290 |
-
label="",
|
| 291 |
-
value="Images will appear here after analysis. Please upload and analyze images to see results.",
|
| 292 |
-
lines=6,
|
| 293 |
-
max_lines=10,
|
| 294 |
-
interactive=False,
|
| 295 |
-
show_label=False,
|
| 296 |
-
elem_classes="result-text"
|
| 297 |
)
|
| 298 |
|
| 299 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
current_index = gr.State(0)
|
| 301 |
all_images = gr.State([])
|
| 302 |
all_results = gr.State([])
|
| 303 |
|
| 304 |
-
#
|
| 305 |
-
with gr.Row(elem_classes="footer"):
|
| 306 |
-
gr.Markdown("""
|
| 307 |
-
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
| 308 |
-
Funded by the DCMS Pilot Scheme. For support, contact: chris.addis@nhm.ac.uk
|
| 309 |
-
""")
|
| 310 |
-
|
| 311 |
-
# Handle file uploads
|
| 312 |
def handle_upload(files):
|
| 313 |
file_paths = []
|
| 314 |
file_names = []
|
| 315 |
for file in files:
|
| 316 |
file_paths.append(file.name)
|
|
|
|
| 317 |
file_names.append(get_base_filename(file.name))
|
| 318 |
-
|
| 319 |
-
# Show a message about the number of files uploaded
|
| 320 |
-
upload_message = f"✅ {len(files)} image{'s' if len(files) != 1 else ''} uploaded successfully!"
|
| 321 |
-
|
| 322 |
-
return file_paths, file_paths, file_names, upload_message
|
| 323 |
|
| 324 |
upload_button.upload(
|
| 325 |
fn=handle_upload,
|
| 326 |
inputs=[upload_button],
|
| 327 |
-
outputs=[input_gallery, image_state, filename_state
|
| 328 |
)
|
| 329 |
|
| 330 |
-
# Function to analyze images
|
|
|
|
|
|
|
| 331 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 332 |
if not image_paths:
|
| 333 |
-
return [], [], 0, "", "No images
|
| 334 |
|
| 335 |
# Get system prompt based on length selection
|
| 336 |
sys_prompt = get_sys_prompt(length_choice)
|
|
@@ -347,12 +238,15 @@ def create_demo():
|
|
| 347 |
try:
|
| 348 |
# Open the image file for analysis
|
| 349 |
img = Image.open(image_path)
|
| 350 |
-
prompt0 = prompt_new()
|
| 351 |
|
| 352 |
-
#
|
| 353 |
-
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
-
# Check if this is one of the Gemini models
|
| 356 |
is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
|
| 357 |
|
| 358 |
if is_gemini_model:
|
|
@@ -413,11 +307,9 @@ def create_demo():
|
|
| 413 |
initial_text = "No images analyzed"
|
| 414 |
initial_counter = "0 of 0"
|
| 415 |
|
| 416 |
-
# Make the download section visible now that we have results
|
| 417 |
-
download_visible = gr.update(visible=True)
|
| 418 |
-
|
| 419 |
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 420 |
-
initial_text, csv_path
|
|
|
|
| 421 |
|
| 422 |
# Function to navigate to previous image
|
| 423 |
def go_to_prev(current_idx, images, results):
|
|
@@ -439,24 +331,14 @@ def create_demo():
|
|
| 439 |
|
| 440 |
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 441 |
|
| 442 |
-
#
|
| 443 |
analyze_button.click(
|
| 444 |
-
fn=lambda: (gr.update(visible=True), "Processing images... Please wait"),
|
| 445 |
-
inputs=[],
|
| 446 |
-
outputs=[progress_container, progress_text],
|
| 447 |
-
queue=False
|
| 448 |
-
).then(
|
| 449 |
fn=analyze_images,
|
| 450 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 451 |
outputs=[
|
| 452 |
all_images, all_results, current_index, current_image, image_counter,
|
| 453 |
-
analysis_text, csv_download
|
| 454 |
]
|
| 455 |
-
).then(
|
| 456 |
-
fn=lambda: (gr.update(visible=False), "Analysis complete!"),
|
| 457 |
-
inputs=[],
|
| 458 |
-
outputs=[progress_container, progress_text],
|
| 459 |
-
queue=False
|
| 460 |
)
|
| 461 |
|
| 462 |
# Connect navigation buttons
|
|
@@ -472,33 +354,22 @@ def create_demo():
|
|
| 472 |
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 473 |
)
|
| 474 |
|
| 475 |
-
#
|
| 476 |
-
with gr.Accordion("About
|
| 477 |
gr.Markdown("""
|
| 478 |
-
## About
|
| 479 |
-
|
| 480 |
-
This tool uses advanced AI models to automatically generate alternative text descriptions for images,
|
| 481 |
-
helping museums and cultural institutions make their digital content more accessible for visually impaired users.
|
| 482 |
-
|
| 483 |
-
### Features:
|
| 484 |
-
|
| 485 |
-
- **Multiple AI Models**: Choose from various AI models including Gemini, GPT-4.1, Claude, and others
|
| 486 |
-
- **Customizable Length**: Select short, medium, or long descriptions based on your needs
|
| 487 |
-
- **Batch Processing**: Upload and process multiple images at once
|
| 488 |
-
- **CSV Export**: Download all generated descriptions in a single file
|
| 489 |
|
| 490 |
-
|
| 491 |
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
5. Download all results as a CSV file
|
| 497 |
|
| 498 |
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
| 499 |
-
Funded by the DCMS Pilot Scheme.
|
| 500 |
|
| 501 |
-
|
|
|
|
| 502 |
""")
|
| 503 |
|
| 504 |
return demo
|
|
|
|
| 45 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 46 |
return dev_prompt
|
| 47 |
|
| 48 |
+
# This function is no longer needed since we removed A/B testing
|
| 49 |
+
|
| 50 |
def create_csv_file_simple(results):
|
| 51 |
"""Create a CSV file from the results and return the path"""
|
| 52 |
# Create a temporary file
|
|
|
|
| 75 |
filename = os.path.splitext(basename)[0]
|
| 76 |
return filename
|
| 77 |
|
| 78 |
+
# Define the Gradio interface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def create_demo():
|
| 80 |
+
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
| 81 |
+
# Replace the existing logo code section:
|
| 82 |
+
with gr.Row():
|
|
|
|
|
|
|
|
|
|
| 83 |
with gr.Column(scale=3):
|
| 84 |
gr.Markdown("# AI Alt-text Generator")
|
| 85 |
+
gr.Markdown("Upload one or more images to generate alternative text (designed to meet WCAG 2.1 Guidelines)")
|
| 86 |
+
gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
|
| 87 |
+
with gr.Column(scale=1):
|
| 88 |
+
with gr.Row():
|
| 89 |
+
# Use gr.Image with all interactive features disabled
|
| 90 |
+
gr.Image("images/nhm_logo.png", show_label=False, height=120,
|
| 91 |
+
interactive=False, show_download_button=False,
|
| 92 |
+
show_share_button=False, show_fullscreen_button=False,
|
| 93 |
+
container=False)
|
| 94 |
+
gr.Image("images/nml_logo.png", show_label=False, height=120,
|
| 95 |
+
interactive=False, show_download_button=False,
|
| 96 |
+
show_share_button=False, show_fullscreen_button=False,
|
| 97 |
+
container=False)
|
| 98 |
|
| 99 |
+
|
| 100 |
with gr.Row():
|
| 101 |
+
# Left column: Controls and uploads
|
| 102 |
+
with gr.Column(scale=1):
|
| 103 |
+
# Upload interface
|
| 104 |
+
upload_button = gr.UploadButton(
|
| 105 |
+
"Click to Upload Images",
|
| 106 |
+
file_types=["image"],
|
| 107 |
+
file_count="multiple"
|
| 108 |
+
)
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
# Define choices as a list of tuples: (Display Name, Internal Value)
|
| 111 |
+
model_choices = [
|
| 112 |
+
# Gemini
|
| 113 |
+
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 114 |
+
# GPT-4.1 Series
|
| 115 |
+
("GPT-4.1 Nano", "gpt-4.1-nano"),
|
| 116 |
+
("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 117 |
+
("GPT-4.1", "gpt-4.1"),
|
| 118 |
+
("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
| 119 |
+
# Other Models
|
| 120 |
+
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 121 |
+
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 122 |
+
# Experimental Models
|
| 123 |
+
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 124 |
+
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 125 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
+
# Find the internal value of the default choice
|
| 128 |
+
default_model_internal_value = "google/gemini-2.0-flash-001"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
+
# Add model selection dropdown
|
| 131 |
+
model_choice = gr.Dropdown(
|
| 132 |
+
choices=model_choices,
|
| 133 |
+
label="Select Model",
|
| 134 |
+
value=default_model_internal_value, # Use the internal value for the default
|
| 135 |
+
# info="Choose the language model to use." # Optional: Add extra info tooltip
|
| 136 |
+
visible=True
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# Add response length selection
|
| 141 |
+
length_choice = gr.Radio(
|
| 142 |
+
choices=["short", "medium", "long"],
|
| 143 |
+
label="Response Length",
|
| 144 |
+
value="medium",
|
| 145 |
+
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
# Preview gallery for uploaded images
|
| 149 |
+
gr.Markdown("### Uploaded Images")
|
| 150 |
+
input_gallery = gr.Gallery(
|
| 151 |
+
label="",
|
| 152 |
+
columns=3,
|
| 153 |
+
height=150,
|
| 154 |
+
object_fit="contain"
|
| 155 |
+
)
|
| 156 |
|
| 157 |
+
# Analysis button
|
| 158 |
+
analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")
|
| 159 |
+
|
| 160 |
+
# Hidden state component to store image info
|
| 161 |
image_state = gr.State([])
|
| 162 |
filename_state = gr.State([])
|
| 163 |
|
| 164 |
+
# CSV download component
|
| 165 |
+
csv_download = gr.File(label="CSV Results")
|
| 166 |
+
|
| 167 |
+
# Right column: Display area
|
| 168 |
+
with gr.Column(scale=2):
|
| 169 |
+
with gr.Column(elem_classes="image-container"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
current_image = gr.Image(
|
| 171 |
+
label="Current Image",
|
| 172 |
+
height=600, # Set the maximum desired height
|
| 173 |
+
type="filepath",
|
| 174 |
+
show_fullscreen_button=True,
|
| 175 |
+
show_download_button=False,
|
| 176 |
+
show_share_button=False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
)
|
| 178 |
|
| 179 |
+
# Navigation row
|
| 180 |
+
with gr.Row():
|
| 181 |
+
prev_button = gr.Button("← Previous", size="sm")
|
| 182 |
+
image_counter = gr.Markdown("", elem_id="image-counter")
|
| 183 |
+
next_button = gr.Button("Next →", size="sm")
|
| 184 |
+
|
| 185 |
+
# Alt-text heading and output
|
| 186 |
+
gr.Markdown("### Generated Alt-text")
|
| 187 |
+
|
| 188 |
+
# Alt-text
|
| 189 |
+
analysis_text = gr.Textbox(
|
| 190 |
+
label="",
|
| 191 |
+
value="Please analyze images to see results",
|
| 192 |
+
lines=6,
|
| 193 |
+
max_lines=10,
|
| 194 |
+
interactive=False,
|
| 195 |
+
show_label=False
|
| 196 |
+
)
|
| 197 |
+
|
| 198 |
+
# Hidden state for gallery navigation
|
| 199 |
current_index = gr.State(0)
|
| 200 |
all_images = gr.State([])
|
| 201 |
all_results = gr.State([])
|
| 202 |
|
| 203 |
+
# Handle file uploads - store files for use during analysis
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
def handle_upload(files):
|
| 205 |
file_paths = []
|
| 206 |
file_names = []
|
| 207 |
for file in files:
|
| 208 |
file_paths.append(file.name)
|
| 209 |
+
# Extract filename without path or extension for later use
|
| 210 |
file_names.append(get_base_filename(file.name))
|
| 211 |
+
return file_paths, file_paths, file_names
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
upload_button.upload(
|
| 214 |
fn=handle_upload,
|
| 215 |
inputs=[upload_button],
|
| 216 |
+
outputs=[input_gallery, image_state, filename_state]
|
| 217 |
)
|
| 218 |
|
| 219 |
+
# Function to analyze images
|
| 220 |
+
# Modify the analyze_images function in your code:
|
| 221 |
+
|
| 222 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 223 |
if not image_paths:
|
| 224 |
+
return [], [], 0, "", "No images", "", ""
|
| 225 |
|
| 226 |
# Get system prompt based on length selection
|
| 227 |
sys_prompt = get_sys_prompt(length_choice)
|
|
|
|
| 238 |
try:
|
| 239 |
# Open the image file for analysis
|
| 240 |
img = Image.open(image_path)
|
| 241 |
+
prompt0 = prompt_new() # Using the new prompt function
|
| 242 |
|
| 243 |
+
# Extract the actual model name (remove any labels like "(default)")
|
| 244 |
+
if " (" in model_choice:
|
| 245 |
+
model_name = model_choice.split(" (")[0]
|
| 246 |
+
else:
|
| 247 |
+
model_name = model_choice
|
| 248 |
|
| 249 |
+
# Check if this is one of the Gemini models that needs special handling
|
| 250 |
is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
|
| 251 |
|
| 252 |
if is_gemini_model:
|
|
|
|
| 307 |
initial_text = "No images analyzed"
|
| 308 |
initial_counter = "0 of 0"
|
| 309 |
|
|
|
|
|
|
|
|
|
|
| 310 |
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 311 |
+
initial_text, csv_path)
|
| 312 |
+
|
| 313 |
|
| 314 |
# Function to navigate to previous image
|
| 315 |
def go_to_prev(current_idx, images, results):
|
|
|
|
| 331 |
|
| 332 |
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 333 |
|
| 334 |
+
# Connect the analyze button
|
| 335 |
analyze_button.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
fn=analyze_images,
|
| 337 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 338 |
outputs=[
|
| 339 |
all_images, all_results, current_index, current_image, image_counter,
|
| 340 |
+
analysis_text, csv_download
|
| 341 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
)
|
| 343 |
|
| 344 |
# Connect navigation buttons
|
|
|
|
| 354 |
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 355 |
)
|
| 356 |
|
| 357 |
+
# Optional: Add additional information
|
| 358 |
+
with gr.Accordion("About", open=False):
|
| 359 |
gr.Markdown("""
|
| 360 |
+
## About this demo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
|
| 362 |
+
This demo generates alternative text for images.
|
| 363 |
|
| 364 |
+
- Upload one or more images using the upload button
|
| 365 |
+
- Choose a model and response length for generation
|
| 366 |
+
- Navigate through the images with the Previous and Next buttons
|
| 367 |
+
- Download CSV with all results
|
|
|
|
| 368 |
|
| 369 |
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
|
|
|
| 370 |
|
| 371 |
+
If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
|
| 372 |
+
chris.addis@nhm.ac.uk
|
| 373 |
""")
|
| 374 |
|
| 375 |
return demo
|
.ipynb_checkpoints/app-Copy1-checkpoint.py → app2.py
RENAMED
|
@@ -45,8 +45,6 @@ def get_sys_prompt(length="medium"):
|
|
| 45 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 46 |
return dev_prompt
|
| 47 |
|
| 48 |
-
# This function is no longer needed since we removed A/B testing
|
| 49 |
-
|
| 50 |
def create_csv_file_simple(results):
|
| 51 |
"""Create a CSV file from the results and return the path"""
|
| 52 |
# Create a temporary file
|
|
@@ -75,153 +73,264 @@ def get_base_filename(filepath):
|
|
| 75 |
filename = os.path.splitext(basename)[0]
|
| 76 |
return filename
|
| 77 |
|
| 78 |
-
# Define
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def create_demo():
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
| 83 |
with gr.Column(scale=3):
|
| 84 |
gr.Markdown("# AI Alt-text Generator")
|
| 85 |
-
gr.Markdown("Upload
|
| 86 |
-
|
| 87 |
-
with gr.Column(scale=1):
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
gr.Image("images/nml_logo.png", show_label=False, height=120,
|
| 95 |
-
interactive=False, show_download_button=False,
|
| 96 |
-
show_share_button=False, show_fullscreen_button=False,
|
| 97 |
-
container=False)
|
| 98 |
|
| 99 |
-
|
| 100 |
with gr.Row():
|
| 101 |
-
# Left
|
| 102 |
-
with gr.Column(scale=1):
|
| 103 |
-
# Upload
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
# Gemini
|
| 113 |
-
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 114 |
-
# GPT-4.1 Series
|
| 115 |
-
("GPT-4.1 Nano", "gpt-4.1-nano"),
|
| 116 |
-
("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 117 |
-
("GPT-4.1", "gpt-4.1"),
|
| 118 |
-
("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
| 119 |
-
# Other Models
|
| 120 |
-
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 121 |
-
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 122 |
-
# Experimental Models
|
| 123 |
-
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 124 |
-
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 125 |
-
]
|
| 126 |
-
|
| 127 |
-
# Find the internal value of the default choice
|
| 128 |
-
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 129 |
-
|
| 130 |
-
# Add model selection dropdown
|
| 131 |
-
model_choice = gr.Dropdown(
|
| 132 |
-
choices=model_choices,
|
| 133 |
-
label="Select Model",
|
| 134 |
-
value=default_model_internal_value, # Use the internal value for the default
|
| 135 |
-
# info="Choose the language model to use." # Optional: Add extra info tooltip
|
| 136 |
-
visible=True
|
| 137 |
-
)
|
| 138 |
-
|
| 139 |
|
| 140 |
-
#
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
-
#
|
| 149 |
-
gr.
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
#
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
# Hidden state
|
| 161 |
image_state = gr.State([])
|
| 162 |
filename_state = gr.State([])
|
| 163 |
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
current_image = gr.Image(
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
)
|
| 178 |
|
| 179 |
-
#
|
| 180 |
-
with gr.Row():
|
| 181 |
-
prev_button = gr.Button("← Previous", size="sm")
|
| 182 |
-
image_counter = gr.Markdown("", elem_id="image-counter")
|
| 183 |
-
next_button = gr.Button("Next →", size="sm")
|
| 184 |
-
|
| 185 |
-
# Alt-text heading and output
|
| 186 |
-
gr.Markdown("### Generated Alt-text")
|
| 187 |
-
|
| 188 |
-
# Alt-text
|
| 189 |
-
analysis_text = gr.Textbox(
|
| 190 |
-
label="",
|
| 191 |
-
value="Please analyze images to see results",
|
| 192 |
-
lines=6,
|
| 193 |
-
max_lines=10,
|
| 194 |
-
interactive=False,
|
| 195 |
-
show_label=False
|
| 196 |
-
)
|
| 197 |
-
|
| 198 |
-
# Hidden state for gallery navigation
|
| 199 |
current_index = gr.State(0)
|
| 200 |
all_images = gr.State([])
|
| 201 |
all_results = gr.State([])
|
| 202 |
|
| 203 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
def handle_upload(files):
|
| 205 |
file_paths = []
|
| 206 |
file_names = []
|
| 207 |
for file in files:
|
| 208 |
file_paths.append(file.name)
|
| 209 |
-
# Extract filename without path or extension for later use
|
| 210 |
file_names.append(get_base_filename(file.name))
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
upload_button.upload(
|
| 214 |
fn=handle_upload,
|
| 215 |
inputs=[upload_button],
|
| 216 |
-
outputs=[input_gallery, image_state, filename_state]
|
| 217 |
)
|
| 218 |
|
| 219 |
-
# Function to analyze images
|
| 220 |
-
# Modify the analyze_images function in your code:
|
| 221 |
-
|
| 222 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 223 |
if not image_paths:
|
| 224 |
-
return [], [], 0, "", "No images", "",
|
| 225 |
|
| 226 |
# Get system prompt based on length selection
|
| 227 |
sys_prompt = get_sys_prompt(length_choice)
|
|
@@ -238,15 +347,12 @@ def create_demo():
|
|
| 238 |
try:
|
| 239 |
# Open the image file for analysis
|
| 240 |
img = Image.open(image_path)
|
| 241 |
-
prompt0 = prompt_new()
|
| 242 |
|
| 243 |
-
#
|
| 244 |
-
|
| 245 |
-
model_name = model_choice.split(" (")[0]
|
| 246 |
-
else:
|
| 247 |
-
model_name = model_choice
|
| 248 |
|
| 249 |
-
# Check if this is one of the Gemini models
|
| 250 |
is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
|
| 251 |
|
| 252 |
if is_gemini_model:
|
|
@@ -307,9 +413,11 @@ def create_demo():
|
|
| 307 |
initial_text = "No images analyzed"
|
| 308 |
initial_counter = "0 of 0"
|
| 309 |
|
|
|
|
|
|
|
|
|
|
| 310 |
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 311 |
-
initial_text, csv_path)
|
| 312 |
-
|
| 313 |
|
| 314 |
# Function to navigate to previous image
|
| 315 |
def go_to_prev(current_idx, images, results):
|
|
@@ -331,14 +439,24 @@ def create_demo():
|
|
| 331 |
|
| 332 |
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 333 |
|
| 334 |
-
#
|
| 335 |
analyze_button.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
fn=analyze_images,
|
| 337 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 338 |
outputs=[
|
| 339 |
all_images, all_results, current_index, current_image, image_counter,
|
| 340 |
-
analysis_text, csv_download
|
| 341 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
)
|
| 343 |
|
| 344 |
# Connect navigation buttons
|
|
@@ -354,22 +472,33 @@ def create_demo():
|
|
| 354 |
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 355 |
)
|
| 356 |
|
| 357 |
-
#
|
| 358 |
-
with gr.Accordion("About", open=False):
|
| 359 |
gr.Markdown("""
|
| 360 |
-
## About
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
|
| 362 |
-
|
| 363 |
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
|
|
|
| 368 |
|
| 369 |
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
|
|
|
| 370 |
|
| 371 |
-
|
| 372 |
-
chris.addis@nhm.ac.uk
|
| 373 |
""")
|
| 374 |
|
| 375 |
return demo
|
|
|
|
| 45 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 46 |
return dev_prompt
|
| 47 |
|
|
|
|
|
|
|
| 48 |
def create_csv_file_simple(results):
|
| 49 |
"""Create a CSV file from the results and return the path"""
|
| 50 |
# Create a temporary file
|
|
|
|
| 73 |
filename = os.path.splitext(basename)[0]
|
| 74 |
return filename
|
| 75 |
|
| 76 |
+
# Define custom CSS for the application
|
| 77 |
+
custom_css = """
|
| 78 |
+
.container {
|
| 79 |
+
max-width: 1200px;
|
| 80 |
+
margin: 0 auto;
|
| 81 |
+
}
|
| 82 |
+
.header {
|
| 83 |
+
text-align: center;
|
| 84 |
+
margin-bottom: 20px;
|
| 85 |
+
border-bottom: 2px solid #eee;
|
| 86 |
+
padding-bottom: 15px;
|
| 87 |
+
}
|
| 88 |
+
.model-card {
|
| 89 |
+
border: 1px solid #e0e0e0;
|
| 90 |
+
border-radius: 8px;
|
| 91 |
+
padding: 15px;
|
| 92 |
+
background-color: #f9f9f9;
|
| 93 |
+
margin-bottom: 15px;
|
| 94 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 95 |
+
}
|
| 96 |
+
.upload-box {
|
| 97 |
+
border: 2px dashed #ccc;
|
| 98 |
+
border-radius: 8px;
|
| 99 |
+
padding: 20px;
|
| 100 |
+
text-align: center;
|
| 101 |
+
margin-bottom: 15px;
|
| 102 |
+
background-color: #f7f7f7;
|
| 103 |
+
transition: all 0.3s ease;
|
| 104 |
+
}
|
| 105 |
+
.upload-box:hover {
|
| 106 |
+
border-color: #2196F3;
|
| 107 |
+
background-color: #f0f8ff;
|
| 108 |
+
}
|
| 109 |
+
.gallery-container {
|
| 110 |
+
background-color: #f5f5f5;
|
| 111 |
+
border-radius: 8px;
|
| 112 |
+
padding: 10px;
|
| 113 |
+
margin-bottom: 15px;
|
| 114 |
+
}
|
| 115 |
+
.result-container {
|
| 116 |
+
border: 1px solid #e0e0e0;
|
| 117 |
+
border-radius: 8px;
|
| 118 |
+
padding: 15px;
|
| 119 |
+
margin-top: 20px;
|
| 120 |
+
background-color: white;
|
| 121 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 122 |
+
}
|
| 123 |
+
.nav-buttons {
|
| 124 |
+
display: flex;
|
| 125 |
+
justify-content: space-between;
|
| 126 |
+
align-items: center;
|
| 127 |
+
margin: 10px 0;
|
| 128 |
+
}
|
| 129 |
+
.footer {
|
| 130 |
+
text-align: center;
|
| 131 |
+
margin-top: 30px;
|
| 132 |
+
padding-top: 15px;
|
| 133 |
+
border-top: 1px solid #eee;
|
| 134 |
+
color: #666;
|
| 135 |
+
font-size: 0.9em;
|
| 136 |
+
}
|
| 137 |
+
.logo-container {
|
| 138 |
+
display: flex;
|
| 139 |
+
justify-content: center;
|
| 140 |
+
align-items: center;
|
| 141 |
+
gap: 20px;
|
| 142 |
+
margin-bottom: 10px;
|
| 143 |
+
}
|
| 144 |
+
.length-selector {
|
| 145 |
+
display: flex;
|
| 146 |
+
gap: 10px;
|
| 147 |
+
margin-bottom: 15px;
|
| 148 |
+
}
|
| 149 |
+
.progress-indicator {
|
| 150 |
+
height: 4px;
|
| 151 |
+
background-color: #f0f0f0;
|
| 152 |
+
border-radius: 2px;
|
| 153 |
+
overflow: hidden;
|
| 154 |
+
margin-bottom: 15px;
|
| 155 |
+
}
|
| 156 |
+
.progress-bar {
|
| 157 |
+
height: 100%;
|
| 158 |
+
background-color: #4CAF50;
|
| 159 |
+
width: 0%;
|
| 160 |
+
transition: width 0.3s ease;
|
| 161 |
+
}
|
| 162 |
+
"""
|
| 163 |
+
|
| 164 |
+
# Define the Gradio interface with the new design
|
| 165 |
def create_demo():
|
| 166 |
+
# Use the Monochrome theme with custom CSS for better compatibility
|
| 167 |
+
theme = gr.themes.Monochrome()
|
| 168 |
+
|
| 169 |
+
with gr.Blocks(theme=theme, css=custom_css) as demo:
|
| 170 |
+
# Header section
|
| 171 |
+
with gr.Row(elem_classes="header"):
|
| 172 |
with gr.Column(scale=3):
|
| 173 |
gr.Markdown("# AI Alt-text Generator")
|
| 174 |
+
gr.Markdown("Upload images to generate accessible alternative text that meets WCAG 2.1 Guidelines")
|
| 175 |
+
|
| 176 |
+
with gr.Column(scale=1, elem_classes="logo-container"):
|
| 177 |
+
gr.Image("images/nhm_logo.png", show_label=False, height=80,
|
| 178 |
+
interactive=False, show_download_button=False,
|
| 179 |
+
show_share_button=False, show_fullscreen_button=False)
|
| 180 |
+
gr.Image("images/nml_logo.png", show_label=False, height=80,
|
| 181 |
+
interactive=False, show_download_button=False,
|
| 182 |
+
show_share_button=False, show_fullscreen_button=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
+
# Main content area
|
| 185 |
with gr.Row():
|
| 186 |
+
# Left panel - Controls
|
| 187 |
+
with gr.Column(scale=1, elem_classes="control-panel"):
|
| 188 |
+
# Upload area with styling
|
| 189 |
+
with gr.Column(elem_classes="upload-box"):
|
| 190 |
+
upload_button = gr.UploadButton(
|
| 191 |
+
"📷 Upload Images",
|
| 192 |
+
file_types=["image"],
|
| 193 |
+
file_count="multiple",
|
| 194 |
+
size="lg"
|
| 195 |
+
)
|
| 196 |
+
gr.Markdown("*Drag and drop or click to upload multiple images*")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
+
# Options card
|
| 199 |
+
with gr.Column(elem_classes="model-card"):
|
| 200 |
+
gr.Markdown("### Model Settings")
|
| 201 |
+
|
| 202 |
+
# Model selection dropdown
|
| 203 |
+
model_choices = [
|
| 204 |
+
# Gemini
|
| 205 |
+
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 206 |
+
# GPT-4.1 Series
|
| 207 |
+
("GPT-4.1 Nano", "gpt-4.1-nano"),
|
| 208 |
+
("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 209 |
+
("GPT-4.1", "gpt-4.1"),
|
| 210 |
+
("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
| 211 |
+
# Other Models
|
| 212 |
+
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 213 |
+
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 214 |
+
# Experimental Models
|
| 215 |
+
("Gemini 2.5 Pro (Experimental)", "gemini-2.5-pro-exp-03-25"),
|
| 216 |
+
("Gemini 2.0 Flash Thinking (Experimental)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 217 |
+
]
|
| 218 |
+
|
| 219 |
+
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 220 |
+
|
| 221 |
+
model_choice = gr.Dropdown(
|
| 222 |
+
choices=model_choices,
|
| 223 |
+
label="AI Model",
|
| 224 |
+
value=default_model_internal_value,
|
| 225 |
+
info="Select the AI model for generating descriptions",
|
| 226 |
+
visible=True
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
# Length selector with visual indicators
|
| 230 |
+
gr.Markdown("### Response Length")
|
| 231 |
+
with gr.Row(elem_classes="length-selector"):
|
| 232 |
+
length_choice = gr.Radio(
|
| 233 |
+
choices=["short", "medium", "long"],
|
| 234 |
+
label="Response Length",
|
| 235 |
+
value="medium",
|
| 236 |
+
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 237 |
+
)
|
| 238 |
|
| 239 |
+
# Image preview gallery
|
| 240 |
+
with gr.Column(elem_classes="gallery-container"):
|
| 241 |
+
gr.Markdown("### Uploaded Images")
|
| 242 |
+
input_gallery = gr.Gallery(
|
| 243 |
+
label="",
|
| 244 |
+
columns=3,
|
| 245 |
+
height=180,
|
| 246 |
+
object_fit="contain"
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
# Analysis button
|
| 250 |
+
analyze_button = gr.Button("🔍 Analyze Images", variant="primary", size="lg")
|
| 251 |
|
| 252 |
+
# CSV Download section
|
| 253 |
+
with gr.Column(visible=False, elem_id="download-section", elem_classes="model-card") as download_section:
|
| 254 |
+
gr.Markdown("### Download Results")
|
| 255 |
+
csv_download = gr.File(label="CSV Results", elem_id="csv-download")
|
| 256 |
|
| 257 |
+
# Hidden state components
|
| 258 |
image_state = gr.State([])
|
| 259 |
filename_state = gr.State([])
|
| 260 |
|
| 261 |
+
# Right panel - Results display
|
| 262 |
+
with gr.Column(scale=2, elem_classes="results-panel"):
|
| 263 |
+
with gr.Column(elem_classes="result-container"):
|
| 264 |
+
# Progress indicator
|
| 265 |
+
with gr.Row(elem_id="progress-container", visible=False) as progress_container:
|
| 266 |
+
with gr.Column():
|
| 267 |
+
gr.HTML('<div class="progress-indicator"><div class="progress-bar" id="progress-bar"></div></div>')
|
| 268 |
+
progress_text = gr.Markdown("Processing...", elem_id="progress-text")
|
| 269 |
+
|
| 270 |
+
# Image display
|
| 271 |
current_image = gr.Image(
|
| 272 |
+
label="Image Preview",
|
| 273 |
+
height=400,
|
| 274 |
+
type="filepath",
|
| 275 |
+
show_fullscreen_button=True,
|
| 276 |
+
show_download_button=False,
|
| 277 |
+
show_share_button=False,
|
| 278 |
+
elem_classes="current-image"
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
+
# Navigation controls
|
| 282 |
+
with gr.Row(elem_classes="nav-buttons"):
|
| 283 |
+
prev_button = gr.Button("← Previous", size="sm", variant="secondary")
|
| 284 |
+
image_counter = gr.Markdown("", elem_id="image-counter")
|
| 285 |
+
next_button = gr.Button("Next →", size="sm", variant="secondary")
|
| 286 |
+
|
| 287 |
+
# Alt-text results
|
| 288 |
+
gr.Markdown("### Generated Alt-text", elem_id="result-heading")
|
| 289 |
+
analysis_text = gr.Textbox(
|
| 290 |
+
label="",
|
| 291 |
+
value="Images will appear here after analysis. Please upload and analyze images to see results.",
|
| 292 |
+
lines=6,
|
| 293 |
+
max_lines=10,
|
| 294 |
+
interactive=False,
|
| 295 |
+
show_label=False,
|
| 296 |
+
elem_classes="result-text"
|
| 297 |
)
|
| 298 |
|
| 299 |
+
# Hidden states for navigation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
current_index = gr.State(0)
|
| 301 |
all_images = gr.State([])
|
| 302 |
all_results = gr.State([])
|
| 303 |
|
| 304 |
+
# Footer section
|
| 305 |
+
with gr.Row(elem_classes="footer"):
|
| 306 |
+
gr.Markdown("""
|
| 307 |
+
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
| 308 |
+
Funded by the DCMS Pilot Scheme. For support, contact: chris.addis@nhm.ac.uk
|
| 309 |
+
""")
|
| 310 |
+
|
| 311 |
+
# Handle file uploads
|
| 312 |
def handle_upload(files):
|
| 313 |
file_paths = []
|
| 314 |
file_names = []
|
| 315 |
for file in files:
|
| 316 |
file_paths.append(file.name)
|
|
|
|
| 317 |
file_names.append(get_base_filename(file.name))
|
| 318 |
+
|
| 319 |
+
# Show a message about the number of files uploaded
|
| 320 |
+
upload_message = f"✅ {len(files)} image{'s' if len(files) != 1 else ''} uploaded successfully!"
|
| 321 |
+
|
| 322 |
+
return file_paths, file_paths, file_names, upload_message
|
| 323 |
|
| 324 |
upload_button.upload(
|
| 325 |
fn=handle_upload,
|
| 326 |
inputs=[upload_button],
|
| 327 |
+
outputs=[input_gallery, image_state, filename_state, progress_text]
|
| 328 |
)
|
| 329 |
|
| 330 |
+
# Function to analyze images with visual feedback
|
|
|
|
|
|
|
| 331 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 332 |
if not image_paths:
|
| 333 |
+
return [], [], 0, "", "No images uploaded", "", gr.update(visible=False)
|
| 334 |
|
| 335 |
# Get system prompt based on length selection
|
| 336 |
sys_prompt = get_sys_prompt(length_choice)
|
|
|
|
| 347 |
try:
|
| 348 |
# Open the image file for analysis
|
| 349 |
img = Image.open(image_path)
|
| 350 |
+
prompt0 = prompt_new()
|
| 351 |
|
| 352 |
+
# Use model_choice directly since it's the internal value
|
| 353 |
+
model_name = model_choice
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
+
# Check if this is one of the Gemini models
|
| 356 |
is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
|
| 357 |
|
| 358 |
if is_gemini_model:
|
|
|
|
| 413 |
initial_text = "No images analyzed"
|
| 414 |
initial_counter = "0 of 0"
|
| 415 |
|
| 416 |
+
# Make the download section visible now that we have results
|
| 417 |
+
download_visible = gr.update(visible=True)
|
| 418 |
+
|
| 419 |
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 420 |
+
initial_text, csv_path, download_visible)
|
|
|
|
| 421 |
|
| 422 |
# Function to navigate to previous image
|
| 423 |
def go_to_prev(current_idx, images, results):
|
|
|
|
| 439 |
|
| 440 |
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 441 |
|
| 442 |
+
# Show progress indicator during analysis
|
| 443 |
analyze_button.click(
|
| 444 |
+
fn=lambda: (gr.update(visible=True), "Processing images... Please wait"),
|
| 445 |
+
inputs=[],
|
| 446 |
+
outputs=[progress_container, progress_text],
|
| 447 |
+
queue=False
|
| 448 |
+
).then(
|
| 449 |
fn=analyze_images,
|
| 450 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 451 |
outputs=[
|
| 452 |
all_images, all_results, current_index, current_image, image_counter,
|
| 453 |
+
analysis_text, csv_download, download_section
|
| 454 |
]
|
| 455 |
+
).then(
|
| 456 |
+
fn=lambda: (gr.update(visible=False), "Analysis complete!"),
|
| 457 |
+
inputs=[],
|
| 458 |
+
outputs=[progress_container, progress_text],
|
| 459 |
+
queue=False
|
| 460 |
)
|
| 461 |
|
| 462 |
# Connect navigation buttons
|
|
|
|
| 472 |
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 473 |
)
|
| 474 |
|
| 475 |
+
# Additional information accordion
|
| 476 |
+
with gr.Accordion("About this Tool", open=False):
|
| 477 |
gr.Markdown("""
|
| 478 |
+
## About the AI Alt-text Generator
|
| 479 |
+
|
| 480 |
+
This tool uses advanced AI models to automatically generate alternative text descriptions for images,
|
| 481 |
+
helping museums and cultural institutions make their digital content more accessible for visually impaired users.
|
| 482 |
+
|
| 483 |
+
### Features:
|
| 484 |
+
|
| 485 |
+
- **Multiple AI Models**: Choose from various AI models including Gemini, GPT-4.1, Claude, and others
|
| 486 |
+
- **Customizable Length**: Select short, medium, or long descriptions based on your needs
|
| 487 |
+
- **Batch Processing**: Upload and process multiple images at once
|
| 488 |
+
- **CSV Export**: Download all generated descriptions in a single file
|
| 489 |
|
| 490 |
+
### How to Use:
|
| 491 |
|
| 492 |
+
1. Upload one or more images using the upload button
|
| 493 |
+
2. Select your preferred AI model and description length
|
| 494 |
+
3. Click "Analyze Images" to generate descriptions
|
| 495 |
+
4. Navigate through results with the Previous and Next buttons
|
| 496 |
+
5. Download all results as a CSV file
|
| 497 |
|
| 498 |
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
| 499 |
+
Funded by the DCMS Pilot Scheme.
|
| 500 |
|
| 501 |
+
For support, feedback, or suggestions, please contact: chris.addis@nhm.ac.uk
|
|
|
|
| 502 |
""")
|
| 503 |
|
| 504 |
return demo
|