Chris Addis commited on
Commit ·
997297a
1
Parent(s): 5005504
add photograph
Browse files- app-Copy1.py +0 -387
- .ipynb_checkpoints/app-Copy1-checkpoint.py → app-Copy2.py +151 -239
- app.py +43 -12
- library/utils_prompt.py +1 -1
app-Copy1.py
DELETED
|
@@ -1,387 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import numpy as np
|
| 3 |
-
from PIL import Image
|
| 4 |
-
import io
|
| 5 |
-
import os
|
| 6 |
-
import requests
|
| 7 |
-
import json
|
| 8 |
-
from dotenv import load_dotenv
|
| 9 |
-
import openai
|
| 10 |
-
import base64
|
| 11 |
-
import csv
|
| 12 |
-
import tempfile
|
| 13 |
-
import datetime
|
| 14 |
-
|
| 15 |
-
# Load environment variables from .env file if it exists (for local development)
|
| 16 |
-
# On Hugging Face Spaces, the secrets are automatically available as environment variables
|
| 17 |
-
if os.path.exists(".env"):
|
| 18 |
-
load_dotenv()
|
| 19 |
-
|
| 20 |
-
from io import BytesIO
|
| 21 |
-
import numpy as np
|
| 22 |
-
import requests
|
| 23 |
-
from PIL import Image
|
| 24 |
-
|
| 25 |
-
# import libraries
|
| 26 |
-
from library.utils_model import *
|
| 27 |
-
from library.utils_html import *
|
| 28 |
-
from library.utils_prompt import *
|
| 29 |
-
|
| 30 |
-
OR = OpenRouterAPI()
|
| 31 |
-
gemini = OpenRouterAPI(api_key = os.getenv("GEMINI_API_KEY"),base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
| 32 |
-
|
| 33 |
-
# Path for storing user preferences
|
| 34 |
-
PREFERENCES_FILE = "data/user_preferences.csv"
|
| 35 |
-
|
| 36 |
-
# Ensure directory exists
|
| 37 |
-
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
|
| 38 |
-
|
| 39 |
-
def get_sys_prompt(length="medium"):
|
| 40 |
-
if length == "short":
|
| 41 |
-
dev_prompt = """You are a museum curator tasked with generating alt-text (as defined by W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maximum of 130 characters."""
|
| 42 |
-
elif length == "medium":
|
| 43 |
-
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be between 250-300 characters in length."""
|
| 44 |
-
else:
|
| 45 |
-
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 46 |
-
return dev_prompt
|
| 47 |
-
|
| 48 |
-
def create_csv_file_simple(results):
|
| 49 |
-
"""Create a CSV file from the results and return the path"""
|
| 50 |
-
# Create a temporary file
|
| 51 |
-
fd, path = tempfile.mkstemp(suffix='.csv')
|
| 52 |
-
|
| 53 |
-
with os.fdopen(fd, 'w', newline='') as f:
|
| 54 |
-
writer = csv.writer(f)
|
| 55 |
-
# Write header
|
| 56 |
-
writer.writerow(['image_id', 'content'])
|
| 57 |
-
# Write data
|
| 58 |
-
for result in results:
|
| 59 |
-
writer.writerow([
|
| 60 |
-
result.get('image_id', ''),
|
| 61 |
-
result.get('content', '')
|
| 62 |
-
])
|
| 63 |
-
|
| 64 |
-
return path
|
| 65 |
-
|
| 66 |
-
# Extract original filename without path or extension
|
| 67 |
-
def get_base_filename(filepath):
|
| 68 |
-
if not filepath:
|
| 69 |
-
return ""
|
| 70 |
-
# Get the basename (filename with extension)
|
| 71 |
-
basename = os.path.basename(filepath)
|
| 72 |
-
# Remove extension
|
| 73 |
-
filename = os.path.splitext(basename)[0]
|
| 74 |
-
return filename
|
| 75 |
-
|
| 76 |
-
custom_css = """
|
| 77 |
-
.image-container img {
|
| 78 |
-
object-fit: contain;
|
| 79 |
-
width: 100%;
|
| 80 |
-
height: 100%;
|
| 81 |
-
}
|
| 82 |
-
"""
|
| 83 |
-
|
| 84 |
-
# Define the Gradio interface
|
| 85 |
-
def create_demo():
|
| 86 |
-
with gr.Blocks(theme=gr.themes.Monochrome(),css=custom_css) as demo:
|
| 87 |
-
# Replace the existing logo code section:
|
| 88 |
-
with gr.Row():
|
| 89 |
-
with gr.Column(scale=3):
|
| 90 |
-
gr.Markdown("# MATCHA: Museum Alt-Text for Cultural Heritage with AI 🍵 🌿")
|
| 91 |
-
gr.Markdown("Upload one or more images to generate accessible alternative text (designed to meet WCAG Guidelines)")
|
| 92 |
-
gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
|
| 93 |
-
with gr.Column(scale=1):
|
| 94 |
-
with gr.Row():
|
| 95 |
-
# Use gr.Image with all interactive features disabled
|
| 96 |
-
gr.Image("images/nhm_logo.png", show_label=False, height=120,
|
| 97 |
-
interactive=False, show_download_button=False,
|
| 98 |
-
show_share_button=False, show_fullscreen_button=False,
|
| 99 |
-
container=False)
|
| 100 |
-
gr.Image("images/nml_logo.png", show_label=False, height=120,
|
| 101 |
-
interactive=False, show_download_button=False,
|
| 102 |
-
show_share_button=False, show_fullscreen_button=False,
|
| 103 |
-
container=False)
|
| 104 |
-
|
| 105 |
-
with gr.Row():
|
| 106 |
-
# Left column: Controls and uploads
|
| 107 |
-
with gr.Column(scale=1):
|
| 108 |
-
# Upload interface
|
| 109 |
-
upload_button = gr.UploadButton(
|
| 110 |
-
"Click to Upload Images",
|
| 111 |
-
file_types=["image"],
|
| 112 |
-
file_count="multiple"
|
| 113 |
-
)
|
| 114 |
-
|
| 115 |
-
# Define choices as a list of tuples: (Display Name, Internal Value)
|
| 116 |
-
model_choices = [
|
| 117 |
-
# Gemini
|
| 118 |
-
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 119 |
-
# GPT-4.1 Series
|
| 120 |
-
("GPT-4.1 Nano", "gpt-4.1-nano"),
|
| 121 |
-
("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 122 |
-
("GPT-4.1", "gpt-4.1"),
|
| 123 |
-
("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
| 124 |
-
# Other Models
|
| 125 |
-
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 126 |
-
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 127 |
-
# Experimental Models
|
| 128 |
-
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 129 |
-
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 130 |
-
]
|
| 131 |
-
|
| 132 |
-
# Find the internal value of the default choice
|
| 133 |
-
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 134 |
-
|
| 135 |
-
# Add model selection dropdown
|
| 136 |
-
model_choice = gr.Dropdown(
|
| 137 |
-
choices=model_choices,
|
| 138 |
-
label="Select Model",
|
| 139 |
-
value=default_model_internal_value, # Use the internal value for the default
|
| 140 |
-
# info="Choose the language model to use." # Optional: Add extra info tooltip
|
| 141 |
-
visible=True
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
# Add response length selection
|
| 146 |
-
length_choice = gr.Radio(
|
| 147 |
-
choices=["short", "medium", "long"],
|
| 148 |
-
label="Response Length",
|
| 149 |
-
value="medium",
|
| 150 |
-
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 151 |
-
)
|
| 152 |
-
|
| 153 |
-
# Preview gallery for uploaded images
|
| 154 |
-
gr.Markdown("### Uploaded Images")
|
| 155 |
-
input_gallery = gr.Gallery(
|
| 156 |
-
label="",
|
| 157 |
-
columns=3,
|
| 158 |
-
height=150,
|
| 159 |
-
object_fit="contain"
|
| 160 |
-
)
|
| 161 |
-
|
| 162 |
-
# Analysis button
|
| 163 |
-
analyze_button = gr.Button("Generate Alt-Text", variant="primary", size="lg")
|
| 164 |
-
|
| 165 |
-
# Hidden state component to store image info
|
| 166 |
-
image_state = gr.State([])
|
| 167 |
-
filename_state = gr.State([])
|
| 168 |
-
|
| 169 |
-
# CSV download component
|
| 170 |
-
csv_download = gr.File(label="CSV Results")
|
| 171 |
-
|
| 172 |
-
# Right column: Display area
|
| 173 |
-
with gr.Column(scale=2):
|
| 174 |
-
with gr.Column(elem_classes="image-container"):
|
| 175 |
-
current_image = gr.Image(
|
| 176 |
-
label="Current Image",
|
| 177 |
-
height=600, # Set the maximum desired height
|
| 178 |
-
width=1000,
|
| 179 |
-
type="filepath",
|
| 180 |
-
show_fullscreen_button=True,
|
| 181 |
-
show_download_button=False,
|
| 182 |
-
show_share_button=False,
|
| 183 |
-
elem_classes="image-container"
|
| 184 |
-
)
|
| 185 |
-
|
| 186 |
-
# Navigation row
|
| 187 |
-
with gr.Row():
|
| 188 |
-
prev_button = gr.Button("← Previous", size="sm")
|
| 189 |
-
image_counter = gr.Markdown("", elem_id="image-counter")
|
| 190 |
-
next_button = gr.Button("Next →", size="sm")
|
| 191 |
-
|
| 192 |
-
# Alt-text heading and output
|
| 193 |
-
gr.Markdown("### Generated Alt-text")
|
| 194 |
-
|
| 195 |
-
# Alt-text
|
| 196 |
-
analysis_text = gr.Textbox(
|
| 197 |
-
label="",
|
| 198 |
-
value="Upload images and select model to generate alt-text!",
|
| 199 |
-
lines=6,
|
| 200 |
-
max_lines=10,
|
| 201 |
-
interactive=False,
|
| 202 |
-
show_label=False
|
| 203 |
-
)
|
| 204 |
-
|
| 205 |
-
# Hidden state for gallery navigation
|
| 206 |
-
current_index = gr.State(0)
|
| 207 |
-
all_images = gr.State([])
|
| 208 |
-
all_results = gr.State([])
|
| 209 |
-
|
| 210 |
-
# Handle file uploads - store files for use during analysis
|
| 211 |
-
def handle_upload(files):
|
| 212 |
-
file_paths = []
|
| 213 |
-
file_names = []
|
| 214 |
-
for file in files:
|
| 215 |
-
file_paths.append(file.name)
|
| 216 |
-
# Extract filename without path or extension for later use
|
| 217 |
-
file_names.append(get_base_filename(file.name))
|
| 218 |
-
return file_paths, file_paths, file_names
|
| 219 |
-
|
| 220 |
-
upload_button.upload(
|
| 221 |
-
fn=handle_upload,
|
| 222 |
-
inputs=[upload_button],
|
| 223 |
-
outputs=[input_gallery, image_state, filename_state]
|
| 224 |
-
)
|
| 225 |
-
|
| 226 |
-
# Function to analyze images
|
| 227 |
-
# Modify the analyze_images function in your code:
|
| 228 |
-
|
| 229 |
-
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 230 |
-
if not image_paths:
|
| 231 |
-
return [], [], 0, "", "No images", "", ""
|
| 232 |
-
|
| 233 |
-
# Get system prompt based on length selection
|
| 234 |
-
sys_prompt = get_sys_prompt(length_choice)
|
| 235 |
-
|
| 236 |
-
image_results = []
|
| 237 |
-
|
| 238 |
-
for i, image_path in enumerate(image_paths):
|
| 239 |
-
# Use original filename as image_id if available
|
| 240 |
-
if i < len(filenames) and filenames[i]:
|
| 241 |
-
image_id = filenames[i]
|
| 242 |
-
else:
|
| 243 |
-
image_id = f"Image {i+1}"
|
| 244 |
-
|
| 245 |
-
try:
|
| 246 |
-
# Open the image file for analysis
|
| 247 |
-
img = Image.open(image_path)
|
| 248 |
-
prompt0 = prompt_new() # Using the new prompt function
|
| 249 |
-
|
| 250 |
-
# Extract the actual model name (remove any labels like "(default)")
|
| 251 |
-
if " (" in model_choice:
|
| 252 |
-
model_name = model_choice.split(" (")[0]
|
| 253 |
-
else:
|
| 254 |
-
model_name = model_choice
|
| 255 |
-
|
| 256 |
-
# Check if this is one of the Gemini models that needs special handling
|
| 257 |
-
is_gemini_model = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
|
| 258 |
-
|
| 259 |
-
if is_gemini_model:
|
| 260 |
-
try:
|
| 261 |
-
# First try using the dedicated gemini client
|
| 262 |
-
result = gemini.generate_caption(
|
| 263 |
-
img,
|
| 264 |
-
model=model_name,
|
| 265 |
-
max_image_size=512,
|
| 266 |
-
prompt=prompt0,
|
| 267 |
-
prompt_dev=sys_prompt,
|
| 268 |
-
temperature=1
|
| 269 |
-
)
|
| 270 |
-
except Exception as gemini_error:
|
| 271 |
-
# If gemini client fails, fall back to standard OR client
|
| 272 |
-
result = OR.generate_caption(
|
| 273 |
-
img,
|
| 274 |
-
model=model_name,
|
| 275 |
-
max_image_size=512,
|
| 276 |
-
prompt=prompt0,
|
| 277 |
-
prompt_dev=sys_prompt,
|
| 278 |
-
temperature=1
|
| 279 |
-
)
|
| 280 |
-
else:
|
| 281 |
-
# For all other models, use OR client directly
|
| 282 |
-
result = OR.generate_caption(
|
| 283 |
-
img,
|
| 284 |
-
model=model_name,
|
| 285 |
-
max_image_size=512,
|
| 286 |
-
prompt=prompt0,
|
| 287 |
-
prompt_dev=sys_prompt,
|
| 288 |
-
temperature=1
|
| 289 |
-
)
|
| 290 |
-
|
| 291 |
-
# Add to results
|
| 292 |
-
image_results.append({
|
| 293 |
-
"image_id": image_id,
|
| 294 |
-
"content": result
|
| 295 |
-
})
|
| 296 |
-
|
| 297 |
-
except Exception as e:
|
| 298 |
-
error_message = f"Error: {str(e)}"
|
| 299 |
-
image_results.append({
|
| 300 |
-
"image_id": image_id,
|
| 301 |
-
"content": error_message
|
| 302 |
-
})
|
| 303 |
-
|
| 304 |
-
# Create a CSV file for download
|
| 305 |
-
csv_path = create_csv_file_simple(image_results)
|
| 306 |
-
|
| 307 |
-
# Set up initial display with first image
|
| 308 |
-
if len(image_paths) > 0:
|
| 309 |
-
initial_image = image_paths[0]
|
| 310 |
-
initial_counter = f"{1} of {len(image_paths)}"
|
| 311 |
-
initial_text = image_results[0]["content"]
|
| 312 |
-
else:
|
| 313 |
-
initial_image = ""
|
| 314 |
-
initial_text = "No images analyzed"
|
| 315 |
-
initial_counter = "0 of 0"
|
| 316 |
-
|
| 317 |
-
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 318 |
-
initial_text, csv_path)
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
# Function to navigate to previous image
|
| 322 |
-
def go_to_prev(current_idx, images, results):
|
| 323 |
-
if not images or len(images) == 0:
|
| 324 |
-
return current_idx, "", "0 of 0", ""
|
| 325 |
-
|
| 326 |
-
new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
|
| 327 |
-
counter_html = f"{new_idx + 1} of {len(images)}"
|
| 328 |
-
|
| 329 |
-
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 330 |
-
|
| 331 |
-
# Function to navigate to next image
|
| 332 |
-
def go_to_next(current_idx, images, results):
|
| 333 |
-
if not images or len(images) == 0:
|
| 334 |
-
return current_idx, "", "0 of 0", ""
|
| 335 |
-
|
| 336 |
-
new_idx = (current_idx + 1) % len(images)
|
| 337 |
-
counter_html = f"{new_idx + 1} of {len(images)}"
|
| 338 |
-
|
| 339 |
-
return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
|
| 340 |
-
|
| 341 |
-
# Connect the analyze button
|
| 342 |
-
analyze_button.click(
|
| 343 |
-
fn=analyze_images,
|
| 344 |
-
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 345 |
-
outputs=[
|
| 346 |
-
all_images, all_results, current_index, current_image, image_counter,
|
| 347 |
-
analysis_text, csv_download
|
| 348 |
-
]
|
| 349 |
-
)
|
| 350 |
-
|
| 351 |
-
# Connect navigation buttons
|
| 352 |
-
prev_button.click(
|
| 353 |
-
fn=go_to_prev,
|
| 354 |
-
inputs=[current_index, all_images, all_results],
|
| 355 |
-
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 356 |
-
)
|
| 357 |
-
|
| 358 |
-
next_button.click(
|
| 359 |
-
fn=go_to_next,
|
| 360 |
-
inputs=[current_index, all_images, all_results],
|
| 361 |
-
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 362 |
-
)
|
| 363 |
-
|
| 364 |
-
# Optional: Add additional information
|
| 365 |
-
with gr.Accordion("About", open=False):
|
| 366 |
-
gr.Markdown("""
|
| 367 |
-
## About this demo
|
| 368 |
-
|
| 369 |
-
This demo generates alternative text for images.
|
| 370 |
-
|
| 371 |
-
- Upload one or more images using the upload button
|
| 372 |
-
- Choose a model and response length for generation
|
| 373 |
-
- Navigate through the images with the Previous and Next buttons
|
| 374 |
-
- Download CSV with all results
|
| 375 |
-
|
| 376 |
-
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
| 377 |
-
|
| 378 |
-
If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
|
| 379 |
-
chris.addis@nhm.ac.uk
|
| 380 |
-
""")
|
| 381 |
-
|
| 382 |
-
return demo
|
| 383 |
-
|
| 384 |
-
# Launch the app
|
| 385 |
-
if __name__ == "__main__":
|
| 386 |
-
app = create_demo()
|
| 387 |
-
app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.ipynb_checkpoints/app-Copy1-checkpoint.py → app-Copy2.py
RENAMED
|
@@ -41,50 +41,61 @@ def get_sys_prompt(length="medium"):
|
|
| 41 |
dev_prompt = """You are a museum curator tasked with generating alt-text (as defined by W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maximum of 130 characters."""
|
| 42 |
elif length == "medium":
|
| 43 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be between 250-300 characters in length."""
|
| 44 |
-
else:
|
| 45 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 46 |
return dev_prompt
|
| 47 |
|
| 48 |
def create_csv_file_simple(results):
|
| 49 |
"""Create a CSV file from the results and return the path"""
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
|
| 66 |
-
# Extract original filename without path or extension
|
| 67 |
def get_base_filename(filepath):
|
| 68 |
if not filepath:
|
| 69 |
return ""
|
| 70 |
-
# Get the basename (filename with extension)
|
| 71 |
basename = os.path.basename(filepath)
|
| 72 |
-
# Remove extension
|
| 73 |
filename = os.path.splitext(basename)[0]
|
| 74 |
return filename
|
| 75 |
|
| 76 |
-
custom_css = """
|
| 77 |
-
.image-container img {
|
| 78 |
-
object-fit: contain;
|
| 79 |
-
width: 100%;
|
| 80 |
-
height: 100%;
|
| 81 |
-
}
|
| 82 |
-
"""
|
| 83 |
-
|
| 84 |
# Define the Gradio interface
|
| 85 |
def create_demo():
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
with gr.Row():
|
| 89 |
with gr.Column(scale=3):
|
| 90 |
gr.Markdown("# MATCHA: Museum Alt-Text for Cultural Heritage with AI 🍵 🌿")
|
|
@@ -92,279 +103,179 @@ def create_demo():
|
|
| 92 |
gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
|
| 93 |
with gr.Column(scale=1):
|
| 94 |
with gr.Row():
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
with gr.Row():
|
| 106 |
# Left column: Controls and uploads
|
| 107 |
with gr.Column(scale=1):
|
| 108 |
-
# Upload interface
|
| 109 |
upload_button = gr.UploadButton(
|
| 110 |
-
"Click to Upload Images",
|
| 111 |
-
file_types=["image"],
|
| 112 |
file_count="multiple"
|
| 113 |
)
|
| 114 |
-
|
| 115 |
-
# Define choices as a list of tuples: (Display Name, Internal Value)
|
| 116 |
model_choices = [
|
| 117 |
-
# Gemini
|
| 118 |
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 119 |
-
|
| 120 |
-
("GPT-4.1
|
| 121 |
-
("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 122 |
-
("GPT-4.1", "gpt-4.1"),
|
| 123 |
-
("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
| 124 |
-
# Other Models
|
| 125 |
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 126 |
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 127 |
-
# Experimental Models
|
| 128 |
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 129 |
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 130 |
]
|
| 131 |
-
|
| 132 |
-
# Find the internal value of the default choice
|
| 133 |
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 134 |
-
|
| 135 |
-
# Add model selection dropdown
|
| 136 |
model_choice = gr.Dropdown(
|
| 137 |
-
choices=model_choices,
|
| 138 |
-
|
| 139 |
-
value=default_model_internal_value, # Use the internal value for the default
|
| 140 |
-
# info="Choose the language model to use." # Optional: Add extra info tooltip
|
| 141 |
-
visible=True
|
| 142 |
)
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
# Add response length selection
|
| 146 |
length_choice = gr.Radio(
|
| 147 |
-
choices=["short", "medium", "long"],
|
| 148 |
-
|
| 149 |
-
value="medium",
|
| 150 |
-
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 151 |
)
|
| 152 |
-
|
| 153 |
-
# Preview gallery for uploaded images
|
| 154 |
gr.Markdown("### Uploaded Images")
|
| 155 |
input_gallery = gr.Gallery(
|
| 156 |
-
label="",
|
| 157 |
-
|
| 158 |
-
height=150,
|
| 159 |
-
object_fit="contain"
|
| 160 |
)
|
| 161 |
-
|
| 162 |
-
# Analysis button
|
| 163 |
analyze_button = gr.Button("Generate Alt-Text", variant="primary", size="lg")
|
| 164 |
-
|
| 165 |
-
# Hidden state component to store image info
|
| 166 |
image_state = gr.State([])
|
| 167 |
filename_state = gr.State([])
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
csv_download = gr.File(label="CSV Results")
|
| 171 |
-
|
| 172 |
# Right column: Display area
|
| 173 |
with gr.Column(scale=2):
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
)
|
| 185 |
-
|
| 186 |
-
# Navigation row
|
| 187 |
with gr.Row():
|
| 188 |
prev_button = gr.Button("← Previous", size="sm")
|
| 189 |
-
image_counter = gr.Markdown("", elem_id="image-counter")
|
| 190 |
next_button = gr.Button("Next →", size="sm")
|
| 191 |
-
|
| 192 |
-
# Alt-text heading and output
|
| 193 |
gr.Markdown("### Generated Alt-text")
|
| 194 |
-
|
| 195 |
-
# Alt-text
|
| 196 |
analysis_text = gr.Textbox(
|
| 197 |
-
label="",
|
| 198 |
-
value="Upload images and
|
| 199 |
-
lines=6,
|
| 200 |
-
max_lines=10,
|
| 201 |
-
interactive=False,
|
| 202 |
-
show_label=False
|
| 203 |
)
|
| 204 |
-
|
| 205 |
-
# Hidden state for gallery navigation
|
| 206 |
current_index = gr.State(0)
|
| 207 |
all_images = gr.State([])
|
| 208 |
all_results = gr.State([])
|
| 209 |
-
|
| 210 |
-
#
|
| 211 |
-
|
|
|
|
| 212 |
file_paths = []
|
| 213 |
file_names = []
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
return file_paths, file_paths, file_names
|
| 219 |
-
|
| 220 |
upload_button.upload(
|
| 221 |
fn=handle_upload,
|
| 222 |
-
inputs=[upload_button],
|
| 223 |
-
outputs=[input_gallery, image_state, filename_state
|
|
|
|
| 224 |
)
|
| 225 |
-
|
| 226 |
-
# Function to analyze images
|
| 227 |
-
# Modify the analyze_images function in your code:
|
| 228 |
|
|
|
|
| 229 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 230 |
if not image_paths:
|
| 231 |
-
return [], [], 0, "", "No images
|
| 232 |
-
|
| 233 |
-
# Get system prompt based on length selection
|
| 234 |
sys_prompt = get_sys_prompt(length_choice)
|
| 235 |
-
|
| 236 |
image_results = []
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
if i < len(filenames) and filenames[i]
|
| 241 |
-
image_id = filenames[i]
|
| 242 |
-
else:
|
| 243 |
-
image_id = f"Image {i+1}"
|
| 244 |
-
|
| 245 |
try:
|
| 246 |
-
# Open the image file for analysis
|
| 247 |
img = Image.open(image_path)
|
| 248 |
-
prompt0 = prompt_new()
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
if
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
max_image_size=512,
|
| 266 |
-
prompt=prompt0,
|
| 267 |
-
prompt_dev=sys_prompt,
|
| 268 |
-
temperature=1
|
| 269 |
-
)
|
| 270 |
-
except Exception as gemini_error:
|
| 271 |
-
# If gemini client fails, fall back to standard OR client
|
| 272 |
-
result = OR.generate_caption(
|
| 273 |
-
img,
|
| 274 |
-
model=model_name,
|
| 275 |
-
max_image_size=512,
|
| 276 |
-
prompt=prompt0,
|
| 277 |
-
prompt_dev=sys_prompt,
|
| 278 |
-
temperature=1
|
| 279 |
-
)
|
| 280 |
-
else:
|
| 281 |
-
# For all other models, use OR client directly
|
| 282 |
-
result = OR.generate_caption(
|
| 283 |
-
img,
|
| 284 |
-
model=model_name,
|
| 285 |
-
max_image_size=512,
|
| 286 |
-
prompt=prompt0,
|
| 287 |
-
prompt_dev=sys_prompt,
|
| 288 |
-
temperature=1
|
| 289 |
-
)
|
| 290 |
-
|
| 291 |
-
# Add to results
|
| 292 |
-
image_results.append({
|
| 293 |
-
"image_id": image_id,
|
| 294 |
-
"content": result
|
| 295 |
-
})
|
| 296 |
-
|
| 297 |
except Exception as e:
|
| 298 |
-
error_message = f"Error: {str(e)}"
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
})
|
| 303 |
-
|
| 304 |
-
# Create a CSV file for download
|
| 305 |
csv_path = create_csv_file_simple(image_results)
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
if
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
initial_text = image_results[0]["content"]
|
| 312 |
-
else:
|
| 313 |
-
initial_image = ""
|
| 314 |
-
initial_text = "No images analyzed"
|
| 315 |
-
initial_counter = "0 of 0"
|
| 316 |
-
|
| 317 |
-
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 318 |
initial_text, csv_path)
|
| 319 |
|
| 320 |
-
|
| 321 |
-
# Function to navigate to previous image
|
| 322 |
def go_to_prev(current_idx, images, results):
|
| 323 |
-
if not images or len(images) == 0:
|
| 324 |
-
return current_idx,
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
# Function to navigate to next image
|
| 332 |
def go_to_next(current_idx, images, results):
|
| 333 |
-
if not images or len(images) == 0:
|
| 334 |
-
return current_idx,
|
| 335 |
-
|
| 336 |
new_idx = (current_idx + 1) % len(images)
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
return (new_idx, images[new_idx],
|
| 340 |
-
|
| 341 |
-
# Connect
|
| 342 |
analyze_button.click(
|
| 343 |
fn=analyze_images,
|
| 344 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 345 |
-
outputs=[
|
| 346 |
-
|
| 347 |
-
analysis_text, csv_download
|
| 348 |
-
]
|
| 349 |
)
|
| 350 |
-
|
| 351 |
# Connect navigation buttons
|
| 352 |
prev_button.click(
|
| 353 |
-
fn=go_to_prev,
|
| 354 |
-
|
| 355 |
-
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 356 |
)
|
| 357 |
-
|
| 358 |
next_button.click(
|
| 359 |
-
fn=go_to_next,
|
| 360 |
-
|
| 361 |
-
outputs=[current_index, current_image, image_counter, analysis_text]
|
| 362 |
)
|
| 363 |
-
|
| 364 |
-
#
|
| 365 |
with gr.Accordion("About", open=False):
|
| 366 |
-
|
| 367 |
-
## About
|
| 368 |
|
| 369 |
This demo generates alternative text for images.
|
| 370 |
|
|
@@ -378,10 +289,11 @@ def create_demo():
|
|
| 378 |
If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
|
| 379 |
chris.addis@nhm.ac.uk
|
| 380 |
""")
|
| 381 |
-
|
| 382 |
return demo
|
| 383 |
|
| 384 |
# Launch the app
|
| 385 |
if __name__ == "__main__":
|
|
|
|
| 386 |
app = create_demo()
|
| 387 |
app.launch()
|
|
|
|
| 41 |
dev_prompt = """You are a museum curator tasked with generating alt-text (as defined by W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maximum of 130 characters."""
|
| 42 |
elif length == "medium":
|
| 43 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be between 250-300 characters in length."""
|
| 44 |
+
else: # long
|
| 45 |
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in W3C) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 46 |
return dev_prompt
|
| 47 |
|
| 48 |
def create_csv_file_simple(results):
|
| 49 |
"""Create a CSV file from the results and return the path"""
|
| 50 |
+
try:
|
| 51 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, newline='', encoding='utf-8') as f:
|
| 52 |
+
path = f.name
|
| 53 |
+
writer = csv.writer(f)
|
| 54 |
+
writer.writerow(['image_id', 'content'])
|
| 55 |
+
for result in results:
|
| 56 |
+
writer.writerow([
|
| 57 |
+
result.get('image_id', ''),
|
| 58 |
+
result.get('content', '')
|
| 59 |
+
])
|
| 60 |
+
return path
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f"Error creating CSV: {e}")
|
| 63 |
+
return None
|
| 64 |
+
|
| 65 |
|
|
|
|
| 66 |
def get_base_filename(filepath):
    """Return the file name of *filepath* without directory or extension.

    A falsy input (``None`` or an empty string) yields ``""``.
    """
    if not filepath:
        return ""
    stem, _ext = os.path.splitext(os.path.basename(filepath))
    return stem
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# Define the Gradio interface
|
| 74 |
def create_demo():
|
| 75 |
+
# --- Reintroduce CSS ---
|
| 76 |
+
custom_css = """
|
| 77 |
+
/* Container for the image component (#current-image-display is the elem_id of gr.Image) */
|
| 78 |
+
#current-image-display {
|
| 79 |
+
height: 600px; /* Define container height */
|
| 80 |
+
width: 100%; /* Define container width (takes column width) */
|
| 81 |
+
display: flex; /* Use flexbox for alignment */
|
| 82 |
+
justify-content: center; /* Center content horizontally */
|
| 83 |
+
align-items: center; /* Center content vertically */
|
| 84 |
+
overflow: hidden; /* Hide any potential overflow from container */
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/* The actual <img> element inside the container */
|
| 88 |
+
#current-image-display img {
|
| 89 |
+
object-fit: contain !important; /* Scale keeping aspect ratio, within bounds */
|
| 90 |
+
max-width: 100%; /* Prevent image exceeding container width */
|
| 91 |
+
max-height: 600px !important; /* Prevent image exceeding container height */
|
| 92 |
+
width: auto; /* Use natural width unless constrained by max-width */
|
| 93 |
+
height: auto; /* Use natural height unless constrained by max-height */
|
| 94 |
+
display: block; /* Ensure image behaves predictably in flex */
|
| 95 |
+
}
|
| 96 |
+
"""
|
| 97 |
+
# --- Pass css to gr.Blocks ---
|
| 98 |
+
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
|
| 99 |
with gr.Row():
|
| 100 |
with gr.Column(scale=3):
|
| 101 |
gr.Markdown("# MATCHA: Museum Alt-Text for Cultural Heritage with AI 🍵 🌿")
|
|
|
|
| 103 |
gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
|
| 104 |
with gr.Column(scale=1):
|
| 105 |
with gr.Row():
|
| 106 |
+
gr.Image("images/nhm_logo.png", show_label=False, height=120,
|
| 107 |
+
interactive=False, show_download_button=False,
|
| 108 |
+
show_share_button=False, show_fullscreen_button=False,
|
| 109 |
+
container=False, elem_id="nhm-logo")
|
| 110 |
+
gr.Image("images/nml_logo.png", show_label=False, height=120,
|
| 111 |
+
interactive=False, show_download_button=False,
|
| 112 |
+
show_share_button=False, show_fullscreen_button=False,
|
| 113 |
+
container=False, elem_id="nml-logo")
|
| 114 |
+
|
|
|
|
| 115 |
with gr.Row():
|
| 116 |
# Left column: Controls and uploads
|
| 117 |
with gr.Column(scale=1):
|
|
|
|
| 118 |
upload_button = gr.UploadButton(
|
| 119 |
+
"Click to Upload Images",
|
| 120 |
+
file_types=["image"],
|
| 121 |
file_count="multiple"
|
| 122 |
)
|
|
|
|
|
|
|
| 123 |
model_choices = [
|
|
|
|
| 124 |
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 125 |
+
("GPT-4.1 Nano", "gpt-4.1-nano"), ("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 126 |
+
("GPT-4.1", "gpt-4.1"), ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 128 |
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
|
|
|
| 129 |
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 130 |
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 131 |
]
|
|
|
|
|
|
|
| 132 |
default_model_internal_value = "google/gemini-2.0-flash-001"
|
|
|
|
|
|
|
| 133 |
model_choice = gr.Dropdown(
|
| 134 |
+
choices=model_choices, label="Select Model",
|
| 135 |
+
value=default_model_internal_value, visible=True
|
|
|
|
|
|
|
|
|
|
| 136 |
)
|
|
|
|
|
|
|
|
|
|
| 137 |
length_choice = gr.Radio(
|
| 138 |
+
choices=["short", "medium", "long"], label="Response Length",
|
| 139 |
+
value="medium", info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
|
|
|
|
|
|
| 140 |
)
|
|
|
|
|
|
|
| 141 |
gr.Markdown("### Uploaded Images")
|
| 142 |
input_gallery = gr.Gallery(
|
| 143 |
+
label="Uploaded Image Previews", columns=3, height=150,
|
| 144 |
+
object_fit="contain", show_label=False
|
|
|
|
|
|
|
| 145 |
)
|
|
|
|
|
|
|
| 146 |
analyze_button = gr.Button("Generate Alt-Text", variant="primary", size="lg")
|
|
|
|
|
|
|
| 147 |
image_state = gr.State([])
|
| 148 |
filename_state = gr.State([])
|
| 149 |
+
csv_download = gr.File(label="Download CSV Results")
|
| 150 |
+
|
|
|
|
|
|
|
| 151 |
# Right column: Display area
|
| 152 |
with gr.Column(scale=2):
|
| 153 |
+
current_image = gr.Image(
|
| 154 |
+
label="Current Image",
|
| 155 |
+
type="filepath",
|
| 156 |
+
elem_id="current-image-display", # ADDED - for CSS targeting
|
| 157 |
+
show_fullscreen_button=True,
|
| 158 |
+
show_download_button=False,
|
| 159 |
+
show_share_button=False,
|
| 160 |
+
show_label=False
|
| 161 |
+
)
|
| 162 |
+
|
|
|
|
|
|
|
|
|
|
| 163 |
with gr.Row():
|
| 164 |
prev_button = gr.Button("← Previous", size="sm")
|
| 165 |
+
image_counter = gr.Markdown("0 of 0", elem_id="image-counter")
|
| 166 |
next_button = gr.Button("Next →", size="sm")
|
| 167 |
+
|
|
|
|
| 168 |
gr.Markdown("### Generated Alt-text")
|
|
|
|
|
|
|
| 169 |
analysis_text = gr.Textbox(
|
| 170 |
+
label="Generated Text",
|
| 171 |
+
value="Upload images and click 'Generate Alt-Text'.",
|
| 172 |
+
lines=6, max_lines=10, interactive=True, show_label=False
|
|
|
|
|
|
|
|
|
|
| 173 |
)
|
|
|
|
|
|
|
| 174 |
current_index = gr.State(0)
|
| 175 |
all_images = gr.State([])
|
| 176 |
all_results = gr.State([])
|
| 177 |
+
|
| 178 |
+
# --- Functions (handle_upload, analyze_images, navigators) remain the same ---
|
| 179 |
+
# Handle file uploads
|
| 180 |
+
def handle_upload(files, current_paths, current_filenames):
    """Replace the working image set with the newly uploaded files.

    The previous state arguments (current_paths/current_filenames) are
    intentionally discarded: every upload starts a fresh session. Resets
    the gallery, state, index, viewer, counter and instruction text.
    """
    file_paths = [f.name for f in files] if files else []
    file_names = [get_base_filename(p) for p in file_paths]
    return (file_paths, file_paths, file_names, 0, None,
            "0 of 0", "Upload images and click 'Generate Alt-Text'.")
|
| 188 |
+
|
| 189 |
upload_button.upload(
|
| 190 |
fn=handle_upload,
|
| 191 |
+
inputs=[upload_button, image_state, filename_state],
|
| 192 |
+
outputs=[input_gallery, image_state, filename_state,
|
| 193 |
+
current_index, current_image, image_counter, analysis_text]
|
| 194 |
)
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
+
# Analyze images
|
| 197 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
    """Run the selected model over every uploaded image.

    Returns the Gradio output tuple: (all image paths, per-image result
    dicts, initial index, first image, counter label, first caption,
    CSV download path). Per-image failures are recorded as error text in
    the results rather than aborting the batch.
    """
    if not image_paths:
        return [], [], 0, None, "0 of 0", "No images uploaded to analyze.", None

    sys_prompt = get_sys_prompt(length_choice)
    image_results = []
    progress = gr.Progress(track_tqdm=True)

    for idx, image_path in enumerate(progress.tqdm(image_paths, desc="Analyzing Images")):
        # Use the uploaded filename as the id when available, otherwise a
        # positional fallback that still identifies the file.
        has_name = idx < len(filenames) and filenames[idx]
        image_id = filenames[idx] if has_name else f"Image_{idx+1}_{os.path.basename(image_path)}"
        try:
            img = Image.open(image_path)
            prompt0 = prompt_new()
            # OR is the default client; switch to the gemini client here if
            # a Gemini-native model id requires it.
            result = OR.generate_caption(
                img, model=model_choice, max_image_size=512,
                prompt=prompt0, prompt_dev=sys_prompt, temperature=1
            )
            image_results.append({"image_id": image_id, "content": result.strip()})
        except FileNotFoundError:
            error_message = f"Error: File not found at path '{image_path}'"
            print(error_message)
            image_results.append({"image_id": image_id, "content": error_message})
        except Exception as e:
            error_message = f"Error processing {image_id}: {str(e)}"
            print(error_message)
            image_results.append({"image_id": image_id, "content": error_message})

    csv_path = create_csv_file_simple(image_results)
    first_image = image_paths[0] if image_paths else None
    counter = f"1 of {len(image_paths)}" if image_paths else "0 of 0"
    first_text = (image_results[0]["content"] if image_results
                  else "Analysis complete, but no results generated.")
    return (image_paths, image_results, 0, first_image, counter, first_text, csv_path)
|
| 238 |
|
| 239 |
+
# Navigate previous
|
|
|
|
| 240 |
def go_to_prev(current_idx, images, results):
    """Step the viewer back one image, wrapping from the first to the last."""
    if not images or not results:
        return current_idx, None, "0 of 0", ""
    total = len(images)
    idx = (current_idx - 1) % total  # Python % wraps negatives correctly
    content = results[idx]["content"] if idx < len(results) else "Error: Result not found"
    return idx, images[idx], f"{idx + 1} of {total}", content
|
| 247 |
+
|
| 248 |
+
# Navigate next
|
|
|
|
| 249 |
def go_to_next(current_idx, images, results):
    """Step the viewer forward one image, wrapping from the last to the first."""
    if not images or not results:
        return current_idx, None, "0 of 0", ""
    total = len(images)
    idx = (current_idx + 1) % total
    content = results[idx]["content"] if idx < len(results) else "Error: Result not found"
    return idx, images[idx], f"{idx + 1} of {total}", content
|
| 256 |
+
|
| 257 |
+
# Connect analyze button
|
| 258 |
analyze_button.click(
|
| 259 |
fn=analyze_images,
|
| 260 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 261 |
+
outputs=[all_images, all_results, current_index, current_image, image_counter,
|
| 262 |
+
analysis_text, csv_download]
|
|
|
|
|
|
|
| 263 |
)
|
| 264 |
+
|
| 265 |
# Connect navigation buttons
|
| 266 |
prev_button.click(
|
| 267 |
+
fn=go_to_prev, inputs=[current_index, all_images, all_results],
|
| 268 |
+
outputs=[current_index, current_image, image_counter, analysis_text], queue=False
|
|
|
|
| 269 |
)
|
|
|
|
| 270 |
next_button.click(
|
| 271 |
+
fn=go_to_next, inputs=[current_index, all_images, all_results],
|
| 272 |
+
outputs=[current_index, current_image, image_counter, analysis_text], queue=False
|
|
|
|
| 273 |
)
|
| 274 |
+
|
| 275 |
+
# About section
|
| 276 |
with gr.Accordion("About", open=False):
|
| 277 |
+
gr.Markdown("""
|
| 278 |
+
## About MATCHA 🍵:
|
| 279 |
|
| 280 |
This demo generates alternative text for images.
|
| 281 |
|
|
|
|
| 289 |
If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
|
| 290 |
chris.addis@nhm.ac.uk
|
| 291 |
""")
|
| 292 |
+
|
| 293 |
return demo
|
| 294 |
|
| 295 |
# Launch the app
|
| 296 |
if __name__ == "__main__":
|
| 297 |
+
|
| 298 |
app = create_demo()
|
| 299 |
app.launch()
|
app.py
CHANGED
|
@@ -36,15 +36,24 @@ PREFERENCES_FILE = "data/user_preferences.csv"
|
|
| 36 |
# Ensure directory exists
|
| 37 |
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
|
| 38 |
|
| 39 |
-
def get_sys_prompt(length="medium"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
if length == "short":
|
| 41 |
-
dev_prompt = "
|
| 42 |
elif length == "medium":
|
| 43 |
-
dev_prompt = "
|
| 44 |
else: # long
|
| 45 |
-
dev_prompt = "
|
| 46 |
return dev_prompt
|
| 47 |
|
|
|
|
| 48 |
def create_csv_file_simple(results):
|
| 49 |
"""Create a CSV file from the results and return the path"""
|
| 50 |
try:
|
|
@@ -62,7 +71,6 @@ def create_csv_file_simple(results):
|
|
| 62 |
print(f"Error creating CSV: {e}")
|
| 63 |
return None
|
| 64 |
|
| 65 |
-
|
| 66 |
def get_base_filename(filepath):
|
| 67 |
if not filepath:
|
| 68 |
return ""
|
|
@@ -127,7 +135,8 @@ def create_demo():
|
|
| 127 |
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 128 |
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 129 |
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 130 |
-
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
|
|
|
| 131 |
]
|
| 132 |
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 133 |
model_choice = gr.Dropdown(
|
|
@@ -138,6 +147,16 @@ def create_demo():
|
|
| 138 |
choices=["short", "medium", "long"], label="Response Length",
|
| 139 |
value="medium", info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 140 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
gr.Markdown("### Uploaded Images")
|
| 142 |
input_gallery = gr.Gallery(
|
| 143 |
label="Uploaded Image Previews", columns=3, height=150,
|
|
@@ -194,11 +213,12 @@ def create_demo():
|
|
| 194 |
)
|
| 195 |
|
| 196 |
# Analyze images
|
| 197 |
-
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 198 |
if not image_paths:
|
| 199 |
return [], [], 0, None, "0 of 0", "No images uploaded to analyze.", None
|
| 200 |
|
| 201 |
-
|
|
|
|
| 202 |
image_results = []
|
| 203 |
analysis_progress = gr.Progress(track_tqdm=True)
|
| 204 |
|
|
@@ -257,7 +277,7 @@ def create_demo():
|
|
| 257 |
# Connect analyze button
|
| 258 |
analyze_button.click(
|
| 259 |
fn=analyze_images,
|
| 260 |
-
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 261 |
outputs=[all_images, all_results, current_index, current_image, image_counter,
|
| 262 |
analysis_text, csv_download]
|
| 263 |
)
|
|
@@ -275,9 +295,20 @@ def create_demo():
|
|
| 275 |
# About section
|
| 276 |
with gr.Accordion("About", open=False):
|
| 277 |
gr.Markdown("""
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
return demo
|
| 283 |
|
|
|
|
| 36 |
# Ensure directory exists
|
| 37 |
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
|
| 38 |
|
| 39 |
+
def get_sys_prompt(length="medium", photograph=False):
    """Build the system prompt for alt-text generation.

    Args:
        length: "short" (max 130 chars), "medium" (250-300 chars), or any
            other value for "long" (max 450 chars).
        photograph: When True, the prompt describes wildlife photography
            instead of museum objects.

    Returns:
        The system prompt string with the length instruction appended.
    """
    object_type = "wildlife photography" if photograph else "museum objects"

    # BUG FIX: this must be an f-string — the original was a plain string,
    # so the literal text "{object_type}" was sent to the model and the
    # `photograph` flag had no effect. Also fixed the "Repsonses" typo.
    dev_prompt = (
        f"You are a museum curator tasked with generating alt-text (as defined by W3C) "
        f"of {object_type} for visually impaired and blind users from images. "
        f"Use British English and follow museum accessibility best practices. "
        f"Do not start with phrases like 'The image shows' or 'This is an image of'. "
        f"Be precise, concise and avoid filler and subjective statements."
    )

    if length == "short":
        dev_prompt += " Responses should be a maximum of 130 characters."
    elif length == "medium":
        dev_prompt += " Responses should be between 250-300 characters."
    else:  # long
        dev_prompt += " Responses should be a maximum of 450 characters."
    return dev_prompt
|
| 55 |
|
| 56 |
+
|
| 57 |
def create_csv_file_simple(results):
|
| 58 |
"""Create a CSV file from the results and return the path"""
|
| 59 |
try:
|
|
|
|
| 71 |
print(f"Error creating CSV: {e}")
|
| 72 |
return None
|
| 73 |
|
|
|
|
| 74 |
def get_base_filename(filepath):
|
| 75 |
if not filepath:
|
| 76 |
return ""
|
|
|
|
| 135 |
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 136 |
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 137 |
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 138 |
+
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21"),
|
| 139 |
+
("Gemini 2.5 Flash Thinking (Preview)", "google/gemini-2.5-flash-preview:thinking")
|
| 140 |
]
|
| 141 |
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 142 |
model_choice = gr.Dropdown(
|
|
|
|
| 147 |
choices=["short", "medium", "long"], label="Response Length",
|
| 148 |
value="medium", info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 149 |
)
|
| 150 |
+
|
| 151 |
+
# Advanced settings accordion
|
| 152 |
+
with gr.Accordion("Advanced Settings", open=False):
|
| 153 |
+
content_type = gr.Radio(
|
| 154 |
+
choices=["museum objects", "wildlife photography"],
|
| 155 |
+
label="Content Type",
|
| 156 |
+
value="museum objects",
|
| 157 |
+
info="Choose the type of content in your images"
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
gr.Markdown("### Uploaded Images")
|
| 161 |
input_gallery = gr.Gallery(
|
| 162 |
label="Uploaded Image Previews", columns=3, height=150,
|
|
|
|
| 213 |
)
|
| 214 |
|
| 215 |
# Analyze images
|
| 216 |
+
def analyze_images(image_paths, model_choice, length_choice, filenames, content_type_choice):
|
| 217 |
if not image_paths:
|
| 218 |
return [], [], 0, None, "0 of 0", "No images uploaded to analyze.", None
|
| 219 |
|
| 220 |
+
is_photography = content_type_choice == "wildlife photography"
|
| 221 |
+
sys_prompt = get_sys_prompt(length_choice, photograph=is_photography)
|
| 222 |
image_results = []
|
| 223 |
analysis_progress = gr.Progress(track_tqdm=True)
|
| 224 |
|
|
|
|
| 277 |
# Connect analyze button
|
| 278 |
analyze_button.click(
|
| 279 |
fn=analyze_images,
|
| 280 |
+
inputs=[image_state, model_choice, length_choice, filename_state, content_type],
|
| 281 |
outputs=[all_images, all_results, current_index, current_image, image_counter,
|
| 282 |
analysis_text, csv_download]
|
| 283 |
)
|
|
|
|
| 295 |
# About section
|
| 296 |
with gr.Accordion("About", open=False):
|
| 297 |
gr.Markdown("""
|
| 298 |
+
## About MATCHA 🍵:
|
| 299 |
+
|
| 300 |
+
This demo generates alternative text for images.
|
| 301 |
+
|
| 302 |
+
- Upload one or more images using the upload button
|
| 303 |
+
- Choose a model and response length for generation
|
| 304 |
+
- Navigate through the images with the Previous and Next buttons
|
| 305 |
+
- Download CSV with all results
|
| 306 |
+
|
| 307 |
+
Developed by the Natural History Museum in Partnership with National Museums Liverpool.
|
| 308 |
+
|
| 309 |
+
If you find any bugs/have any problems/have any suggestions please feel free to get in touch:
|
| 310 |
+
chris.addis@nhm.ac.uk
|
| 311 |
+
""")
|
| 312 |
|
| 313 |
return demo
|
| 314 |
|
library/utils_prompt.py
CHANGED
|
@@ -11,7 +11,7 @@ def prompt_new(title=None):
|
|
| 11 |
title_info = {}
|
| 12 |
else:
|
| 13 |
title_info = f"(titled: {title})"
|
| 14 |
-
return f"
|
| 15 |
|
| 16 |
def prompt_1(title=None):
|
| 17 |
if title == None:
|
|
|
|
| 11 |
def prompt_new(title=None):
    """Build the user prompt, optionally embedding the object's title.

    Args:
        title: Optional object title; included as "(titled: ...)" when given.

    Returns:
        The prompt string passed to the captioning model.
    """
    # BUG FIX: title_info was initialised to an empty dict, which an
    # f-string renders literally as "{}" when no title is supplied; an
    # empty string is the intended fallback. Also use `is None` (identity)
    # rather than `== None`.
    if title is None:
        title_info = ""
    else:
        title_info = f"(titled: {title})"
    return f"Generate alt-text for this object {title_info}:"
|
| 15 |
|
| 16 |
def prompt_1(title=None):
|
| 17 |
if title == None:
|