Chris Addis committed on
Commit ·
af23186
1
Parent(s): b81c5d1
Matcha 2
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import os
|
|
| 6 |
import requests
|
| 7 |
import json
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
-
# import openai
|
| 10 |
import base64
|
| 11 |
import csv
|
| 12 |
import tempfile
|
|
@@ -18,32 +18,14 @@ if os.path.exists(".env"):
|
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
from io import BytesIO
|
| 21 |
-
# import numpy as np # Already imported
|
| 22 |
-
# import requests # Already imported
|
| 23 |
-
# from PIL import Image # Already imported
|
| 24 |
-
|
| 25 |
-
# Assume these are defined elsewhere or replace with actual implementations if needed
|
| 26 |
-
class OpenRouterAPI:
|
| 27 |
-
def __init__(self, api_key=None, base_url=None):
|
| 28 |
-
pass
|
| 29 |
-
def generate_caption(self, img, model, max_image_size, prompt, prompt_dev, temperature):
|
| 30 |
-
# Dummy implementation for testing
|
| 31 |
-
print(f"Generating caption with model: {model}")
|
| 32 |
-
return f"Generated caption for image using {model}."
|
| 33 |
-
|
| 34 |
-
def prompt_new():
|
| 35 |
-
# Dummy implementation
|
| 36 |
-
return "Describe this image."
|
| 37 |
-
# --- End Dummy implementations ---
|
| 38 |
-
|
| 39 |
|
| 40 |
OR = OpenRouterAPI()
|
| 41 |
# Ensure GEMINI_API_KEY is set in your environment or .env file
|
| 42 |
gemini_api_key = os.getenv("GEMINI_API_KEY")
|
| 43 |
if not gemini_api_key:
|
| 44 |
print("Warning: GEMINI_API_KEY environment variable not set. Using placeholder.")
|
| 45 |
-
# Handle the case where the key might be missing
|
| 46 |
-
gemini = OpenRouterAPI(api_key=gemini_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
| 47 |
|
| 48 |
# Path for storing user preferences
|
| 49 |
PREFERENCES_FILE = "data/user_preferences.csv"
|
|
@@ -62,15 +44,11 @@ def get_sys_prompt(length="medium"):
|
|
| 62 |
|
| 63 |
def create_csv_file_simple(results):
|
| 64 |
"""Create a CSV file from the results and return the path"""
|
| 65 |
-
# Create a temporary file
|
| 66 |
try:
|
| 67 |
-
# Use NamedTemporaryFile to simplify cleanup
|
| 68 |
with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, newline='', encoding='utf-8') as f:
|
| 69 |
path = f.name
|
| 70 |
writer = csv.writer(f)
|
| 71 |
-
# Write header
|
| 72 |
writer.writerow(['image_id', 'content'])
|
| 73 |
-
# Write data
|
| 74 |
for result in results:
|
| 75 |
writer.writerow([
|
| 76 |
result.get('image_id', ''),
|
|
@@ -82,21 +60,32 @@ def create_csv_file_simple(results):
|
|
| 82 |
return None
|
| 83 |
|
| 84 |
|
| 85 |
-
# Extract original filename without path or extension
|
| 86 |
def get_base_filename(filepath):
|
| 87 |
if not filepath:
|
| 88 |
return ""
|
| 89 |
-
# Get the basename (filename with extension)
|
| 90 |
basename = os.path.basename(filepath)
|
| 91 |
-
# Remove extension
|
| 92 |
filename = os.path.splitext(basename)[0]
|
| 93 |
return filename
|
| 94 |
|
| 95 |
# Define the Gradio interface
|
| 96 |
def create_demo():
|
| 97 |
-
#
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
with gr.Row():
|
| 101 |
with gr.Column(scale=3):
|
| 102 |
gr.Markdown("# MATCHA: Museum Alt-Text for Cultural Heritage with AI 🍵 🌿")
|
|
@@ -104,328 +93,189 @@ def create_demo():
|
|
| 104 |
gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
|
| 105 |
with gr.Column(scale=1):
|
| 106 |
with gr.Row():
|
| 107 |
-
# Use gr.Image with all interactive features disabled
|
| 108 |
gr.Image("images/nhm_logo.png", show_label=False, height=120,
|
| 109 |
interactive=False, show_download_button=False,
|
| 110 |
show_share_button=False, show_fullscreen_button=False,
|
| 111 |
-
container=False, elem_id="nhm-logo")
|
| 112 |
gr.Image("images/nml_logo.png", show_label=False, height=120,
|
| 113 |
interactive=False, show_download_button=False,
|
| 114 |
show_share_button=False, show_fullscreen_button=False,
|
| 115 |
-
container=False, elem_id="nml-logo")
|
| 116 |
|
| 117 |
with gr.Row():
|
| 118 |
# Left column: Controls and uploads
|
| 119 |
with gr.Column(scale=1):
|
| 120 |
-
# Upload interface
|
| 121 |
upload_button = gr.UploadButton(
|
| 122 |
"Click to Upload Images",
|
| 123 |
file_types=["image"],
|
| 124 |
file_count="multiple"
|
| 125 |
)
|
| 126 |
-
|
| 127 |
-
# Define choices as a list of tuples: (Display Name, Internal Value)
|
| 128 |
model_choices = [
|
| 129 |
-
# Gemini
|
| 130 |
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 131 |
-
|
| 132 |
-
("GPT-4.1
|
| 133 |
-
("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 134 |
-
("GPT-4.1", "gpt-4.1"),
|
| 135 |
-
("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
| 136 |
-
# Other Models
|
| 137 |
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 138 |
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
| 139 |
-
# Experimental Models
|
| 140 |
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 141 |
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 142 |
]
|
| 143 |
-
|
| 144 |
-
# Find the internal value of the default choice
|
| 145 |
default_model_internal_value = "google/gemini-2.0-flash-001"
|
| 146 |
-
|
| 147 |
-
# Add model selection dropdown
|
| 148 |
model_choice = gr.Dropdown(
|
| 149 |
-
choices=model_choices,
|
| 150 |
-
|
| 151 |
-
value=default_model_internal_value, # Use the internal value for the default
|
| 152 |
-
# info="Choose the language model to use." # Optional: Add extra info tooltip
|
| 153 |
-
visible=True
|
| 154 |
)
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
# Add response length selection
|
| 158 |
length_choice = gr.Radio(
|
| 159 |
-
choices=["short", "medium", "long"],
|
| 160 |
-
|
| 161 |
-
value="medium",
|
| 162 |
-
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 163 |
)
|
| 164 |
-
|
| 165 |
-
# Preview gallery for uploaded images
|
| 166 |
gr.Markdown("### Uploaded Images")
|
| 167 |
input_gallery = gr.Gallery(
|
| 168 |
-
label="Uploaded Image Previews",
|
| 169 |
-
|
| 170 |
-
height=150, # Reduced height slightly if needed
|
| 171 |
-
object_fit="contain", # Ensure gallery previews also fit well
|
| 172 |
-
show_label=False # Hide the label text above the gallery
|
| 173 |
)
|
| 174 |
-
|
| 175 |
-
# Analysis button
|
| 176 |
analyze_button = gr.Button("Generate Alt-Text", variant="primary", size="lg")
|
| 177 |
-
|
| 178 |
-
# Hidden state component to store image info
|
| 179 |
image_state = gr.State([])
|
| 180 |
filename_state = gr.State([])
|
| 181 |
-
|
| 182 |
-
# CSV download component
|
| 183 |
-
csv_download = gr.File(label="Download CSV Results") # Clarified label
|
| 184 |
|
| 185 |
# Right column: Display area
|
| 186 |
with gr.Column(scale=2):
|
| 187 |
-
#
|
| 188 |
-
# Use object_fit='contain' and set height. Width will adapt.
|
| 189 |
current_image = gr.Image(
|
| 190 |
label="Current Image",
|
| 191 |
-
height=600,
|
| 192 |
-
# width=1000, # REMOVED fixed width
|
| 193 |
type="filepath",
|
| 194 |
-
object_fit="contain", #
|
|
|
|
| 195 |
show_fullscreen_button=True,
|
| 196 |
-
show_download_button=False,
|
| 197 |
-
show_share_button=False,
|
| 198 |
-
show_label=False
|
| 199 |
-
# Removed elem_classes="image-container" as object_fit handles it
|
| 200 |
)
|
| 201 |
|
| 202 |
-
# Navigation row
|
| 203 |
with gr.Row():
|
| 204 |
prev_button = gr.Button("← Previous", size="sm")
|
| 205 |
-
image_counter = gr.Markdown("0 of 0", elem_id="image-counter")
|
| 206 |
next_button = gr.Button("Next →", size="sm")
|
| 207 |
|
| 208 |
-
# Alt-text heading and output
|
| 209 |
gr.Markdown("### Generated Alt-text")
|
| 210 |
-
|
| 211 |
-
# Alt-text
|
| 212 |
analysis_text = gr.Textbox(
|
| 213 |
-
label="Generated Text",
|
| 214 |
-
value="Upload images and click 'Generate Alt-Text'.",
|
| 215 |
-
lines=6,
|
| 216 |
-
max_lines=10,
|
| 217 |
-
interactive=True, # Allow user to edit if desired? Set back to False if not.
|
| 218 |
-
show_label=False # Hide the label text
|
| 219 |
)
|
| 220 |
-
|
| 221 |
-
# Hidden state for gallery navigation
|
| 222 |
current_index = gr.State(0)
|
| 223 |
all_images = gr.State([])
|
| 224 |
all_results = gr.State([])
|
| 225 |
|
| 226 |
-
#
|
|
|
|
| 227 |
def handle_upload(files, current_paths, current_filenames):
|
| 228 |
-
# Append new files to existing ones if needed, or replace
|
| 229 |
-
# This version replaces existing uploads each time
|
| 230 |
file_paths = []
|
| 231 |
file_names = []
|
| 232 |
-
if files:
|
| 233 |
for file in files:
|
| 234 |
file_paths.append(file.name)
|
| 235 |
-
# Extract filename without path or extension for later use
|
| 236 |
file_names.append(get_base_filename(file.name))
|
| 237 |
-
# Reset view if new files are uploaded
|
| 238 |
return file_paths, file_paths, file_names, 0, None, "0 of 0", "Upload images and click 'Generate Alt-Text'."
|
| 239 |
|
| 240 |
upload_button.upload(
|
| 241 |
fn=handle_upload,
|
| 242 |
-
inputs=[upload_button, image_state, filename_state],
|
| 243 |
-
outputs=[input_gallery, image_state, filename_state,
|
| 244 |
-
current_index, current_image, image_counter, analysis_text]
|
| 245 |
)
|
| 246 |
|
| 247 |
-
#
|
| 248 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 249 |
if not image_paths:
|
| 250 |
-
|
| 251 |
-
return [], [], 0, None, "0 of 0", "No images uploaded to analyze.", None # No CSV path
|
| 252 |
|
| 253 |
-
# Get system prompt based on length selection
|
| 254 |
sys_prompt = get_sys_prompt(length_choice)
|
| 255 |
-
|
| 256 |
image_results = []
|
| 257 |
-
analysis_progress = gr.Progress(track_tqdm=True)
|
| 258 |
|
| 259 |
for i, image_path in enumerate(analysis_progress.tqdm(image_paths, desc="Analyzing Images")):
|
| 260 |
-
|
| 261 |
-
if i < len(filenames) and filenames[i]:
|
| 262 |
-
image_id = filenames[i]
|
| 263 |
-
else:
|
| 264 |
-
# Fallback if filename extraction failed or list mismatch
|
| 265 |
-
image_id = f"Image_{i+1}_{os.path.basename(image_path)}"
|
| 266 |
-
|
| 267 |
-
|
| 268 |
try:
|
| 269 |
-
# Open the image file for analysis
|
| 270 |
img = Image.open(image_path)
|
| 271 |
-
prompt0 = prompt_new()
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
#
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
#
|
| 278 |
-
# Note: This check might need adjustment based on how OpenRouterAPI handles different model endpoints/APIs
|
| 279 |
-
is_experimental_gemini = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
|
| 280 |
-
is_google_gemini = model_name.startswith("google/gemini")
|
| 281 |
-
|
| 282 |
-
client_to_use = OR # Default to standard OpenRouter client
|
| 283 |
-
|
| 284 |
-
# Example logic: Use dedicated client if API key and specific model match
|
| 285 |
-
# Adjust this based on your OpenRouterAPI class capabilities
|
| 286 |
-
# if is_experimental_gemini and gemini: # And potentially check if gemini client is configured
|
| 287 |
-
# client_to_use = gemini
|
| 288 |
-
# elif is_google_gemini and gemini:
|
| 289 |
-
# client_to_use = gemini # Or maybe all google models use the specific client?
|
| 290 |
|
| 291 |
result = client_to_use.generate_caption(
|
| 292 |
-
img,
|
| 293 |
-
|
| 294 |
-
max_image_size=512, # Consider if this should be configurable
|
| 295 |
-
prompt=prompt0,
|
| 296 |
-
prompt_dev=sys_prompt,
|
| 297 |
-
temperature=1 # Consider if this should be configurable
|
| 298 |
)
|
| 299 |
-
|
| 300 |
-
# Add to results
|
| 301 |
-
image_results.append({
|
| 302 |
-
"image_id": image_id,
|
| 303 |
-
"content": result.strip() # Trim whitespace
|
| 304 |
-
})
|
| 305 |
-
|
| 306 |
except FileNotFoundError:
|
| 307 |
error_message = f"Error: File not found at path '{image_path}'"
|
| 308 |
-
print(error_message)
|
| 309 |
image_results.append({"image_id": image_id, "content": error_message})
|
| 310 |
except Exception as e:
|
| 311 |
error_message = f"Error processing {image_id}: {str(e)}"
|
| 312 |
-
print(error_message)
|
| 313 |
-
image_results.append({
|
| 314 |
-
"image_id": image_id,
|
| 315 |
-
"content": error_message
|
| 316 |
-
})
|
| 317 |
|
| 318 |
-
# Create a CSV file for download
|
| 319 |
csv_path = create_csv_file_simple(image_results)
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
if image_results
|
| 323 |
-
initial_image = image_paths[0]
|
| 324 |
-
initial_counter = f"1 of {len(image_paths)}"
|
| 325 |
-
initial_text = image_results[0]["content"]
|
| 326 |
-
else: # Should not happen if image_paths is not empty, but good fallback
|
| 327 |
-
initial_image = None
|
| 328 |
-
initial_text = "Analysis complete, but no results generated."
|
| 329 |
-
initial_counter = "0 of 0"
|
| 330 |
|
| 331 |
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 332 |
initial_text, csv_path)
|
| 333 |
|
| 334 |
-
|
| 335 |
-
# Function to navigate to previous image
|
| 336 |
def go_to_prev(current_idx, images, results):
|
| 337 |
-
if not images or not results or len(images) == 0:
|
| 338 |
-
return current_idx, None, "0 of 0", ""
|
| 339 |
-
|
| 340 |
-
# Calculate new index correctly wrapping around
|
| 341 |
new_idx = (current_idx - 1 + len(images)) % len(images)
|
| 342 |
counter_text = f"{new_idx + 1} of {len(images)}"
|
| 343 |
-
|
| 344 |
-
# Ensure result exists for the index
|
| 345 |
result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
|
| 346 |
-
|
| 347 |
return (new_idx, images[new_idx], counter_text, result_content)
|
| 348 |
|
| 349 |
-
#
|
| 350 |
def go_to_next(current_idx, images, results):
|
| 351 |
-
if not images or not results or len(images) == 0:
|
| 352 |
-
return current_idx, None, "0 of 0", ""
|
| 353 |
-
|
| 354 |
new_idx = (current_idx + 1) % len(images)
|
| 355 |
counter_text = f"{new_idx + 1} of {len(images)}"
|
| 356 |
-
|
| 357 |
-
# Ensure result exists for the index
|
| 358 |
result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
|
| 359 |
-
|
| 360 |
return (new_idx, images[new_idx], counter_text, result_content)
|
| 361 |
|
| 362 |
-
# Connect
|
| 363 |
analyze_button.click(
|
| 364 |
fn=analyze_images,
|
| 365 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 366 |
-
outputs=[
|
| 367 |
-
|
| 368 |
-
analysis_text, csv_download
|
| 369 |
-
]
|
| 370 |
)
|
| 371 |
|
| 372 |
# Connect navigation buttons
|
| 373 |
prev_button.click(
|
| 374 |
-
fn=go_to_prev,
|
| 375 |
-
|
| 376 |
-
outputs=[current_index, current_image, image_counter, analysis_text],
|
| 377 |
-
# Add queue=False if navigation should be instant and not wait for analysis
|
| 378 |
-
queue=False
|
| 379 |
)
|
| 380 |
-
|
| 381 |
next_button.click(
|
| 382 |
-
fn=go_to_next,
|
| 383 |
-
|
| 384 |
-
outputs=[current_index, current_image, image_counter, analysis_text],
|
| 385 |
-
# Add queue=False if navigation should be instant
|
| 386 |
-
queue=False
|
| 387 |
)
|
| 388 |
|
| 389 |
-
#
|
| 390 |
with gr.Accordion("About", open=False):
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
- Upload one or more images using the 'Click to Upload Images' button.
|
| 397 |
-
- Select the AI model and desired response length.
|
| 398 |
-
- Click 'Generate Alt-Text'. Processing time depends on the number of images and the selected model.
|
| 399 |
-
- View the generated text for each image using the Previous and Next buttons.
|
| 400 |
-
- Download a CSV file containing all results using the 'Download CSV Results' link.
|
| 401 |
-
|
| 402 |
-
Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme.
|
| 403 |
-
|
| 404 |
-
If you find any bugs, have problems, or have suggestions, please feel free to get in touch:
|
| 405 |
-
chris.addis@nhm.ac.uk
|
| 406 |
-
""")
|
| 407 |
|
| 408 |
return demo
|
| 409 |
|
| 410 |
# Launch the app
|
| 411 |
if __name__ == "__main__":
|
| 412 |
-
# --- Dummy classes/functions for local execution ---
|
| 413 |
-
# You would remove these if running with your actual library files
|
| 414 |
-
# class OpenRouterAPI:
|
| 415 |
-
# def __init__(self, api_key=None, base_url=None): pass
|
| 416 |
-
# def generate_caption(self, img, model, max_image_size, prompt, prompt_dev, temperature): return f"Dummy caption for {model}"
|
| 417 |
-
# def prompt_new(): return "Describe."
|
| 418 |
-
# OR = OpenRouterAPI()
|
| 419 |
-
# gemini = OpenRouterAPI()
|
| 420 |
-
# --- End Dummy section ---
|
| 421 |
-
|
| 422 |
-
# Create dummy image files if they don't exist for local testing
|
| 423 |
-
os.makedirs("images", exist_ok=True)
|
| 424 |
-
if not os.path.exists("images/nhm_logo.png"):
|
| 425 |
-
Image.new('RGB', (60, 30), color = 'red').save('images/nhm_logo.png')
|
| 426 |
-
if not os.path.exists("images/nml_logo.png"):
|
| 427 |
-
Image.new('RGB', (60, 30), color = 'blue').save('images/nml_logo.png')
|
| 428 |
-
|
| 429 |
|
| 430 |
app = create_demo()
|
| 431 |
-
app.launch()
|
|
|
|
| 6 |
import requests
|
| 7 |
import json
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
+
# import openai
|
| 10 |
import base64
|
| 11 |
import csv
|
| 12 |
import tempfile
|
|
|
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
from io import BytesIO
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
OR = OpenRouterAPI()
|
| 23 |
# Ensure GEMINI_API_KEY is set in your environment or .env file
|
| 24 |
gemini_api_key = os.getenv("GEMINI_API_KEY")
|
| 25 |
if not gemini_api_key:
|
| 26 |
print("Warning: GEMINI_API_KEY environment variable not set. Using placeholder.")
|
| 27 |
+
# Handle the case where the key might be missing
|
| 28 |
+
gemini = OpenRouterAPI(api_key=gemini_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
| 29 |
|
| 30 |
# Path for storing user preferences
|
| 31 |
PREFERENCES_FILE = "data/user_preferences.csv"
|
|
|
|
| 44 |
|
| 45 |
def create_csv_file_simple(results):
|
| 46 |
"""Create a CSV file from the results and return the path"""
|
|
|
|
| 47 |
try:
|
|
|
|
| 48 |
with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, newline='', encoding='utf-8') as f:
|
| 49 |
path = f.name
|
| 50 |
writer = csv.writer(f)
|
|
|
|
| 51 |
writer.writerow(['image_id', 'content'])
|
|
|
|
| 52 |
for result in results:
|
| 53 |
writer.writerow([
|
| 54 |
result.get('image_id', ''),
|
|
|
|
| 60 |
return None
|
| 61 |
|
| 62 |
|
|
|
|
| 63 |
def get_base_filename(filepath):
|
| 64 |
if not filepath:
|
| 65 |
return ""
|
|
|
|
| 66 |
basename = os.path.basename(filepath)
|
|
|
|
| 67 |
filename = os.path.splitext(basename)[0]
|
| 68 |
return filename
|
| 69 |
|
| 70 |
# Define the Gradio interface
|
| 71 |
def create_demo():
|
| 72 |
+
# --- Reintroduce CSS ---
|
| 73 |
+
custom_css = """
|
| 74 |
+
/* Target the img element *inside* the component with ID 'current-image-display' */
|
| 75 |
+
#current-image-display img {
|
| 76 |
+
object-fit: contain !important; /* Scale down while maintaining aspect ratio */
|
| 77 |
+
width: 100% !important; /* Make image width fill the container */
|
| 78 |
+
height: 100% !important; /* Make image height fill the container */
|
| 79 |
+
}
|
| 80 |
+
/* Optional: Ensure the container itself respects the height */
|
| 81 |
+
#current-image-display {
|
| 82 |
+
height: 600px; /* Match the height set in gr.Image */
|
| 83 |
+
/* width: 100%; /* Usually takes column width */
|
| 84 |
+
/* overflow: hidden; /* Can prevent potential overflow */
|
| 85 |
+
}
|
| 86 |
+
"""
|
| 87 |
+
# --- Pass css to gr.Blocks ---
|
| 88 |
+
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
|
| 89 |
with gr.Row():
|
| 90 |
with gr.Column(scale=3):
|
| 91 |
gr.Markdown("# MATCHA: Museum Alt-Text for Cultural Heritage with AI 🍵 🌿")
|
|
|
|
| 93 |
gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
|
| 94 |
with gr.Column(scale=1):
|
| 95 |
with gr.Row():
|
|
|
|
| 96 |
gr.Image("images/nhm_logo.png", show_label=False, height=120,
|
| 97 |
interactive=False, show_download_button=False,
|
| 98 |
show_share_button=False, show_fullscreen_button=False,
|
| 99 |
+
container=False, elem_id="nhm-logo")
|
| 100 |
gr.Image("images/nml_logo.png", show_label=False, height=120,
|
| 101 |
interactive=False, show_download_button=False,
|
| 102 |
show_share_button=False, show_fullscreen_button=False,
|
| 103 |
+
container=False, elem_id="nml-logo")
|
| 104 |
|
| 105 |
with gr.Row():
|
| 106 |
# Left column: Controls and uploads
|
| 107 |
with gr.Column(scale=1):
|
|
|
|
| 108 |
upload_button = gr.UploadButton(
|
| 109 |
"Click to Upload Images",
|
| 110 |
file_types=["image"],
|
| 111 |
file_count="multiple"
|
| 112 |
)
|
|
|
|
|
|
|
| 113 |
model_choices = [
|
|
|
|
| 114 |
("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
|
| 115 |
+
("GPT-4.1 Nano", "gpt-4.1-nano"), ("GPT-4.1 Mini", "gpt-4.1-mini"),
|
| 116 |
+
("GPT-4.1", "gpt-4.1"), ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
|
| 118 |
("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
|
|
|
|
| 119 |
("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
|
| 120 |
("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
|
| 121 |
]
|
|
|
|
|
|
|
| 122 |
default_model_internal_value = "google/gemini-2.0-flash-001"
|
|
|
|
|
|
|
| 123 |
model_choice = gr.Dropdown(
|
| 124 |
+
choices=model_choices, label="Select Model",
|
| 125 |
+
value=default_model_internal_value, visible=True
|
|
|
|
|
|
|
|
|
|
| 126 |
)
|
|
|
|
|
|
|
|
|
|
| 127 |
length_choice = gr.Radio(
|
| 128 |
+
choices=["short", "medium", "long"], label="Response Length",
|
| 129 |
+
value="medium", info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
|
|
|
|
|
|
| 130 |
)
|
|
|
|
|
|
|
| 131 |
gr.Markdown("### Uploaded Images")
|
| 132 |
input_gallery = gr.Gallery(
|
| 133 |
+
label="Uploaded Image Previews", columns=3, height=150,
|
| 134 |
+
object_fit="contain", show_label=False
|
|
|
|
|
|
|
|
|
|
| 135 |
)
|
|
|
|
|
|
|
| 136 |
analyze_button = gr.Button("Generate Alt-Text", variant="primary", size="lg")
|
|
|
|
|
|
|
| 137 |
image_state = gr.State([])
|
| 138 |
filename_state = gr.State([])
|
| 139 |
+
csv_download = gr.File(label="Download CSV Results")
|
|
|
|
|
|
|
| 140 |
|
| 141 |
# Right column: Display area
|
| 142 |
with gr.Column(scale=2):
|
| 143 |
+
# --- Use elem_id for CSS targeting, remove unsupported object_fit ---
|
|
|
|
| 144 |
current_image = gr.Image(
|
| 145 |
label="Current Image",
|
| 146 |
+
height=600,
|
|
|
|
| 147 |
type="filepath",
|
| 148 |
+
# object_fit="contain", # REMOVED - Unsupported argument
|
| 149 |
+
elem_id="current-image-display", # ADDED - for CSS targeting
|
| 150 |
show_fullscreen_button=True,
|
| 151 |
+
show_download_button=False,
|
| 152 |
+
show_share_button=False,
|
| 153 |
+
show_label=False
|
|
|
|
| 154 |
)
|
| 155 |
|
|
|
|
| 156 |
with gr.Row():
|
| 157 |
prev_button = gr.Button("← Previous", size="sm")
|
| 158 |
+
image_counter = gr.Markdown("0 of 0", elem_id="image-counter")
|
| 159 |
next_button = gr.Button("Next →", size="sm")
|
| 160 |
|
|
|
|
| 161 |
gr.Markdown("### Generated Alt-text")
|
|
|
|
|
|
|
| 162 |
analysis_text = gr.Textbox(
|
| 163 |
+
label="Generated Text",
|
| 164 |
+
value="Upload images and click 'Generate Alt-Text'.",
|
| 165 |
+
lines=6, max_lines=10, interactive=True, show_label=False
|
|
|
|
|
|
|
|
|
|
| 166 |
)
|
|
|
|
|
|
|
| 167 |
current_index = gr.State(0)
|
| 168 |
all_images = gr.State([])
|
| 169 |
all_results = gr.State([])
|
| 170 |
|
| 171 |
+
# --- Functions (handle_upload, analyze_images, navigators) remain the same ---
|
| 172 |
+
# Handle file uploads
|
| 173 |
def handle_upload(files, current_paths, current_filenames):
|
|
|
|
|
|
|
| 174 |
file_paths = []
|
| 175 |
file_names = []
|
| 176 |
+
if files:
|
| 177 |
for file in files:
|
| 178 |
file_paths.append(file.name)
|
|
|
|
| 179 |
file_names.append(get_base_filename(file.name))
|
|
|
|
| 180 |
return file_paths, file_paths, file_names, 0, None, "0 of 0", "Upload images and click 'Generate Alt-Text'."
|
| 181 |
|
| 182 |
upload_button.upload(
|
| 183 |
fn=handle_upload,
|
| 184 |
+
inputs=[upload_button, image_state, filename_state],
|
| 185 |
+
outputs=[input_gallery, image_state, filename_state,
|
| 186 |
+
current_index, current_image, image_counter, analysis_text]
|
| 187 |
)
|
| 188 |
|
| 189 |
+
# Analyze images
|
| 190 |
def analyze_images(image_paths, model_choice, length_choice, filenames):
|
| 191 |
if not image_paths:
|
| 192 |
+
return [], [], 0, None, "0 of 0", "No images uploaded to analyze.", None
|
|
|
|
| 193 |
|
|
|
|
| 194 |
sys_prompt = get_sys_prompt(length_choice)
|
|
|
|
| 195 |
image_results = []
|
| 196 |
+
analysis_progress = gr.Progress(track_tqdm=True)
|
| 197 |
|
| 198 |
for i, image_path in enumerate(analysis_progress.tqdm(image_paths, desc="Analyzing Images")):
|
| 199 |
+
image_id = filenames[i] if i < len(filenames) and filenames[i] else f"Image_{i+1}_{os.path.basename(image_path)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
try:
|
|
|
|
| 201 |
img = Image.open(image_path)
|
| 202 |
+
prompt0 = prompt_new()
|
| 203 |
+
model_name = model_choice
|
| 204 |
+
client_to_use = OR # Default client
|
| 205 |
+
# Add logic here if you need to switch between OR and gemini clients based on model_name
|
| 206 |
+
# Example:
|
| 207 |
+
# if model_name.startswith("google/gemini") and gemini:
|
| 208 |
+
# client_to_use = gemini
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
result = client_to_use.generate_caption(
|
| 211 |
+
img, model=model_name, max_image_size=512,
|
| 212 |
+
prompt=prompt0, prompt_dev=sys_prompt, temperature=1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
)
|
| 214 |
+
image_results.append({"image_id": image_id, "content": result.strip()})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
except FileNotFoundError:
|
| 216 |
error_message = f"Error: File not found at path '{image_path}'"
|
| 217 |
+
print(error_message)
|
| 218 |
image_results.append({"image_id": image_id, "content": error_message})
|
| 219 |
except Exception as e:
|
| 220 |
error_message = f"Error processing {image_id}: {str(e)}"
|
| 221 |
+
print(error_message)
|
| 222 |
+
image_results.append({"image_id": image_id, "content": error_message})
|
|
|
|
|
|
|
|
|
|
| 223 |
|
|
|
|
| 224 |
csv_path = create_csv_file_simple(image_results)
|
| 225 |
+
initial_image = image_paths[0] if image_paths else None
|
| 226 |
+
initial_counter = f"1 of {len(image_paths)}" if image_paths else "0 of 0"
|
| 227 |
+
initial_text = image_results[0]["content"] if image_results else "Analysis complete, but no results generated."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
return (image_paths, image_results, 0, initial_image, initial_counter,
|
| 230 |
initial_text, csv_path)
|
| 231 |
|
| 232 |
+
# Navigate previous
|
|
|
|
| 233 |
def go_to_prev(current_idx, images, results):
|
| 234 |
+
if not images or not results or len(images) == 0:
|
| 235 |
+
return current_idx, None, "0 of 0", ""
|
|
|
|
|
|
|
| 236 |
new_idx = (current_idx - 1 + len(images)) % len(images)
|
| 237 |
counter_text = f"{new_idx + 1} of {len(images)}"
|
|
|
|
|
|
|
| 238 |
result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
|
|
|
|
| 239 |
return (new_idx, images[new_idx], counter_text, result_content)
|
| 240 |
|
| 241 |
+
# Navigate next
|
| 242 |
def go_to_next(current_idx, images, results):
|
| 243 |
+
if not images or not results or len(images) == 0:
|
| 244 |
+
return current_idx, None, "0 of 0", ""
|
|
|
|
| 245 |
new_idx = (current_idx + 1) % len(images)
|
| 246 |
counter_text = f"{new_idx + 1} of {len(images)}"
|
|
|
|
|
|
|
| 247 |
result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
|
|
|
|
| 248 |
return (new_idx, images[new_idx], counter_text, result_content)
|
| 249 |
|
| 250 |
+
# Connect analyze button
|
| 251 |
analyze_button.click(
|
| 252 |
fn=analyze_images,
|
| 253 |
inputs=[image_state, model_choice, length_choice, filename_state],
|
| 254 |
+
outputs=[all_images, all_results, current_index, current_image, image_counter,
|
| 255 |
+
analysis_text, csv_download]
|
|
|
|
|
|
|
| 256 |
)
|
| 257 |
|
| 258 |
# Connect navigation buttons
|
| 259 |
prev_button.click(
|
| 260 |
+
fn=go_to_prev, inputs=[current_index, all_images, all_results],
|
| 261 |
+
outputs=[current_index, current_image, image_counter, analysis_text], queue=False
|
|
|
|
|
|
|
|
|
|
| 262 |
)
|
|
|
|
| 263 |
next_button.click(
|
| 264 |
+
fn=go_to_next, inputs=[current_index, all_images, all_results],
|
| 265 |
+
outputs=[current_index, current_image, image_counter, analysis_text], queue=False
|
|
|
|
|
|
|
|
|
|
| 266 |
)
|
| 267 |
|
| 268 |
+
# About section
|
| 269 |
with gr.Accordion("About", open=False):
|
| 270 |
+
gr.Markdown("""
|
| 271 |
+
## About this demo
|
| 272 |
+
... [content unchanged] ...
|
| 273 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
return demo
|
| 276 |
|
| 277 |
# Launch the app
|
| 278 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
app = create_demo()
|
| 281 |
+
app.launch()
|