Chris Addis committed on
Commit
af23186
·
1 Parent(s): b81c5d1
Files changed (1) hide show
  1. app.py +87 -237
app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  import requests
7
  import json
8
  from dotenv import load_dotenv
9
- # import openai # Assuming openai is not directly used in this snippet anymore
10
  import base64
11
  import csv
12
  import tempfile
@@ -18,32 +18,14 @@ if os.path.exists(".env"):
18
  load_dotenv()
19
 
20
  from io import BytesIO
21
- # import numpy as np # Already imported
22
- # import requests # Already imported
23
- # from PIL import Image # Already imported
24
-
25
- # Assume these are defined elsewhere or replace with actual implementations if needed
26
- class OpenRouterAPI:
27
- def __init__(self, api_key=None, base_url=None):
28
- pass
29
- def generate_caption(self, img, model, max_image_size, prompt, prompt_dev, temperature):
30
- # Dummy implementation for testing
31
- print(f"Generating caption with model: {model}")
32
- return f"Generated caption for image using {model}."
33
-
34
- def prompt_new():
35
- # Dummy implementation
36
- return "Describe this image."
37
- # --- End Dummy implementations ---
38
-
39
 
40
  OR = OpenRouterAPI()
41
  # Ensure GEMINI_API_KEY is set in your environment or .env file
42
  gemini_api_key = os.getenv("GEMINI_API_KEY")
43
  if not gemini_api_key:
44
  print("Warning: GEMINI_API_KEY environment variable not set. Using placeholder.")
45
- # Handle the case where the key might be missing, perhaps disable the Gemini models or use a default key if applicable
46
- gemini = OpenRouterAPI(api_key=gemini_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/") # Note: This base_url looks like OpenAI, ensure it's correct for Gemini via OpenRouter or direct API
47
 
48
  # Path for storing user preferences
49
  PREFERENCES_FILE = "data/user_preferences.csv"
@@ -62,15 +44,11 @@ def get_sys_prompt(length="medium"):
62
 
63
  def create_csv_file_simple(results):
64
  """Create a CSV file from the results and return the path"""
65
- # Create a temporary file
66
  try:
67
- # Use NamedTemporaryFile to simplify cleanup
68
  with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, newline='', encoding='utf-8') as f:
69
  path = f.name
70
  writer = csv.writer(f)
71
- # Write header
72
  writer.writerow(['image_id', 'content'])
73
- # Write data
74
  for result in results:
75
  writer.writerow([
76
  result.get('image_id', ''),
@@ -82,21 +60,32 @@ def create_csv_file_simple(results):
82
  return None
83
 
84
 
85
- # Extract original filename without path or extension
86
  def get_base_filename(filepath):
87
  if not filepath:
88
  return ""
89
- # Get the basename (filename with extension)
90
  basename = os.path.basename(filepath)
91
- # Remove extension
92
  filename = os.path.splitext(basename)[0]
93
  return filename
94
 
95
  # Define the Gradio interface
96
  def create_demo():
97
- # Removed custom_css as we will use the built-in object_fit parameter
98
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo: # Removed css=custom_css
99
- # Replace the existing logo code section:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  with gr.Row():
101
  with gr.Column(scale=3):
102
  gr.Markdown("# MATCHA: Museum Alt-Text for Cultural Heritage with AI 🍵 🌿")
@@ -104,328 +93,189 @@ def create_demo():
104
  gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
105
  with gr.Column(scale=1):
106
  with gr.Row():
107
- # Use gr.Image with all interactive features disabled
108
  gr.Image("images/nhm_logo.png", show_label=False, height=120,
109
  interactive=False, show_download_button=False,
110
  show_share_button=False, show_fullscreen_button=False,
111
- container=False, elem_id="nhm-logo") # Added elem_id for clarity
112
  gr.Image("images/nml_logo.png", show_label=False, height=120,
113
  interactive=False, show_download_button=False,
114
  show_share_button=False, show_fullscreen_button=False,
115
- container=False, elem_id="nml-logo") # Added elem_id for clarity
116
 
117
  with gr.Row():
118
  # Left column: Controls and uploads
119
  with gr.Column(scale=1):
120
- # Upload interface
121
  upload_button = gr.UploadButton(
122
  "Click to Upload Images",
123
  file_types=["image"],
124
  file_count="multiple"
125
  )
126
-
127
- # Define choices as a list of tuples: (Display Name, Internal Value)
128
  model_choices = [
129
- # Gemini
130
  ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
131
- # GPT-4.1 Series
132
- ("GPT-4.1 Nano", "gpt-4.1-nano"),
133
- ("GPT-4.1 Mini", "gpt-4.1-mini"),
134
- ("GPT-4.1", "gpt-4.1"),
135
- ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
136
- # Other Models
137
  ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
138
  ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
139
- # Experimental Models
140
  ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
141
  ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
142
  ]
143
-
144
- # Find the internal value of the default choice
145
  default_model_internal_value = "google/gemini-2.0-flash-001"
146
-
147
- # Add model selection dropdown
148
  model_choice = gr.Dropdown(
149
- choices=model_choices,
150
- label="Select Model",
151
- value=default_model_internal_value, # Use the internal value for the default
152
- # info="Choose the language model to use." # Optional: Add extra info tooltip
153
- visible=True
154
  )
155
-
156
-
157
- # Add response length selection
158
  length_choice = gr.Radio(
159
- choices=["short", "medium", "long"],
160
- label="Response Length",
161
- value="medium",
162
- info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
163
  )
164
-
165
- # Preview gallery for uploaded images
166
  gr.Markdown("### Uploaded Images")
167
  input_gallery = gr.Gallery(
168
- label="Uploaded Image Previews", # Added label
169
- columns=3,
170
- height=150, # Reduced height slightly if needed
171
- object_fit="contain", # Ensure gallery previews also fit well
172
- show_label=False # Hide the label text above the gallery
173
  )
174
-
175
- # Analysis button
176
  analyze_button = gr.Button("Generate Alt-Text", variant="primary", size="lg")
177
-
178
- # Hidden state component to store image info
179
  image_state = gr.State([])
180
  filename_state = gr.State([])
181
-
182
- # CSV download component
183
- csv_download = gr.File(label="Download CSV Results") # Clarified label
184
 
185
  # Right column: Display area
186
  with gr.Column(scale=2):
187
- # Directly place the Image component here
188
- # Use object_fit='contain' and set height. Width will adapt.
189
  current_image = gr.Image(
190
  label="Current Image",
191
- height=600, # Set the maximum desired height
192
- # width=1000, # REMOVED fixed width
193
  type="filepath",
194
- object_fit="contain", # ADDED: Scale image while preserving aspect ratio
 
195
  show_fullscreen_button=True,
196
- show_download_button=False, # Keep false as per original code
197
- show_share_button=False, # Keep false as per original code
198
- show_label=False # Hide the "Current Image" label above the image
199
- # Removed elem_classes="image-container" as object_fit handles it
200
  )
201
 
202
- # Navigation row
203
  with gr.Row():
204
  prev_button = gr.Button("← Previous", size="sm")
205
- image_counter = gr.Markdown("0 of 0", elem_id="image-counter") # Default text
206
  next_button = gr.Button("Next →", size="sm")
207
 
208
- # Alt-text heading and output
209
  gr.Markdown("### Generated Alt-text")
210
-
211
- # Alt-text
212
  analysis_text = gr.Textbox(
213
- label="Generated Text", # Added label
214
- value="Upload images and click 'Generate Alt-Text'.", # Initial message
215
- lines=6,
216
- max_lines=10,
217
- interactive=True, # Allow user to edit if desired? Set back to False if not.
218
- show_label=False # Hide the label text
219
  )
220
-
221
- # Hidden state for gallery navigation
222
  current_index = gr.State(0)
223
  all_images = gr.State([])
224
  all_results = gr.State([])
225
 
226
- # Handle file uploads - store files for use during analysis
 
227
  def handle_upload(files, current_paths, current_filenames):
228
- # Append new files to existing ones if needed, or replace
229
- # This version replaces existing uploads each time
230
  file_paths = []
231
  file_names = []
232
- if files: # Check if files is not None
233
  for file in files:
234
  file_paths.append(file.name)
235
- # Extract filename without path or extension for later use
236
  file_names.append(get_base_filename(file.name))
237
- # Reset view if new files are uploaded
238
  return file_paths, file_paths, file_names, 0, None, "0 of 0", "Upload images and click 'Generate Alt-Text'."
239
 
240
  upload_button.upload(
241
  fn=handle_upload,
242
- inputs=[upload_button, image_state, filename_state], # Pass current state if appending needed
243
- outputs=[input_gallery, image_state, filename_state, # Outputs updated state
244
- current_index, current_image, image_counter, analysis_text] # Reset display
245
  )
246
 
247
- # Function to analyze images
248
  def analyze_images(image_paths, model_choice, length_choice, filenames):
249
  if not image_paths:
250
- # Return state that clears/resets the output fields
251
- return [], [], 0, None, "0 of 0", "No images uploaded to analyze.", None # No CSV path
252
 
253
- # Get system prompt based on length selection
254
  sys_prompt = get_sys_prompt(length_choice)
255
-
256
  image_results = []
257
- analysis_progress = gr.Progress(track_tqdm=True) # Add progress bar
258
 
259
  for i, image_path in enumerate(analysis_progress.tqdm(image_paths, desc="Analyzing Images")):
260
- # Use original filename as image_id if available
261
- if i < len(filenames) and filenames[i]:
262
- image_id = filenames[i]
263
- else:
264
- # Fallback if filename extraction failed or list mismatch
265
- image_id = f"Image_{i+1}_{os.path.basename(image_path)}"
266
-
267
-
268
  try:
269
- # Open the image file for analysis
270
  img = Image.open(image_path)
271
- prompt0 = prompt_new() # Using the new prompt function
272
-
273
- # Determine the actual model name (strip extra labels)
274
- # Using the selected internal value directly is safer
275
- model_name = model_choice # Already the internal value from dropdown
276
-
277
- # Check if this is one of the Gemini models that needs special handling
278
- # Note: This check might need adjustment based on how OpenRouterAPI handles different model endpoints/APIs
279
- is_experimental_gemini = "gemini-2.5-pro" in model_name or "gemini-2.0-flash-thinking" in model_name
280
- is_google_gemini = model_name.startswith("google/gemini")
281
-
282
- client_to_use = OR # Default to standard OpenRouter client
283
-
284
- # Example logic: Use dedicated client if API key and specific model match
285
- # Adjust this based on your OpenRouterAPI class capabilities
286
- # if is_experimental_gemini and gemini: # And potentially check if gemini client is configured
287
- # client_to_use = gemini
288
- # elif is_google_gemini and gemini:
289
- # client_to_use = gemini # Or maybe all google models use the specific client?
290
 
291
  result = client_to_use.generate_caption(
292
- img,
293
- model=model_name,
294
- max_image_size=512, # Consider if this should be configurable
295
- prompt=prompt0,
296
- prompt_dev=sys_prompt,
297
- temperature=1 # Consider if this should be configurable
298
  )
299
-
300
- # Add to results
301
- image_results.append({
302
- "image_id": image_id,
303
- "content": result.strip() # Trim whitespace
304
- })
305
-
306
  except FileNotFoundError:
307
  error_message = f"Error: File not found at path '{image_path}'"
308
- print(error_message) # Log error
309
  image_results.append({"image_id": image_id, "content": error_message})
310
  except Exception as e:
311
  error_message = f"Error processing {image_id}: {str(e)}"
312
- print(error_message) # Log error
313
- image_results.append({
314
- "image_id": image_id,
315
- "content": error_message
316
- })
317
 
318
- # Create a CSV file for download
319
  csv_path = create_csv_file_simple(image_results)
320
-
321
- # Set up initial display with first image result
322
- if image_results: # Check if there are results (even errors)
323
- initial_image = image_paths[0]
324
- initial_counter = f"1 of {len(image_paths)}"
325
- initial_text = image_results[0]["content"]
326
- else: # Should not happen if image_paths is not empty, but good fallback
327
- initial_image = None
328
- initial_text = "Analysis complete, but no results generated."
329
- initial_counter = "0 of 0"
330
 
331
  return (image_paths, image_results, 0, initial_image, initial_counter,
332
  initial_text, csv_path)
333
 
334
-
335
- # Function to navigate to previous image
336
  def go_to_prev(current_idx, images, results):
337
- if not images or not results or len(images) == 0: # Check results too
338
- return current_idx, None, "0 of 0", "" # Return None for image path
339
-
340
- # Calculate new index correctly wrapping around
341
  new_idx = (current_idx - 1 + len(images)) % len(images)
342
  counter_text = f"{new_idx + 1} of {len(images)}"
343
-
344
- # Ensure result exists for the index
345
  result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
346
-
347
  return (new_idx, images[new_idx], counter_text, result_content)
348
 
349
- # Function to navigate to next image
350
  def go_to_next(current_idx, images, results):
351
- if not images or not results or len(images) == 0: # Check results too
352
- return current_idx, None, "0 of 0", "" # Return None for image path
353
-
354
  new_idx = (current_idx + 1) % len(images)
355
  counter_text = f"{new_idx + 1} of {len(images)}"
356
-
357
- # Ensure result exists for the index
358
  result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
359
-
360
  return (new_idx, images[new_idx], counter_text, result_content)
361
 
362
- # Connect the analyze button
363
  analyze_button.click(
364
  fn=analyze_images,
365
  inputs=[image_state, model_choice, length_choice, filename_state],
366
- outputs=[
367
- all_images, all_results, current_index, current_image, image_counter,
368
- analysis_text, csv_download
369
- ]
370
  )
371
 
372
  # Connect navigation buttons
373
  prev_button.click(
374
- fn=go_to_prev,
375
- inputs=[current_index, all_images, all_results],
376
- outputs=[current_index, current_image, image_counter, analysis_text],
377
- # Add queue=False if navigation should be instant and not wait for analysis
378
- queue=False
379
  )
380
-
381
  next_button.click(
382
- fn=go_to_next,
383
- inputs=[current_index, all_images, all_results],
384
- outputs=[current_index, current_image, image_counter, analysis_text],
385
- # Add queue=False if navigation should be instant
386
- queue=False
387
  )
388
 
389
- # Optional: Add additional information
390
  with gr.Accordion("About", open=False):
391
- gr.Markdown("""
392
- ## About this demo
393
-
394
- This demo generates alternative text for museum object images using various AI models.
395
-
396
- - Upload one or more images using the 'Click to Upload Images' button.
397
- - Select the AI model and desired response length.
398
- - Click 'Generate Alt-Text'. Processing time depends on the number of images and the selected model.
399
- - View the generated text for each image using the Previous and Next buttons.
400
- - Download a CSV file containing all results using the 'Download CSV Results' link.
401
-
402
- Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme.
403
-
404
- If you find any bugs, have problems, or have suggestions, please feel free to get in touch:
405
- chris.addis@nhm.ac.uk
406
- """)
407
 
408
  return demo
409
 
410
  # Launch the app
411
  if __name__ == "__main__":
412
- # --- Dummy classes/functions for local execution ---
413
- # You would remove these if running with your actual library files
414
- # class OpenRouterAPI:
415
- # def __init__(self, api_key=None, base_url=None): pass
416
- # def generate_caption(self, img, model, max_image_size, prompt, prompt_dev, temperature): return f"Dummy caption for {model}"
417
- # def prompt_new(): return "Describe."
418
- # OR = OpenRouterAPI()
419
- # gemini = OpenRouterAPI()
420
- # --- End Dummy section ---
421
-
422
- # Create dummy image files if they don't exist for local testing
423
- os.makedirs("images", exist_ok=True)
424
- if not os.path.exists("images/nhm_logo.png"):
425
- Image.new('RGB', (60, 30), color = 'red').save('images/nhm_logo.png')
426
- if not os.path.exists("images/nml_logo.png"):
427
- Image.new('RGB', (60, 30), color = 'blue').save('images/nml_logo.png')
428
-
429
 
430
  app = create_demo()
431
- app.launch() # Add share=True if you want a public link when running locally
 
6
  import requests
7
  import json
8
  from dotenv import load_dotenv
9
+ # import openai
10
  import base64
11
  import csv
12
  import tempfile
 
18
  load_dotenv()
19
 
20
  from io import BytesIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  OR = OpenRouterAPI()
23
  # Ensure GEMINI_API_KEY is set in your environment or .env file
24
  gemini_api_key = os.getenv("GEMINI_API_KEY")
25
  if not gemini_api_key:
26
  print("Warning: GEMINI_API_KEY environment variable not set. Using placeholder.")
27
+ # TODO: handle a missing GEMINI_API_KEY (only a warning is printed above; no placeholder value is assigned)
28
+ gemini = OpenRouterAPI(api_key=gemini_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
29
 
30
  # Path for storing user preferences
31
  PREFERENCES_FILE = "data/user_preferences.csv"
 
44
 
45
  def create_csv_file_simple(results):
46
  """Create a CSV file from the results and return the path"""
 
47
  try:
 
48
  with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, newline='', encoding='utf-8') as f:
49
  path = f.name
50
  writer = csv.writer(f)
 
51
  writer.writerow(['image_id', 'content'])
 
52
  for result in results:
53
  writer.writerow([
54
  result.get('image_id', ''),
 
60
  return None
61
 
62
 
 
63
  def get_base_filename(filepath):
64
  if not filepath:
65
  return ""
 
66
  basename = os.path.basename(filepath)
 
67
  filename = os.path.splitext(basename)[0]
68
  return filename
69
 
70
  # Define the Gradio interface
71
  def create_demo():
72
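+ # --- Custom CSS: scale the main image (elem_id "current-image-display") to fit, replacing the unsupported object_fit argument on gr.Image ---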
73
+ custom_css = """
74
+ /* Target the img element *inside* the component with ID 'current-image-display' */
75
+ #current-image-display img {
76
+ object-fit: contain !important; /* Scale down while maintaining aspect ratio */
77
+ width: 100% !important; /* Make image width fill the container */
78
+ height: 100% !important; /* Make image height fill the container */
79
+ }
80
+ /* Optional: Ensure the container itself respects the height */
81
+ #current-image-display {
82
+ height: 600px; /* Match the height set in gr.Image */
83
+ /* width: 100%; /* Usually takes column width */
84
+ /* overflow: hidden; /* Can prevent potential overflow */
85
+ }
86
+ """
87
+ # --- Pass css to gr.Blocks ---
88
+ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
89
  with gr.Row():
90
  with gr.Column(scale=3):
91
  gr.Markdown("# MATCHA: Museum Alt-Text for Cultural Heritage with AI 🍵 🌿")
 
93
  gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool. Funded by the DCMS Pilot Scheme")
94
  with gr.Column(scale=1):
95
  with gr.Row():
 
96
  gr.Image("images/nhm_logo.png", show_label=False, height=120,
97
  interactive=False, show_download_button=False,
98
  show_share_button=False, show_fullscreen_button=False,
99
+ container=False, elem_id="nhm-logo")
100
  gr.Image("images/nml_logo.png", show_label=False, height=120,
101
  interactive=False, show_download_button=False,
102
  show_share_button=False, show_fullscreen_button=False,
103
+ container=False, elem_id="nml-logo")
104
 
105
  with gr.Row():
106
  # Left column: Controls and uploads
107
  with gr.Column(scale=1):
 
108
  upload_button = gr.UploadButton(
109
  "Click to Upload Images",
110
  file_types=["image"],
111
  file_count="multiple"
112
  )
 
 
113
  model_choices = [
 
114
  ("Gemini 2.0 Flash (default)", "google/gemini-2.0-flash-001"),
115
+ ("GPT-4.1 Nano", "gpt-4.1-nano"), ("GPT-4.1 Mini", "gpt-4.1-mini"),
116
+ ("GPT-4.1", "gpt-4.1"), ("ChatGPT Latest", "openai/chatgpt-4o-latest"),
 
 
 
 
117
  ("Claude 3.7 Sonnet", "anthropic/claude-3.7-sonnet"),
118
  ("Llama 4 Maverick", "meta-llama/llama-4-maverick"),
 
119
  ("Gemini 2.5 Pro (Experimental, limited)", "gemini-2.5-pro-exp-03-25"),
120
  ("Gemini 2.0 Flash Thinking (Experimental, limited)", "gemini-2.0-flash-thinking-exp-01-21")
121
  ]
 
 
122
  default_model_internal_value = "google/gemini-2.0-flash-001"
 
 
123
  model_choice = gr.Dropdown(
124
+ choices=model_choices, label="Select Model",
125
+ value=default_model_internal_value, visible=True
 
 
 
126
  )
 
 
 
127
  length_choice = gr.Radio(
128
+ choices=["short", "medium", "long"], label="Response Length",
129
+ value="medium", info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
 
 
130
  )
 
 
131
  gr.Markdown("### Uploaded Images")
132
  input_gallery = gr.Gallery(
133
+ label="Uploaded Image Previews", columns=3, height=150,
134
+ object_fit="contain", show_label=False
 
 
 
135
  )
 
 
136
  analyze_button = gr.Button("Generate Alt-Text", variant="primary", size="lg")
 
 
137
  image_state = gr.State([])
138
  filename_state = gr.State([])
139
+ csv_download = gr.File(label="Download CSV Results")
 
 
140
 
141
  # Right column: Display area
142
  with gr.Column(scale=2):
143
+ # --- Use elem_id for CSS targeting, remove unsupported object_fit ---
 
144
  current_image = gr.Image(
145
  label="Current Image",
146
+ height=600,
 
147
  type="filepath",
148
+ # object_fit="contain", # REMOVED - Unsupported argument
149
+ elem_id="current-image-display", # ADDED - for CSS targeting
150
  show_fullscreen_button=True,
151
+ show_download_button=False,
152
+ show_share_button=False,
153
+ show_label=False
 
154
  )
155
 
 
156
  with gr.Row():
157
  prev_button = gr.Button("← Previous", size="sm")
158
+ image_counter = gr.Markdown("0 of 0", elem_id="image-counter")
159
  next_button = gr.Button("Next →", size="sm")
160
 
 
161
  gr.Markdown("### Generated Alt-text")
 
 
162
  analysis_text = gr.Textbox(
163
+ label="Generated Text",
164
+ value="Upload images and click 'Generate Alt-Text'.",
165
+ lines=6, max_lines=10, interactive=True, show_label=False
 
 
 
166
  )
 
 
167
  current_index = gr.State(0)
168
  all_images = gr.State([])
169
  all_results = gr.State([])
170
 
171
+ # --- Event handlers: handle_upload, analyze_images, go_to_prev, go_to_next ---
172
+ # Handle file uploads
173
  def handle_upload(files, current_paths, current_filenames):
 
 
174
  file_paths = []
175
  file_names = []
176
+ if files:
177
  for file in files:
178
  file_paths.append(file.name)
 
179
  file_names.append(get_base_filename(file.name))
 
180
  return file_paths, file_paths, file_names, 0, None, "0 of 0", "Upload images and click 'Generate Alt-Text'."
181
 
182
  upload_button.upload(
183
  fn=handle_upload,
184
+ inputs=[upload_button, image_state, filename_state],
185
+ outputs=[input_gallery, image_state, filename_state,
186
+ current_index, current_image, image_counter, analysis_text]
187
  )
188
 
189
+ # Analyze images
190
  def analyze_images(image_paths, model_choice, length_choice, filenames):
191
  if not image_paths:
192
+ return [], [], 0, None, "0 of 0", "No images uploaded to analyze.", None
 
193
 
 
194
  sys_prompt = get_sys_prompt(length_choice)
 
195
  image_results = []
196
+ analysis_progress = gr.Progress(track_tqdm=True)
197
 
198
  for i, image_path in enumerate(analysis_progress.tqdm(image_paths, desc="Analyzing Images")):
199
+ image_id = filenames[i] if i < len(filenames) and filenames[i] else f"Image_{i+1}_{os.path.basename(image_path)}"
 
 
 
 
 
 
 
200
  try:
 
201
  img = Image.open(image_path)
202
+ prompt0 = prompt_new()
203
+ model_name = model_choice
204
+ client_to_use = OR # Default client
205
+ # Add logic here if you need to switch between OR and gemini clients based on model_name
206
+ # Example:
207
+ # if model_name.startswith("google/gemini") and gemini:
208
+ # client_to_use = gemini
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  result = client_to_use.generate_caption(
211
+ img, model=model_name, max_image_size=512,
212
+ prompt=prompt0, prompt_dev=sys_prompt, temperature=1
 
 
 
 
213
  )
214
+ image_results.append({"image_id": image_id, "content": result.strip()})
 
 
 
 
 
 
215
  except FileNotFoundError:
216
  error_message = f"Error: File not found at path '{image_path}'"
217
+ print(error_message)
218
  image_results.append({"image_id": image_id, "content": error_message})
219
  except Exception as e:
220
  error_message = f"Error processing {image_id}: {str(e)}"
221
+ print(error_message)
222
+ image_results.append({"image_id": image_id, "content": error_message})
 
 
 
223
 
 
224
  csv_path = create_csv_file_simple(image_results)
225
+ initial_image = image_paths[0] if image_paths else None
226
+ initial_counter = f"1 of {len(image_paths)}" if image_paths else "0 of 0"
227
+ initial_text = image_results[0]["content"] if image_results else "Analysis complete, but no results generated."
 
 
 
 
 
 
 
228
 
229
  return (image_paths, image_results, 0, initial_image, initial_counter,
230
  initial_text, csv_path)
231
 
232
+ # Navigate previous
 
233
  def go_to_prev(current_idx, images, results):
234
+ if not images or not results or len(images) == 0:
235
+ return current_idx, None, "0 of 0", ""
 
 
236
  new_idx = (current_idx - 1 + len(images)) % len(images)
237
  counter_text = f"{new_idx + 1} of {len(images)}"
 
 
238
  result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
 
239
  return (new_idx, images[new_idx], counter_text, result_content)
240
 
241
+ # Navigate next
242
  def go_to_next(current_idx, images, results):
243
+ if not images or not results or len(images) == 0:
244
+ return current_idx, None, "0 of 0", ""
 
245
  new_idx = (current_idx + 1) % len(images)
246
  counter_text = f"{new_idx + 1} of {len(images)}"
 
 
247
  result_content = results[new_idx]["content"] if new_idx < len(results) else "Error: Result not found"
 
248
  return (new_idx, images[new_idx], counter_text, result_content)
249
 
250
+ # Connect analyze button
251
  analyze_button.click(
252
  fn=analyze_images,
253
  inputs=[image_state, model_choice, length_choice, filename_state],
254
+ outputs=[all_images, all_results, current_index, current_image, image_counter,
255
+ analysis_text, csv_download]
 
 
256
  )
257
 
258
  # Connect navigation buttons
259
  prev_button.click(
260
+ fn=go_to_prev, inputs=[current_index, all_images, all_results],
261
+ outputs=[current_index, current_image, image_counter, analysis_text], queue=False
 
 
 
262
  )
 
263
  next_button.click(
264
+ fn=go_to_next, inputs=[current_index, all_images, all_results],
265
+ outputs=[current_index, current_image, image_counter, analysis_text], queue=False
 
 
 
266
  )
267
 
268
+ # About section
269
  with gr.Accordion("About", open=False):
270
+ gr.Markdown("""
271
+ ## About this demo
272
+ ... [content unchanged] ...
273
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
  return demo
276
 
277
  # Launch the app
278
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
  app = create_demo()
281
+ app.launch()