Chris Addis committed on
Commit
e8242a3
·
1 Parent(s): 612285f

remove A/B

Browse files
Files changed (1) hide show
  1. app.py +50 -283
app.py CHANGED
@@ -45,34 +45,9 @@ def get_sys_prompt(length="medium"):
45
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
46
  return dev_prompt
47
 
48
- def save_preference(image_path, model_a_text, model_b_text, preferred_model):
49
- """Save user preference data to a CSV file"""
50
- # Check if file exists, create with header if not
51
- file_exists = os.path.isfile(PREFERENCES_FILE)
52
-
53
- # Get image filename instead of full path
54
- image_filename = os.path.basename(image_path)
55
-
56
- # Open file in append mode
57
- with open(PREFERENCES_FILE, 'a', newline='') as f:
58
- writer = csv.writer(f)
59
-
60
- # Write header if file is new
61
- if not file_exists:
62
- writer.writerow(['timestamp', 'image', 'model_a_text', 'model_b_text', 'preferred_model'])
63
-
64
- # Write data row
65
- writer.writerow([
66
- datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
67
- image_filename,
68
- model_a_text,
69
- model_b_text,
70
- preferred_model
71
- ])
72
-
73
- return True
74
 
75
- def create_csv_file(results):
76
  """Create a CSV file from the results and return the path"""
77
  # Create a temporary file
78
  fd, path = tempfile.mkstemp(suffix='.csv')
@@ -80,14 +55,12 @@ def create_csv_file(results):
80
  with os.fdopen(fd, 'w', newline='') as f:
81
  writer = csv.writer(f)
82
  # Write header
83
- writer.writerow(['image_id', 'model_a_content', 'model_b_content', 'preferred_model'])
84
  # Write data
85
  for result in results:
86
  writer.writerow([
87
  result.get('image_id', ''),
88
- result.get('model_a_content', ''),
89
- result.get('model_b_content', ''),
90
- result.get('preferred_model', '')
91
  ])
92
 
93
  return path
@@ -135,18 +108,6 @@ def create_demo():
135
  info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
136
  )
137
 
138
- # Add comparison mode checkbox
139
- comparison_mode = gr.Checkbox(
140
- label="Enable A/B Testing Mode",
141
- value=False,
142
- info="Show outputs from both models and select preferred"
143
- )
144
-
145
- # Label the models in comparison mode
146
- with gr.Group(visible=False) as comparison_labels:
147
- gr.Markdown("### Model A: Claude")
148
- gr.Markdown("### Model B: GPT-4o")
149
-
150
  # Preview gallery for uploaded images
151
  gr.Markdown("### Uploaded Images")
152
  input_gallery = gr.Gallery(
@@ -183,76 +144,23 @@ def create_demo():
183
  image_counter = gr.Markdown("", elem_id="image-counter")
184
  next_button = gr.Button("Next →", size="sm")
185
 
186
- # Standard single model output view
187
- with gr.Column(visible=True) as single_model_view:
188
- # Alt-text heading
189
- gr.Markdown("### Generated Alt-text")
190
-
191
- # Alt-text
192
- analysis_text = gr.Textbox(
193
- label="",
194
- value="Please analyze images to see results",
195
- lines=6,
196
- max_lines=10,
197
- interactive=False,
198
- show_label=False
199
- )
200
 
201
- # Comparison view for A/B testing
202
- with gr.Column(visible=False) as comparison_view:
203
- gr.Markdown("### Compare Generated Alt-text")
204
-
205
- with gr.Row() as model_outputs:
206
- # Model A output
207
- with gr.Column():
208
- gr.Markdown("#### Model A (Claude)")
209
- model_a_text = gr.Textbox(
210
- label="",
211
- value="",
212
- lines=5,
213
- max_lines=8,
214
- interactive=False,
215
- show_label=False
216
- )
217
- model_a_button = gr.Button("Select Model A", variant="secondary")
218
-
219
- # Model B output
220
- with gr.Column():
221
- gr.Markdown("#### Model B (GPT-4o)")
222
- model_b_text = gr.Textbox(
223
- label="",
224
- value="",
225
- lines=5,
226
- max_lines=8,
227
- interactive=False,
228
- show_label=False
229
- )
230
- model_b_button = gr.Button("Select Model B", variant="secondary")
231
-
232
- # Preference saved notification
233
- preference_status = gr.Markdown("")
234
 
235
- # Hidden state for gallery navigation and preferences
236
  current_index = gr.State(0)
237
  all_images = gr.State([])
238
  all_results = gr.State([])
239
- preference_state = gr.State([]) # To store user preferences
240
-
241
- # Toggle comparison mode
242
- def toggle_comparison_mode(enable_comparison):
243
- return {
244
- model_choice: not enable_comparison,
245
- length_choice: not enable_comparison,
246
- single_model_view: not enable_comparison,
247
- comparison_view: enable_comparison,
248
- comparison_labels: enable_comparison
249
- }
250
-
251
- comparison_mode.change(
252
- fn=toggle_comparison_mode,
253
- inputs=[comparison_mode],
254
- outputs=[model_choice, length_choice, single_model_view, comparison_view, comparison_labels]
255
- )
256
 
257
  # Handle file uploads - store files for use during analysis
258
  def handle_upload(files):
@@ -271,15 +179,14 @@ def create_demo():
271
  )
272
 
273
  # Function to analyze images
274
- def analyze_images(image_paths, model_choice, length_choice, comparison_mode, filenames):
275
  if not image_paths:
276
- return [], [], 0, "", "No images", "", "", "", [], ""
277
 
278
  # Get system prompt based on length selection
279
  sys_prompt = get_sys_prompt(length_choice)
280
 
281
  image_results = []
282
- empty_preferences = [None] * len(image_paths) # Initialize with no preferences
283
 
284
  for i, image_path in enumerate(image_paths):
285
  # Use original filename as image_id if available
@@ -293,223 +200,86 @@ def create_demo():
293
  img = Image.open(image_path)
294
  prompt0 = prompt_new() # Using the new prompt function
295
 
296
- # In comparison mode, always generate both outputs
297
- if comparison_mode:
298
- # Generate Model A output (Claude)
299
- model_a_result = OR.generate_caption(
300
- img,
301
- model="anthropic/claude-3.7-sonnet",
302
- max_image_size=512,
303
- prompt=prompt0,
304
- prompt_dev=sys_prompt,
305
- temperature=1
306
- )
307
-
308
- # Generate Model B output (GPT-4o)
309
- model_b_result = OR.generate_caption(
310
- img,
311
- model="openai/chatgpt-4o-latest",
312
- max_image_size=512,
313
- prompt=prompt0,
314
- prompt_dev=sys_prompt,
315
- temperature=1
316
- )
317
-
318
- # Add to results
319
- image_results.append({
320
- "image_id": image_id,
321
- "model_a_content": model_a_result,
322
- "model_b_content": model_b_result,
323
- "preferred_model": None # No preference yet
324
- })
325
- else:
326
- # Use the selected model
327
- result = OR.generate_caption(
328
- img,
329
- model=model_choice,
330
- max_image_size=512,
331
- prompt=prompt0,
332
- prompt_dev=sys_prompt,
333
- temperature=1
334
- )
335
-
336
- # For single mode, we still keep the structure compatible with comparison mode
337
- image_results.append({
338
- "image_id": image_id,
339
- "model_a_content": result,
340
- "model_b_content": "",
341
- "preferred_model": None
342
- })
343
 
344
  except Exception as e:
345
  error_message = f"Error: {str(e)}"
346
  image_results.append({
347
  "image_id": image_id,
348
- "model_a_content": error_message,
349
- "model_b_content": error_message if comparison_mode else "",
350
- "preferred_model": None
351
  })
352
 
353
  # Create a CSV file for download
354
- csv_path = create_csv_file(image_results)
355
 
356
  # Set up initial display with first image
357
  if len(image_paths) > 0:
358
  initial_image = image_paths[0]
359
  initial_counter = f"{1} of {len(image_paths)}"
360
-
361
- if comparison_mode:
362
- initial_model_a = image_results[0]["model_a_content"]
363
- initial_model_b = image_results[0]["model_b_content"]
364
- initial_text = "" # Not used in comparison mode
365
- else:
366
- initial_text = image_results[0]["model_a_content"]
367
- initial_model_a = "" # Not used in single mode
368
- initial_model_b = "" # Not used in single mode
369
  else:
370
  initial_image = ""
371
  initial_text = "No images analyzed"
372
- initial_model_a = ""
373
- initial_model_b = ""
374
  initial_counter = "0 of 0"
375
 
376
  return (image_paths, image_results, 0, initial_image, initial_counter,
377
- initial_text, initial_model_a, initial_model_b, empty_preferences,
378
- csv_path, "")
379
 
380
  # Function to navigate to previous image
381
- def go_to_prev(current_idx, images, results, comparison_mode, preferences):
382
  if not images or len(images) == 0:
383
- return current_idx, "", "0 of 0", "", "", "", ""
384
 
385
  new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
386
  counter_html = f"{new_idx + 1} of {len(images)}"
387
 
388
- # Get preference status for this image
389
- preference_message = ""
390
- if preferences[new_idx]:
391
- preferred = "Model A" if preferences[new_idx] == "A" else "Model B"
392
- preference_message = f"You selected {preferred} for this image"
393
-
394
- if comparison_mode:
395
- return (new_idx, images[new_idx], counter_html, "",
396
- results[new_idx]["model_a_content"],
397
- results[new_idx]["model_b_content"],
398
- preference_message)
399
- else:
400
- return (new_idx, images[new_idx], counter_html,
401
- results[new_idx]["model_a_content"], "", "", "")
402
 
403
  # Function to navigate to next image
404
- def go_to_next(current_idx, images, results, comparison_mode, preferences):
405
  if not images or len(images) == 0:
406
- return current_idx, "", "0 of 0", "", "", "", ""
407
 
408
  new_idx = (current_idx + 1) % len(images)
409
  counter_html = f"{new_idx + 1} of {len(images)}"
410
 
411
- # Get preference status for this image
412
- preference_message = ""
413
- if preferences[new_idx]:
414
- preferred = "Model A" if preferences[new_idx] == "A" else "Model B"
415
- preference_message = f"You selected {preferred} for this image"
416
-
417
- if comparison_mode:
418
- return (new_idx, images[new_idx], counter_html, "",
419
- results[new_idx]["model_a_content"],
420
- results[new_idx]["model_b_content"],
421
- preference_message)
422
- else:
423
- return (new_idx, images[new_idx], counter_html,
424
- results[new_idx]["model_a_content"], "", "", "")
425
-
426
- # Function to handle Model A selection
427
- def select_model_a(current_idx, images, results, preferences):
428
- if not images or current_idx >= len(images):
429
- return preferences, "No image selected"
430
-
431
- # Create a copy of preferences to modify
432
- new_preferences = preferences.copy()
433
-
434
- # Update preference for current image
435
- new_preferences[current_idx] = "A"
436
-
437
- # Save preference to CSV
438
- image_path = images[current_idx]
439
- model_a_text = results[current_idx]["model_a_content"]
440
- model_b_text = results[current_idx]["model_b_content"]
441
- save_preference(image_path, model_a_text, model_b_text, "Model A")
442
-
443
- # Also update the results with the preference
444
- results[current_idx]["preferred_model"] = "A"
445
-
446
- # Create confirmation message
447
- message = f"✓ You selected Model A for this image"
448
-
449
- return new_preferences, message
450
-
451
- # Function to handle Model B selection
452
- def select_model_b(current_idx, images, results, preferences):
453
- if not images or current_idx >= len(images):
454
- return preferences, "No image selected"
455
-
456
- # Create a copy of preferences to modify
457
- new_preferences = preferences.copy()
458
-
459
- # Update preference for current image
460
- new_preferences[current_idx] = "B"
461
-
462
- # Save preference to CSV
463
- image_path = images[current_idx]
464
- model_a_text = results[current_idx]["model_a_content"]
465
- model_b_text = results[current_idx]["model_b_content"]
466
- save_preference(image_path, model_a_text, model_b_text, "Model B")
467
-
468
- # Also update the results with the preference
469
- results[current_idx]["preferred_model"] = "B"
470
-
471
- # Create confirmation message
472
- message = f"✓ You selected Model B for this image"
473
-
474
- return new_preferences, message
475
 
476
  # Connect the analyze button
477
  analyze_button.click(
478
  fn=analyze_images,
479
- inputs=[image_state, model_choice, length_choice, comparison_mode, filename_state],
480
  outputs=[
481
  all_images, all_results, current_index, current_image, image_counter,
482
- analysis_text, model_a_text, model_b_text, preference_state,
483
- csv_download, preference_status
484
  ]
485
  )
486
 
487
- # Connect navigation buttons for both modes
488
  prev_button.click(
489
  fn=go_to_prev,
490
- inputs=[current_index, all_images, all_results, comparison_mode, preference_state],
491
- outputs=[current_index, current_image, image_counter, analysis_text,
492
- model_a_text, model_b_text, preference_status]
493
  )
494
 
495
  next_button.click(
496
  fn=go_to_next,
497
- inputs=[current_index, all_images, all_results, comparison_mode, preference_state],
498
- outputs=[current_index, current_image, image_counter, analysis_text,
499
- model_a_text, model_b_text, preference_status]
500
- )
501
-
502
- # Connect model selection buttons with separate functions
503
- model_a_button.click(
504
- fn=select_model_a,
505
- inputs=[current_index, all_images, all_results, preference_state],
506
- outputs=[preference_state, preference_status]
507
- )
508
-
509
- model_b_button.click(
510
- fn=select_model_b,
511
- inputs=[current_index, all_images, all_results, preference_state],
512
- outputs=[preference_state, preference_status]
513
  )
514
 
515
  # Optional: Add additional information
@@ -521,9 +291,6 @@ def create_demo():
521
 
522
  - Upload one or more images using the upload button
523
  - Choose a model and response length for generation
524
- - Choose between standard mode or A/B testing mode
525
- - In standard mode, select one model to generate alt-text
526
- - In A/B testing mode, compare outputs from two models and select your preference
527
  - Navigate through the images with the Previous and Next buttons
528
  - Download CSV with all results
529
 
 
45
  dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
46
  return dev_prompt
47
 
48
+ # save_preference() was removed here: it is no longer needed now that A/B testing is gone
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ def create_csv_file_simple(results):
51
  """Create a CSV file from the results and return the path"""
52
  # Create a temporary file
53
  fd, path = tempfile.mkstemp(suffix='.csv')
 
55
  with os.fdopen(fd, 'w', newline='') as f:
56
  writer = csv.writer(f)
57
  # Write header
58
+ writer.writerow(['image_id', 'content'])
59
  # Write data
60
  for result in results:
61
  writer.writerow([
62
  result.get('image_id', ''),
63
+ result.get('content', '')
 
 
64
  ])
65
 
66
  return path
 
108
  info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
109
  )
110
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # Preview gallery for uploaded images
112
  gr.Markdown("### Uploaded Images")
113
  input_gallery = gr.Gallery(
 
144
  image_counter = gr.Markdown("", elem_id="image-counter")
145
  next_button = gr.Button("Next →", size="sm")
146
 
147
+ # Alt-text heading and output
148
+ gr.Markdown("### Generated Alt-text")
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
+ # Alt-text
151
+ analysis_text = gr.Textbox(
152
+ label="",
153
+ value="Please analyze images to see results",
154
+ lines=6,
155
+ max_lines=10,
156
+ interactive=False,
157
+ show_label=False
158
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
+ # Hidden state for gallery navigation
161
  current_index = gr.State(0)
162
  all_images = gr.State([])
163
  all_results = gr.State([])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
  # Handle file uploads - store files for use during analysis
166
  def handle_upload(files):
 
179
  )
180
 
181
  # Function to analyze images
182
+ def analyze_images(image_paths, model_choice, length_choice, filenames):
183
  if not image_paths:
184
+ return [], [], 0, "", "No images", "", ""
185
 
186
  # Get system prompt based on length selection
187
  sys_prompt = get_sys_prompt(length_choice)
188
 
189
  image_results = []
 
190
 
191
  for i, image_path in enumerate(image_paths):
192
  # Use original filename as image_id if available
 
200
  img = Image.open(image_path)
201
  prompt0 = prompt_new() # Using the new prompt function
202
 
203
+ # Use the selected model
204
+ result = OR.generate_caption(
205
+ img,
206
+ model=model_choice,
207
+ max_image_size=512,
208
+ prompt=prompt0,
209
+ prompt_dev=sys_prompt,
210
+ temperature=1
211
+ )
212
+
213
+ # Add to results
214
+ image_results.append({
215
+ "image_id": image_id,
216
+ "content": result
217
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  except Exception as e:
220
  error_message = f"Error: {str(e)}"
221
  image_results.append({
222
  "image_id": image_id,
223
+ "content": error_message
 
 
224
  })
225
 
226
  # Create a CSV file for download
227
+ csv_path = create_csv_file_simple(image_results)
228
 
229
  # Set up initial display with first image
230
  if len(image_paths) > 0:
231
  initial_image = image_paths[0]
232
  initial_counter = f"{1} of {len(image_paths)}"
233
+ initial_text = image_results[0]["content"]
 
 
 
 
 
 
 
 
234
  else:
235
  initial_image = ""
236
  initial_text = "No images analyzed"
 
 
237
  initial_counter = "0 of 0"
238
 
239
  return (image_paths, image_results, 0, initial_image, initial_counter,
240
+ initial_text, csv_path)
 
241
 
242
  # Function to navigate to previous image
243
+ def go_to_prev(current_idx, images, results):
244
  if not images or len(images) == 0:
245
+ return current_idx, "", "0 of 0", ""
246
 
247
  new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
248
  counter_html = f"{new_idx + 1} of {len(images)}"
249
 
250
+ return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
  # Function to navigate to next image
253
+ def go_to_next(current_idx, images, results):
254
  if not images or len(images) == 0:
255
+ return current_idx, "", "0 of 0", ""
256
 
257
  new_idx = (current_idx + 1) % len(images)
258
  counter_html = f"{new_idx + 1} of {len(images)}"
259
 
260
+ return (new_idx, images[new_idx], counter_html, results[new_idx]["content"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  # Connect the analyze button
263
  analyze_button.click(
264
  fn=analyze_images,
265
+ inputs=[image_state, model_choice, length_choice, filename_state],
266
  outputs=[
267
  all_images, all_results, current_index, current_image, image_counter,
268
+ analysis_text, csv_download
 
269
  ]
270
  )
271
 
272
+ # Connect navigation buttons
273
  prev_button.click(
274
  fn=go_to_prev,
275
+ inputs=[current_index, all_images, all_results],
276
+ outputs=[current_index, current_image, image_counter, analysis_text]
 
277
  )
278
 
279
  next_button.click(
280
  fn=go_to_next,
281
+ inputs=[current_index, all_images, all_results],
282
+ outputs=[current_index, current_image, image_counter, analysis_text]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  )
284
 
285
  # Optional: Add additional information
 
291
 
292
  - Upload one or more images using the upload button
293
  - Choose a model and response length for generation
 
 
 
294
  - Navigate through the images with the Previous and Next buttons
295
  - Download CSV with all results
296