Chris Addis committed on
Commit
612285f
·
1 Parent(s): 9883bdb

openrouter

Browse files
Files changed (2) hide show
  1. app.py +59 -19
  2. library/utils_prompt.py +8 -0
app.py CHANGED
@@ -36,6 +36,15 @@ PREFERENCES_FILE = "data/user_preferences.csv"
36
  # Ensure directory exists
37
  os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
 
 
 
 
 
 
 
 
 
 
39
  def save_preference(image_path, model_a_text, model_b_text, preferred_model):
40
  """Save user preference data to a CSV file"""
41
  # Check if file exists, create with header if not
@@ -110,14 +119,22 @@ def create_demo():
110
  file_count="multiple"
111
  )
112
 
113
- # Add model selection dropdown
114
  model_choice = gr.Dropdown(
115
- choices=["GPT-4o", "Default"],
116
  label="Select Model",
117
- value="Default",
118
  visible=True
119
  )
120
 
 
 
 
 
 
 
 
 
121
  # Add comparison mode checkbox
122
  comparison_mode = gr.Checkbox(
123
  label="Enable A/B Testing Mode",
@@ -127,8 +144,8 @@ def create_demo():
127
 
128
  # Label the models in comparison mode
129
  with gr.Group(visible=False) as comparison_labels:
130
- gr.Markdown("### Model A: GPT-4o")
131
- gr.Markdown("### Model B: Default (GPT-4o-mini)")
132
 
133
  # Preview gallery for uploaded images
134
  gr.Markdown("### Uploaded Images")
@@ -188,7 +205,7 @@ def create_demo():
188
  with gr.Row() as model_outputs:
189
  # Model A output
190
  with gr.Column():
191
- gr.Markdown("#### Model A (GPT-4o)")
192
  model_a_text = gr.Textbox(
193
  label="",
194
  value="",
@@ -201,7 +218,7 @@ def create_demo():
201
 
202
  # Model B output
203
  with gr.Column():
204
- gr.Markdown("#### Model B (Default)")
205
  model_b_text = gr.Textbox(
206
  label="",
207
  value="",
@@ -225,6 +242,7 @@ def create_demo():
225
  def toggle_comparison_mode(enable_comparison):
226
  return {
227
  model_choice: not enable_comparison,
 
228
  single_model_view: not enable_comparison,
229
  comparison_view: enable_comparison,
230
  comparison_labels: enable_comparison
@@ -233,7 +251,7 @@ def create_demo():
233
  comparison_mode.change(
234
  fn=toggle_comparison_mode,
235
  inputs=[comparison_mode],
236
- outputs=[model_choice, single_model_view, comparison_view, comparison_labels]
237
  )
238
 
239
  # Handle file uploads - store files for use during analysis
@@ -253,10 +271,13 @@ def create_demo():
253
  )
254
 
255
  # Function to analyze images
256
- def analyze_images(image_paths, model_choice, comparison_mode, filenames):
257
  if not image_paths:
258
  return [], [], 0, "", "No images", "", "", "", [], ""
259
 
 
 
 
260
  image_results = []
261
  empty_preferences = [None] * len(image_paths) # Initialize with no preferences
262
 
@@ -270,15 +291,29 @@ def create_demo():
270
  try:
271
  # Open the image file for analysis
272
  img = Image.open(image_path)
273
- prompt0 = base_prompt()
274
 
275
  # In comparison mode, always generate both outputs
276
  if comparison_mode:
277
- # Generate Model A output (GPT-4o)
278
- model_a_result = gpt.generate_caption(img, model="gpt-4o", prompt=prompt0)
 
 
 
 
 
 
 
279
 
280
- # Generate Model B output (Default/GPT-4o-mini)
281
- model_b_result = gpt.generate_caption(img, model="gpt-4o-mini", prompt=prompt0)
 
 
 
 
 
 
 
282
 
283
  # Add to results
284
  image_results.append({
@@ -289,10 +324,14 @@ def create_demo():
289
  })
290
  else:
291
  # Use the selected model
292
- if model_choice == "GPT-4o":
293
- result = gpt.generate_caption(img, model="gpt-4o", prompt=prompt0)
294
- else: # Default model
295
- result = gpt.generate_caption(img, model="gpt-4o-mini", prompt=prompt0)
 
 
 
 
296
 
297
  # For single mode, we still keep the structure compatible with comparison mode
298
  image_results.append({
@@ -437,7 +476,7 @@ def create_demo():
437
  # Connect the analyze button
438
  analyze_button.click(
439
  fn=analyze_images,
440
- inputs=[image_state, model_choice, comparison_mode, filename_state],
441
  outputs=[
442
  all_images, all_results, current_index, current_image, image_counter,
443
  analysis_text, model_a_text, model_b_text, preference_state,
@@ -481,6 +520,7 @@ def create_demo():
481
  This demo generates alt-text for uploaded images.
482
 
483
  - Upload one or more images using the upload button
 
484
  - Choose between standard mode or A/B testing mode
485
  - In standard mode, select one model to generate alt-text
486
  - In A/B testing mode, compare outputs from two models and select your preference
 
36
  # Ensure directory exists
37
  os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
 
39
def get_sys_prompt(length="medium"):
    """Return the system (developer) prompt for the requested response length.

    Args:
        length: One of ``"short"``, ``"medium"`` (default) or anything else,
            which falls through to the long variant.

    Returns:
        str: The system prompt instructing the model on style and length
        (short: max 130 chars; medium: 250-300 chars; long: max 450 chars).
    """
    # NOTE(review): this duplicates get_sys_prompt in library/utils_prompt.py —
    # consider importing it from there instead of keeping two copies in sync.
    if length == "short":
        # Alt-text proper (WCAG 2.1): the briefest variant.
        dev_prompt = """You are a museum curator tasked with generating alt-text (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 130 characters."""
    elif length == "medium":
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be between 250-300 characters in length."""
    else:
        # Fallback covers "long" and any unexpected value.
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 450 characters."""
    return dev_prompt
47
+
48
  def save_preference(image_path, model_a_text, model_b_text, preferred_model):
49
  """Save user preference data to a CSV file"""
50
  # Check if file exists, create with header if not
 
119
  file_count="multiple"
120
  )
121
 
122
+ # Add model selection dropdown with new model choices
123
  model_choice = gr.Dropdown(
124
+ choices=["google/gemini-2.0-flash-001", "anthropic/claude-3.7-sonnet", "openai/chatgpt-4o-latest"],
125
  label="Select Model",
126
+ value="anthropic/claude-3.7-sonnet",
127
  visible=True
128
  )
129
 
130
+ # Add response length selection
131
+ length_choice = gr.Radio(
132
+ choices=["short", "medium", "long"],
133
+ label="Response Length",
134
+ value="medium",
135
+ info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
136
+ )
137
+
138
  # Add comparison mode checkbox
139
  comparison_mode = gr.Checkbox(
140
  label="Enable A/B Testing Mode",
 
144
 
145
  # Label the models in comparison mode
146
  with gr.Group(visible=False) as comparison_labels:
147
+ gr.Markdown("### Model A: Claude")
148
+ gr.Markdown("### Model B: GPT-4o")
149
 
150
  # Preview gallery for uploaded images
151
  gr.Markdown("### Uploaded Images")
 
205
  with gr.Row() as model_outputs:
206
  # Model A output
207
  with gr.Column():
208
+ gr.Markdown("#### Model A (Claude)")
209
  model_a_text = gr.Textbox(
210
  label="",
211
  value="",
 
218
 
219
  # Model B output
220
  with gr.Column():
221
+ gr.Markdown("#### Model B (GPT-4o)")
222
  model_b_text = gr.Textbox(
223
  label="",
224
  value="",
 
242
  def toggle_comparison_mode(enable_comparison):
243
  return {
244
  model_choice: not enable_comparison,
245
+ length_choice: not enable_comparison,
246
  single_model_view: not enable_comparison,
247
  comparison_view: enable_comparison,
248
  comparison_labels: enable_comparison
 
251
  comparison_mode.change(
252
  fn=toggle_comparison_mode,
253
  inputs=[comparison_mode],
254
+ outputs=[model_choice, length_choice, single_model_view, comparison_view, comparison_labels]
255
  )
256
 
257
  # Handle file uploads - store files for use during analysis
 
271
  )
272
 
273
  # Function to analyze images
274
+ def analyze_images(image_paths, model_choice, length_choice, comparison_mode, filenames):
275
  if not image_paths:
276
  return [], [], 0, "", "No images", "", "", "", [], ""
277
 
278
+ # Get system prompt based on length selection
279
+ sys_prompt = get_sys_prompt(length_choice)
280
+
281
  image_results = []
282
  empty_preferences = [None] * len(image_paths) # Initialize with no preferences
283
 
 
291
  try:
292
  # Open the image file for analysis
293
  img = Image.open(image_path)
294
+ prompt0 = prompt_new() # Using the new prompt function
295
 
296
  # In comparison mode, always generate both outputs
297
  if comparison_mode:
298
+ # Generate Model A output (Claude)
299
+ model_a_result = OR.generate_caption(
300
+ img,
301
+ model="anthropic/claude-3.7-sonnet",
302
+ max_image_size=512,
303
+ prompt=prompt0,
304
+ prompt_dev=sys_prompt,
305
+ temperature=1
306
+ )
307
 
308
+ # Generate Model B output (GPT-4o)
309
+ model_b_result = OR.generate_caption(
310
+ img,
311
+ model="openai/chatgpt-4o-latest",
312
+ max_image_size=512,
313
+ prompt=prompt0,
314
+ prompt_dev=sys_prompt,
315
+ temperature=1
316
+ )
317
 
318
  # Add to results
319
  image_results.append({
 
324
  })
325
  else:
326
  # Use the selected model
327
+ result = OR.generate_caption(
328
+ img,
329
+ model=model_choice,
330
+ max_image_size=512,
331
+ prompt=prompt0,
332
+ prompt_dev=sys_prompt,
333
+ temperature=1
334
+ )
335
 
336
  # For single mode, we still keep the structure compatible with comparison mode
337
  image_results.append({
 
476
  # Connect the analyze button
477
  analyze_button.click(
478
  fn=analyze_images,
479
+ inputs=[image_state, model_choice, length_choice, comparison_mode, filename_state],
480
  outputs=[
481
  all_images, all_results, current_index, current_image, image_counter,
482
  analysis_text, model_a_text, model_b_text, preference_state,
 
520
  This demo generates alt-text for uploaded images.
521
 
522
  - Upload one or more images using the upload button
523
+ - Choose a model and response length for generation
524
  - Choose between standard mode or A/B testing mode
525
  - In standard mode, select one model to generate alt-text
526
  - In A/B testing mode, compare outputs from two models and select your preference
library/utils_prompt.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  def prompt_new(title=None):
2
  if title == None:
3
  title_info = {}
 
1
def get_sys_prompt(length="medium"):
    """Return the system (developer) prompt for the requested response length.

    Args:
        length: One of ``"short"``, ``"medium"`` (default) or anything else,
            which falls through to the long variant.

    Returns:
        str: The system prompt instructing the model on style and length
        (short: max 130 chars; medium: 250-300 chars; long: max 450 chars).

    Bug fix: the original assigned ``dev_prompt`` in every branch but never
    returned it, so callers always received ``None``. Also corrects the
    "Repsonses"/"maxium" typos in the prompt text.
    """
    if length == "short":
        # Alt-text proper (WCAG 2.1): the briefest variant.
        dev_prompt = """You are a museum curator tasked with generating alt-text (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 130 characters."""
    elif length == "medium":
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be between 250-300 characters in length."""
    else:
        # Fallback covers "long" and any unexpected value.
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 450 characters."""
    return dev_prompt
8
+
9
  def prompt_new(title=None):
10
  if title == None:
11
  title_info = {}