Chris Addis committed on
Commit ·
612285f
1
Parent(s): 9883bdb
openrouter
Browse files- app.py +59 -19
- library/utils_prompt.py +8 -0
app.py
CHANGED
|
@@ -36,6 +36,15 @@ PREFERENCES_FILE = "data/user_preferences.csv"
|
|
| 36 |
# Ensure directory exists
|
| 37 |
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
def save_preference(image_path, model_a_text, model_b_text, preferred_model):
|
| 40 |
"""Save user preference data to a CSV file"""
|
| 41 |
# Check if file exists, create with header if not
|
|
@@ -110,14 +119,22 @@ def create_demo():
|
|
| 110 |
file_count="multiple"
|
| 111 |
)
|
| 112 |
|
| 113 |
-
# Add model selection dropdown
|
| 114 |
model_choice = gr.Dropdown(
|
| 115 |
-
choices=["
|
| 116 |
label="Select Model",
|
| 117 |
-
value="
|
| 118 |
visible=True
|
| 119 |
)
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
# Add comparison mode checkbox
|
| 122 |
comparison_mode = gr.Checkbox(
|
| 123 |
label="Enable A/B Testing Mode",
|
|
@@ -127,8 +144,8 @@ def create_demo():
|
|
| 127 |
|
| 128 |
# Label the models in comparison mode
|
| 129 |
with gr.Group(visible=False) as comparison_labels:
|
| 130 |
-
gr.Markdown("### Model A:
|
| 131 |
-
gr.Markdown("### Model B:
|
| 132 |
|
| 133 |
# Preview gallery for uploaded images
|
| 134 |
gr.Markdown("### Uploaded Images")
|
|
@@ -188,7 +205,7 @@ def create_demo():
|
|
| 188 |
with gr.Row() as model_outputs:
|
| 189 |
# Model A output
|
| 190 |
with gr.Column():
|
| 191 |
-
gr.Markdown("#### Model A (
|
| 192 |
model_a_text = gr.Textbox(
|
| 193 |
label="",
|
| 194 |
value="",
|
|
@@ -201,7 +218,7 @@ def create_demo():
|
|
| 201 |
|
| 202 |
# Model B output
|
| 203 |
with gr.Column():
|
| 204 |
-
gr.Markdown("#### Model B (
|
| 205 |
model_b_text = gr.Textbox(
|
| 206 |
label="",
|
| 207 |
value="",
|
|
@@ -225,6 +242,7 @@ def create_demo():
|
|
| 225 |
def toggle_comparison_mode(enable_comparison):
|
| 226 |
return {
|
| 227 |
model_choice: not enable_comparison,
|
|
|
|
| 228 |
single_model_view: not enable_comparison,
|
| 229 |
comparison_view: enable_comparison,
|
| 230 |
comparison_labels: enable_comparison
|
|
@@ -233,7 +251,7 @@ def create_demo():
|
|
| 233 |
comparison_mode.change(
|
| 234 |
fn=toggle_comparison_mode,
|
| 235 |
inputs=[comparison_mode],
|
| 236 |
-
outputs=[model_choice, single_model_view, comparison_view, comparison_labels]
|
| 237 |
)
|
| 238 |
|
| 239 |
# Handle file uploads - store files for use during analysis
|
|
@@ -253,10 +271,13 @@ def create_demo():
|
|
| 253 |
)
|
| 254 |
|
| 255 |
# Function to analyze images
|
| 256 |
-
def analyze_images(image_paths, model_choice, comparison_mode, filenames):
|
| 257 |
if not image_paths:
|
| 258 |
return [], [], 0, "", "No images", "", "", "", [], ""
|
| 259 |
|
|
|
|
|
|
|
|
|
|
| 260 |
image_results = []
|
| 261 |
empty_preferences = [None] * len(image_paths) # Initialize with no preferences
|
| 262 |
|
|
@@ -270,15 +291,29 @@ def create_demo():
|
|
| 270 |
try:
|
| 271 |
# Open the image file for analysis
|
| 272 |
img = Image.open(image_path)
|
| 273 |
-
prompt0 =
|
| 274 |
|
| 275 |
# In comparison mode, always generate both outputs
|
| 276 |
if comparison_mode:
|
| 277 |
-
# Generate Model A output (
|
| 278 |
-
model_a_result =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
# Generate Model B output (
|
| 281 |
-
model_b_result =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
# Add to results
|
| 284 |
image_results.append({
|
|
@@ -289,10 +324,14 @@ def create_demo():
|
|
| 289 |
})
|
| 290 |
else:
|
| 291 |
# Use the selected model
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
# For single mode, we still keep the structure compatible with comparison mode
|
| 298 |
image_results.append({
|
|
@@ -437,7 +476,7 @@ def create_demo():
|
|
| 437 |
# Connect the analyze button
|
| 438 |
analyze_button.click(
|
| 439 |
fn=analyze_images,
|
| 440 |
-
inputs=[image_state, model_choice, comparison_mode, filename_state],
|
| 441 |
outputs=[
|
| 442 |
all_images, all_results, current_index, current_image, image_counter,
|
| 443 |
analysis_text, model_a_text, model_b_text, preference_state,
|
|
@@ -481,6 +520,7 @@ def create_demo():
|
|
| 481 |
This demo generates alt-text for uploaded images.
|
| 482 |
|
| 483 |
- Upload one or more images using the upload button
|
|
|
|
| 484 |
- Choose between standard mode or A/B testing mode
|
| 485 |
- In standard mode, select one model to generate alt-text
|
| 486 |
- In A/B testing mode, compare outputs from two models and select your preference
|
|
|
|
| 36 |
# Ensure directory exists
|
| 37 |
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
|
| 38 |
|
| 39 |
+
def get_sys_prompt(length="medium"):
    """Return the system (developer) prompt for alt-text generation.

    Parameters
    ----------
    length : str
        One of "short", "medium", or "long". Any value other than
        "short" or "medium" falls through to the long variant.

    Returns
    -------
    str
        The museum-curator system prompt with the character-length
        constraint matching the requested response length.
    """
    if length == "short":
        # Short alt-text per WCAG 2.1: hard cap of 130 characters.
        dev_prompt = """You are a museum curator tasked with generating alt-text (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 130 characters."""
    elif length == "medium":
        # Medium long-description: target window of 250-300 characters.
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be between 250-300 characters in length."""
    else:
        # Long long-description: hard cap of 450 characters.
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 450 characters."""
    return dev_prompt
|
| 47 |
+
|
| 48 |
def save_preference(image_path, model_a_text, model_b_text, preferred_model):
|
| 49 |
"""Save user preference data to a CSV file"""
|
| 50 |
# Check if file exists, create with header if not
|
|
|
|
| 119 |
file_count="multiple"
|
| 120 |
)
|
| 121 |
|
| 122 |
+
# Add model selection dropdown with new model choices
|
| 123 |
model_choice = gr.Dropdown(
|
| 124 |
+
choices=["google/gemini-2.0-flash-001", "anthropic/claude-3.7-sonnet", "openai/chatgpt-4o-latest"],
|
| 125 |
label="Select Model",
|
| 126 |
+
value="anthropic/claude-3.7-sonnet",
|
| 127 |
visible=True
|
| 128 |
)
|
| 129 |
|
| 130 |
+
# Add response length selection
|
| 131 |
+
length_choice = gr.Radio(
|
| 132 |
+
choices=["short", "medium", "long"],
|
| 133 |
+
label="Response Length",
|
| 134 |
+
value="medium",
|
| 135 |
+
info="Short: max 130 chars | Medium: 250-300 chars | Long: max 450 chars"
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
# Add comparison mode checkbox
|
| 139 |
comparison_mode = gr.Checkbox(
|
| 140 |
label="Enable A/B Testing Mode",
|
|
|
|
| 144 |
|
| 145 |
# Label the models in comparison mode
|
| 146 |
with gr.Group(visible=False) as comparison_labels:
|
| 147 |
+
gr.Markdown("### Model A: Claude")
|
| 148 |
+
gr.Markdown("### Model B: GPT-4o")
|
| 149 |
|
| 150 |
# Preview gallery for uploaded images
|
| 151 |
gr.Markdown("### Uploaded Images")
|
|
|
|
| 205 |
with gr.Row() as model_outputs:
|
| 206 |
# Model A output
|
| 207 |
with gr.Column():
|
| 208 |
+
gr.Markdown("#### Model A (Claude)")
|
| 209 |
model_a_text = gr.Textbox(
|
| 210 |
label="",
|
| 211 |
value="",
|
|
|
|
| 218 |
|
| 219 |
# Model B output
|
| 220 |
with gr.Column():
|
| 221 |
+
gr.Markdown("#### Model B (GPT-4o)")
|
| 222 |
model_b_text = gr.Textbox(
|
| 223 |
label="",
|
| 224 |
value="",
|
|
|
|
| 242 |
def toggle_comparison_mode(enable_comparison):
|
| 243 |
return {
|
| 244 |
model_choice: not enable_comparison,
|
| 245 |
+
length_choice: not enable_comparison,
|
| 246 |
single_model_view: not enable_comparison,
|
| 247 |
comparison_view: enable_comparison,
|
| 248 |
comparison_labels: enable_comparison
|
|
|
|
| 251 |
comparison_mode.change(
|
| 252 |
fn=toggle_comparison_mode,
|
| 253 |
inputs=[comparison_mode],
|
| 254 |
+
outputs=[model_choice, length_choice, single_model_view, comparison_view, comparison_labels]
|
| 255 |
)
|
| 256 |
|
| 257 |
# Handle file uploads - store files for use during analysis
|
|
|
|
| 271 |
)
|
| 272 |
|
| 273 |
# Function to analyze images
|
| 274 |
+
def analyze_images(image_paths, model_choice, length_choice, comparison_mode, filenames):
|
| 275 |
if not image_paths:
|
| 276 |
return [], [], 0, "", "No images", "", "", "", [], ""
|
| 277 |
|
| 278 |
+
# Get system prompt based on length selection
|
| 279 |
+
sys_prompt = get_sys_prompt(length_choice)
|
| 280 |
+
|
| 281 |
image_results = []
|
| 282 |
empty_preferences = [None] * len(image_paths) # Initialize with no preferences
|
| 283 |
|
|
|
|
| 291 |
try:
|
| 292 |
# Open the image file for analysis
|
| 293 |
img = Image.open(image_path)
|
| 294 |
+
prompt0 = prompt_new() # Using the new prompt function
|
| 295 |
|
| 296 |
# In comparison mode, always generate both outputs
|
| 297 |
if comparison_mode:
|
| 298 |
+
# Generate Model A output (Claude)
|
| 299 |
+
model_a_result = OR.generate_caption(
|
| 300 |
+
img,
|
| 301 |
+
model="anthropic/claude-3.7-sonnet",
|
| 302 |
+
max_image_size=512,
|
| 303 |
+
prompt=prompt0,
|
| 304 |
+
prompt_dev=sys_prompt,
|
| 305 |
+
temperature=1
|
| 306 |
+
)
|
| 307 |
|
| 308 |
+
# Generate Model B output (GPT-4o)
|
| 309 |
+
model_b_result = OR.generate_caption(
|
| 310 |
+
img,
|
| 311 |
+
model="openai/chatgpt-4o-latest",
|
| 312 |
+
max_image_size=512,
|
| 313 |
+
prompt=prompt0,
|
| 314 |
+
prompt_dev=sys_prompt,
|
| 315 |
+
temperature=1
|
| 316 |
+
)
|
| 317 |
|
| 318 |
# Add to results
|
| 319 |
image_results.append({
|
|
|
|
| 324 |
})
|
| 325 |
else:
|
| 326 |
# Use the selected model
|
| 327 |
+
result = OR.generate_caption(
|
| 328 |
+
img,
|
| 329 |
+
model=model_choice,
|
| 330 |
+
max_image_size=512,
|
| 331 |
+
prompt=prompt0,
|
| 332 |
+
prompt_dev=sys_prompt,
|
| 333 |
+
temperature=1
|
| 334 |
+
)
|
| 335 |
|
| 336 |
# For single mode, we still keep the structure compatible with comparison mode
|
| 337 |
image_results.append({
|
|
|
|
| 476 |
# Connect the analyze button
|
| 477 |
analyze_button.click(
|
| 478 |
fn=analyze_images,
|
| 479 |
+
inputs=[image_state, model_choice, length_choice, comparison_mode, filename_state],
|
| 480 |
outputs=[
|
| 481 |
all_images, all_results, current_index, current_image, image_counter,
|
| 482 |
analysis_text, model_a_text, model_b_text, preference_state,
|
|
|
|
| 520 |
This demo generates alt-text for uploaded images.
|
| 521 |
|
| 522 |
- Upload one or more images using the upload button
|
| 523 |
+
- Choose a model and response length for generation
|
| 524 |
- Choose between standard mode or A/B testing mode
|
| 525 |
- In standard mode, select one model to generate alt-text
|
| 526 |
- In A/B testing mode, compare outputs from two models and select your preference
|
library/utils_prompt.py
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def prompt_new(title=None):
|
| 2 |
if title == None:
|
| 3 |
title_info = {}
|
|
|
|
| 1 |
+
def get_sys_prompt(length="medium"):
    """Return the system (developer) prompt for alt-text generation.

    Parameters
    ----------
    length : str
        One of "short", "medium", or "long". Any value other than
        "short" or "medium" falls through to the long variant.

    Returns
    -------
    str
        The museum-curator system prompt with the character-length
        constraint matching the requested response length.
    """
    if length == "short":
        # Short alt-text per WCAG 2.1: hard cap of 130 characters.
        dev_prompt = """You are a museum curator tasked with generating alt-text (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 130 characters."""
    elif length == "medium":
        # Medium long-description: target window of 250-300 characters.
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be between 250-300 characters in length."""
    else:
        # Long long-description: hard cap of 450 characters.
        dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Responses should be a maximum of 450 characters."""
    # BUG FIX: the original added function body never returned dev_prompt,
    # so it implicitly returned None (its app.py twin does return it).
    return dev_prompt
dev_prompt = """You are a museum curator tasked with generating long descriptions (as defined in WCAG 2.1) of museum objects for visually impaired and blind users from images. Use British English and follow museum accessibility best practices. Do not start with phrases like 'The image shows' or 'This is an image of'. Be precise, concise and avoid filler and subjective statements. Repsonses should be a maxium of 450 characters."""
|
| 8 |
+
|
| 9 |
def prompt_new(title=None):
|
| 10 |
if title == None:
|
| 11 |
title_info = {}
|