Chris Addis commited on
Commit
9883bdb
·
1 Parent(s): eec37f2

base version

Browse files
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Alt Text Gen
3
+ emoji: 📈
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.24.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
def process_image(image):
    """Identity passthrough: echo the uploaded image back for display."""
    return image
6
+
7
# Create the Gradio interface: a two-column layout where an uploaded image is
# echoed into an output panel on button click (checkpoint/base version of app.py).
with gr.Blocks() as demo:
    gr.Markdown("# Image Uploader and Viewer")

    with gr.Row():
        # Left column: upload control.
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload an image")
            upload_button = gr.Button("Display Image")

        # Right column: display panel.
        with gr.Column():
            output_image = gr.Image(label="Displayed Image")

    # Clicking the button runs process_image (an identity function), so the
    # uploaded image is simply mirrored into the output panel.
    upload_button.click(
        fn=process_image,
        inputs=input_image,
        outputs=output_image
    )

# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio==5.24.0
2
+ pillow
app.py CHANGED
@@ -1,27 +1,498 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def process_image(image):
4
- # Simply return the image as is for display
5
- return image
 
6
 
7
- # Create the Gradio interface
8
- with gr.Blocks() as demo:
9
- gr.Markdown("# Image Uploader and Viewer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- with gr.Row():
12
- with gr.Column():
13
- input_image = gr.Image(type="pil", label="Upload an image")
14
- upload_button = gr.Button("Display Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- with gr.Column():
17
- output_image = gr.Image(label="Displayed Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- upload_button.click(
20
- fn=process_image,
21
- inputs=input_image,
22
- outputs=output_image
23
- )
24
 
25
  # Launch the app
26
  if __name__ == "__main__":
27
- demo.launch()
 
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import io
5
+ import os
6
+ import requests
7
+ import json
8
+ from dotenv import load_dotenv
9
+ import openai
10
+ import base64
11
+ import csv
12
+ import tempfile
13
+ import datetime
14
 
15
# Load environment variables from .env file if it exists (for local development).
# On Hugging Face Spaces, the secrets are automatically available as environment
# variables, so no .env file is present there and this block is skipped.
if os.path.exists(".env"):
    load_dotenv()
19
 
20
+ from io import BytesIO
21
+ import numpy as np
22
+ import requests
23
+ from PIL import Image
24
+
25
+ # import libraries
26
+ from library.utils_model import *
27
+ from library.utils_html import *
28
+ from library.utils_prompt import *
29
+
30
# API clients: `OR` talks to OpenRouter with the default base URL; `gemini`
# reuses the same wrapper class but points at Google's OpenAI-compatible
# Gemini endpoint with its own key.
# NOTE(review): analysis code further down calls `gpt.generate_caption`, but no
# `gpt` client is created anywhere in this file — confirm which client
# (presumably `OR`) was intended.
OR = OpenRouterAPI()
gemini = OpenRouterAPI(api_key = os.getenv("GEMINI_API_KEY"),base_url="https://generativelanguage.googleapis.com/v1beta/openai/")

# Path for storing user preferences (A/B test selections).
PREFERENCES_FILE = "data/user_preferences.csv"

# Ensure directory exists before the first append to the CSV.
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
+
39
def save_preference(image_path, model_a_text, model_b_text, preferred_model):
    """Append one user A/B preference record to PREFERENCES_FILE.

    Args:
        image_path: Path of the judged image; only its basename is stored.
        model_a_text: Alt-text produced by Model A.
        model_b_text: Alt-text produced by Model B.
        preferred_model: Which model the user picked (e.g. "Model A").

    Returns:
        True once the row has been written.
    """
    # Decide up front whether a header row is needed (first write only).
    write_header = not os.path.isfile(PREFERENCES_FILE)

    # Store just the filename, not the full (machine-specific) path.
    image_filename = os.path.basename(image_path)

    with open(PREFERENCES_FILE, 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        if write_header:
            csv_writer.writerow(
                ['timestamp', 'image', 'model_a_text', 'model_b_text', 'preferred_model'])
        csv_writer.writerow([
            datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            image_filename,
            model_a_text,
            model_b_text,
            preferred_model,
        ])

    return True
65
+
66
def create_csv_file(results):
    """Write analysis results to a temporary CSV file and return its path.

    Args:
        results: Iterable of dicts; missing keys default to "".

    Returns:
        Filesystem path of the newly created .csv temp file.
    """
    fd, csv_path = tempfile.mkstemp(suffix='.csv')

    # os.fdopen takes ownership of the descriptor, so the temp file is
    # flushed and closed when the with-block exits.
    with os.fdopen(fd, 'w', newline='') as handle:
        writer = csv.writer(handle)
        fields = ('image_id', 'model_a_content', 'model_b_content', 'preferred_model')
        writer.writerow(list(fields))
        for entry in results:
            writer.writerow([entry.get(field, '') for field in fields])

    return csv_path
85
+
86
# Extract original filename without path or extension
def get_base_filename(filepath):
    """Return the filename from *filepath* with directory and extension stripped.

    Empty/None input yields "" so callers can pass through missing paths safely.
    """
    if not filepath:
        return ""
    stem, _ext = os.path.splitext(os.path.basename(filepath))
    return stem
95
+
96
# Define the Gradio interface
def create_demo():
    """Build and return the Gradio Blocks app for alt-text generation.

    Two modes are supported:
      * single-model mode — one selected model captions each uploaded image;
      * A/B testing mode — two models caption every image and the user records
        which output they prefer (persisted via save_preference).

    Returns:
        gr.Blocks: the assembled (not yet launched) demo.
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        gr.Markdown("# AI Alt-text Generator")
        gr.Markdown("Upload one or more images to generate Alt-text")
        gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool.")

        with gr.Row():
            # Left column: Controls and uploads
            with gr.Column(scale=1):
                # Upload interface
                upload_button = gr.UploadButton(
                    "Click to Upload Images",
                    file_types=["image"],
                    file_count="multiple"
                )

                # Add model selection dropdown (only meaningful in single-model mode)
                model_choice = gr.Dropdown(
                    choices=["GPT-4o", "Default"],
                    label="Select Model",
                    value="Default",
                    visible=True
                )

                # Add comparison mode checkbox
                comparison_mode = gr.Checkbox(
                    label="Enable A/B Testing Mode",
                    value=False,
                    info="Show outputs from both models and select preferred"
                )

                # Label the models in comparison mode
                with gr.Group(visible=False) as comparison_labels:
                    gr.Markdown("### Model A: GPT-4o")
                    gr.Markdown("### Model B: Default (GPT-4o-mini)")

                # Preview gallery for uploaded images
                gr.Markdown("### Uploaded Images")
                input_gallery = gr.Gallery(
                    label="",
                    columns=3,
                    height=150,
                    object_fit="contain"
                )

                # Analysis button
                analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")

                # Hidden state component to store image info
                image_state = gr.State([])
                filename_state = gr.State([])

                # CSV download component
                csv_download = gr.File(label="CSV Results")

            # Right column: Display area
            with gr.Column(scale=2):
                # Image display
                current_image = gr.Image(
                    label="Current Image",
                    height=400,
                    type="filepath",
                    show_download_button=True,
                    show_share_button=True
                )

                # Navigation row
                with gr.Row():
                    prev_button = gr.Button("← Previous", size="sm")
                    image_counter = gr.Markdown("", elem_id="image-counter")
                    next_button = gr.Button("Next →", size="sm")

                # Standard single model output view
                with gr.Column(visible=True) as single_model_view:
                    # Alt-text heading
                    gr.Markdown("### Generated Alt-text")

                    # Alt-text
                    analysis_text = gr.Textbox(
                        label="",
                        value="Please analyze images to see results",
                        lines=6,
                        max_lines=10,
                        interactive=False,
                        show_label=False
                    )

                # Comparison view for A/B testing
                with gr.Column(visible=False) as comparison_view:
                    gr.Markdown("### Compare Generated Alt-text")

                    with gr.Row() as model_outputs:
                        # Model A output
                        with gr.Column():
                            gr.Markdown("#### Model A (GPT-4o)")
                            model_a_text = gr.Textbox(
                                label="",
                                value="",
                                lines=5,
                                max_lines=8,
                                interactive=False,
                                show_label=False
                            )
                            model_a_button = gr.Button("Select Model A", variant="secondary")

                        # Model B output
                        with gr.Column():
                            gr.Markdown("#### Model B (Default)")
                            model_b_text = gr.Textbox(
                                label="",
                                value="",
                                lines=5,
                                max_lines=8,
                                interactive=False,
                                show_label=False
                            )
                            model_b_button = gr.Button("Select Model B", variant="secondary")

                    # Preference saved notification
                    preference_status = gr.Markdown("")

        # Hidden state for gallery navigation and preferences
        current_index = gr.State(0)
        all_images = gr.State([])
        all_results = gr.State([])
        preference_state = gr.State([])  # To store user preferences

        # Toggle comparison mode
        # NOTE(review): this returns raw booleans keyed by component; toggling
        # *visibility* in Gradio normally requires gr.update(visible=...) —
        # confirm this behaves as intended on the installed Gradio version.
        def toggle_comparison_mode(enable_comparison):
            return {
                model_choice: not enable_comparison,
                single_model_view: not enable_comparison,
                comparison_view: enable_comparison,
                comparison_labels: enable_comparison
            }

        comparison_mode.change(
            fn=toggle_comparison_mode,
            inputs=[comparison_mode],
            outputs=[model_choice, single_model_view, comparison_view, comparison_labels]
        )

        # Handle file uploads - store files for use during analysis.
        # Returns the same path list twice: once for the preview gallery and
        # once for the hidden image_state used later by analyze_images.
        def handle_upload(files):
            file_paths = []
            file_names = []
            for file in files:
                file_paths.append(file.name)
                # Extract filename without path or extension for later use
                file_names.append(get_base_filename(file.name))
            return file_paths, file_paths, file_names

        upload_button.upload(
            fn=handle_upload,
            inputs=[upload_button],
            outputs=[input_gallery, image_state, filename_state]
        )

        # Function to analyze images.
        # NOTE(review): the captioning calls below use `gpt.generate_caption`,
        # but no `gpt` object is defined at module level (only `OR` and
        # `gemini` are created) — this will raise NameError at runtime;
        # confirm which client was intended.
        # NOTE(review): `base_prompt` is not defined in this file — presumably
        # provided by the star-import from library.utils_prompt; verify.
        def analyze_images(image_paths, model_choice, comparison_mode, filenames):
            # NOTE(review): this early return yields 10 values while the
            # analyze_button.click() wiring below lists 11 outputs — confirm
            # (the success-path return has 11).
            if not image_paths:
                return [], [], 0, "", "No images", "", "", "", [], ""

            image_results = []
            empty_preferences = [None] * len(image_paths)  # Initialize with no preferences

            for i, image_path in enumerate(image_paths):
                # Use original filename as image_id if available
                if i < len(filenames) and filenames[i]:
                    image_id = filenames[i]
                else:
                    image_id = f"Image {i+1}"

                try:
                    # Open the image file for analysis
                    img = Image.open(image_path)
                    prompt0 = base_prompt()

                    # In comparison mode, always generate both outputs
                    if comparison_mode:
                        # Generate Model A output (GPT-4o)
                        model_a_result = gpt.generate_caption(img, model="gpt-4o", prompt=prompt0)

                        # Generate Model B output (Default/GPT-4o-mini)
                        model_b_result = gpt.generate_caption(img, model="gpt-4o-mini", prompt=prompt0)

                        # Add to results
                        image_results.append({
                            "image_id": image_id,
                            "model_a_content": model_a_result,
                            "model_b_content": model_b_result,
                            "preferred_model": None  # No preference yet
                        })
                    else:
                        # Use the selected model
                        if model_choice == "GPT-4o":
                            result = gpt.generate_caption(img, model="gpt-4o", prompt=prompt0)
                        else:  # Default model
                            result = gpt.generate_caption(img, model="gpt-4o-mini", prompt=prompt0)

                        # For single mode, we still keep the structure compatible with comparison mode
                        image_results.append({
                            "image_id": image_id,
                            "model_a_content": result,
                            "model_b_content": "",
                            "preferred_model": None
                        })

                except Exception as e:
                    # Record the failure as the "caption" so the UI still shows something
                    error_message = f"Error: {str(e)}"
                    image_results.append({
                        "image_id": image_id,
                        "model_a_content": error_message,
                        "model_b_content": error_message if comparison_mode else "",
                        "preferred_model": None
                    })

            # Create a CSV file for download
            csv_path = create_csv_file(image_results)

            # Set up initial display with first image
            if len(image_paths) > 0:
                initial_image = image_paths[0]
                initial_counter = f"{1} of {len(image_paths)}"

                if comparison_mode:
                    initial_model_a = image_results[0]["model_a_content"]
                    initial_model_b = image_results[0]["model_b_content"]
                    initial_text = ""  # Not used in comparison mode
                else:
                    initial_text = image_results[0]["model_a_content"]
                    initial_model_a = ""  # Not used in single mode
                    initial_model_b = ""  # Not used in single mode
            else:
                # Unreachable in practice (empty list returns early above),
                # kept for safety.
                initial_image = ""
                initial_text = "No images analyzed"
                initial_model_a = ""
                initial_model_b = ""
                initial_counter = "0 of 0"

            return (image_paths, image_results, 0, initial_image, initial_counter,
                    initial_text, initial_model_a, initial_model_b, empty_preferences,
                    csv_path, "")

        # Function to navigate to previous image (wraps around at the ends).
        # NOTE(review): indexes `preferences[new_idx]` without a bounds check —
        # safe only while preference_state stays the same length as all_images.
        def go_to_prev(current_idx, images, results, comparison_mode, preferences):
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", "", "", "", ""

            new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
            counter_html = f"{new_idx + 1} of {len(images)}"

            # Get preference status for this image
            preference_message = ""
            if preferences[new_idx]:
                preferred = "Model A" if preferences[new_idx] == "A" else "Model B"
                preference_message = f"You selected {preferred} for this image"

            if comparison_mode:
                return (new_idx, images[new_idx], counter_html, "",
                        results[new_idx]["model_a_content"],
                        results[new_idx]["model_b_content"],
                        preference_message)
            else:
                return (new_idx, images[new_idx], counter_html,
                        results[new_idx]["model_a_content"], "", "", "")

        # Function to navigate to next image (wraps around at the ends).
        def go_to_next(current_idx, images, results, comparison_mode, preferences):
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", "", "", "", ""

            new_idx = (current_idx + 1) % len(images)
            counter_html = f"{new_idx + 1} of {len(images)}"

            # Get preference status for this image
            preference_message = ""
            if preferences[new_idx]:
                preferred = "Model A" if preferences[new_idx] == "A" else "Model B"
                preference_message = f"You selected {preferred} for this image"

            if comparison_mode:
                return (new_idx, images[new_idx], counter_html, "",
                        results[new_idx]["model_a_content"],
                        results[new_idx]["model_b_content"],
                        preference_message)
            else:
                return (new_idx, images[new_idx], counter_html,
                        results[new_idx]["model_a_content"], "", "", "")

        # Function to handle Model A selection: record it in state, persist it
        # to the preferences CSV, and confirm to the user.
        def select_model_a(current_idx, images, results, preferences):
            if not images or current_idx >= len(images):
                return preferences, "No image selected"

            # Create a copy of preferences to modify
            new_preferences = preferences.copy()

            # Update preference for current image
            new_preferences[current_idx] = "A"

            # Save preference to CSV
            image_path = images[current_idx]
            model_a_text = results[current_idx]["model_a_content"]
            model_b_text = results[current_idx]["model_b_content"]
            save_preference(image_path, model_a_text, model_b_text, "Model A")

            # Also update the results with the preference
            results[current_idx]["preferred_model"] = "A"

            # Create confirmation message
            message = f"✓ You selected Model A for this image"

            return new_preferences, message

        # Function to handle Model B selection (mirror of select_model_a).
        def select_model_b(current_idx, images, results, preferences):
            if not images or current_idx >= len(images):
                return preferences, "No image selected"

            # Create a copy of preferences to modify
            new_preferences = preferences.copy()

            # Update preference for current image
            new_preferences[current_idx] = "B"

            # Save preference to CSV
            image_path = images[current_idx]
            model_a_text = results[current_idx]["model_a_content"]
            model_b_text = results[current_idx]["model_b_content"]
            save_preference(image_path, model_a_text, model_b_text, "Model B")

            # Also update the results with the preference
            results[current_idx]["preferred_model"] = "B"

            # Create confirmation message
            message = f"✓ You selected Model B for this image"

            return new_preferences, message

        # Connect the analyze button
        analyze_button.click(
            fn=analyze_images,
            inputs=[image_state, model_choice, comparison_mode, filename_state],
            outputs=[
                all_images, all_results, current_index, current_image, image_counter,
                analysis_text, model_a_text, model_b_text, preference_state,
                csv_download, preference_status
            ]
        )

        # Connect navigation buttons for both modes
        prev_button.click(
            fn=go_to_prev,
            inputs=[current_index, all_images, all_results, comparison_mode, preference_state],
            outputs=[current_index, current_image, image_counter, analysis_text,
                     model_a_text, model_b_text, preference_status]
        )

        next_button.click(
            fn=go_to_next,
            inputs=[current_index, all_images, all_results, comparison_mode, preference_state],
            outputs=[current_index, current_image, image_counter, analysis_text,
                     model_a_text, model_b_text, preference_status]
        )

        # Connect model selection buttons with separate functions
        model_a_button.click(
            fn=select_model_a,
            inputs=[current_index, all_images, all_results, preference_state],
            outputs=[preference_state, preference_status]
        )

        model_b_button.click(
            fn=select_model_b,
            inputs=[current_index, all_images, all_results, preference_state],
            outputs=[preference_state, preference_status]
        )

        # Optional: Add additional information
        with gr.Accordion("About", open=False):
            gr.Markdown("""
            ## About this demo

            This demo generates alt-text for uploaded images.

            - Upload one or more images using the upload button
            - Choose between standard mode or A/B testing mode
            - In standard mode, select one model to generate alt-text
            - In A/B testing mode, compare outputs from two models and select your preference
            - Navigate through the images with the Previous and Next buttons
            - Download CSV with all results

            Developed by the Natural History Museum in Partnership with National Museums Liverpool.
            """)

    return demo
 
 
 
 
494
 
495
# Launch the app: build the Gradio UI and start the server (blocks until
# shutdown). Guarded so importing this module does not start a server.
if __name__ == "__main__":
    app = create_demo()
    app.launch()
library/.ipynb_checkpoints/utils_html-checkpoint.py ADDED
File without changes
library/.ipynb_checkpoints/utils_model-checkpoint.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import OpenAI
3
+ from PIL import Image
4
+ from io import BytesIO
5
+ import os
6
+
7
def resize_image(image_input, max_size=1024, preserve_aspect_ratio=True):
    """
    Resize an image so neither dimension exceeds max_size.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        max_size (int): Maximum width or height (whichever is larger)
        preserve_aspect_ratio (bool): Whether to preserve the aspect ratio

    Returns:
        PIL.Image: Resized image (the original object if it already fits)
    """
    # Accept either a file path or an already-loaded PIL Image.
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        img = Image.open(image_input)
    else:
        img = image_input

    width, height = img.size

    # Nothing to do when the image already fits within the limit.
    if width <= max_size and height <= max_size:
        return img

    if not preserve_aspect_ratio:
        # Force a square of max_size x max_size.
        target = (max_size, max_size)
    elif width > height:
        # Landscape: cap the width, scale the height proportionally.
        target = (max_size, int(height * (max_size / width)))
    else:
        # Portrait/square: cap the height, scale the width proportionally.
        target = (int(width * (max_size / height)), max_size)

    return img.resize(target, Image.LANCZOS)
50
+
51
def encode_image(image_input, format="JPEG", max_size=None):
    """
    Convert an image to a base64 encoded string, with optional resizing.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        format: Image format for saving (default: "JPEG")
        max_size: Maximum size for the image (width or height). If None, no resizing is done.

    Returns:
        base64 encoded string of the image
    """
    if not isinstance(image_input, str):
        # PIL Image object: optionally shrink, then serialise to base64.
        img = resize_image(image_input, max_size=max_size) if max_size else image_input
        buffer = BytesIO()
        img.save(buffer, format=format)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")

    # File-path input: validate before touching the file.
    if not os.path.exists(image_input):
        raise FileNotFoundError(f"Image file not found: {image_input}")

    if not max_size:
        # No resizing requested: stream the raw bytes straight through.
        with open(image_input, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    # Load, shrink, and re-encode in the requested format.
    shrunk = resize_image(Image.open(image_input), max_size=max_size)
    buffer = BytesIO()
    shrunk.save(buffer, format=format)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
88
+
89
class OpenRouterAPI:
    """
    Thin client for OpenAI-compatible chat-completion APIs.

    Defaults to OpenRouter's endpoint, but `base_url` can point at any
    OpenAI-compatible service (the app also instantiates this against
    Google's Gemini compatibility endpoint).
    """
    def __init__(self, api_key=None,base_url="https://openrouter.ai/api/v1"):
        """
        Initialize the OpenRouter client

        Args:
            api_key (str, optional): OpenRouter API key. If None, will try to get from environment variable
            base_url (str, optional): OpenAI-compatible API base URL

        Raises:
            ValueError: if no key is supplied and OPENROUTER_API_KEY is unset
        """
        # Fall back to the environment so keys never need to be hard-coded.
        api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("OpenRouter API key not provided and not found in environment variables")

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def list_models(self):
        """
        List all available models on OpenRouter

        Returns:
            list: List of model IDs
        """
        models = self.client.models.list()
        model_ids = [model.id for model in models.data]
        return model_ids

    def generate_caption(self, image_path,
                         model="anthropic/claude-3-7-sonnet",
                         prompt_dev="",
                         prompt="Give a very brief description of this image.",
                         detail="high",
                         temperature=0.7,
                         max_image_size=1024):
        """
        Generate captions for an image using OpenRouter models

        Args:
            image_path (str): Path to the image file. Despite the name, a PIL
                Image object is also accepted (encode_image handles both).
            model (str): Model to use (e.g., 'anthropic/claude-3-7-sonnet', 'openai/gpt-4o')
            prompt_dev (str): System prompt or developer prompt
            prompt (str): Text prompt to guide caption generation
            detail (str): Level of detail for image analysis ('low', 'high', etc.) - only applies to OpenAI models
            temperature (float): Sampling temperature for generation
            max_image_size (int): Maximum dimension of the image before encoding. Set to None to disable resizing.

        Returns:
            str: Generated caption
        """
        # Getting the Base64 string with optional resizing
        base64_image = encode_image(image_path, max_size=max_image_size)

        # Prepare messages based on OpenRouter's format
        messages = []

        # Add system message if prompt_dev is provided
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # Add user message with text and image
        content = [
            {
                "type": "text",
                "text": prompt,
            }
        ]

        # Add image with detail parameter only for OpenAI models
        if "openai" in model.lower():
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}", "detail": detail},
            })
        else:
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
            })

        messages.append({
            "role": "user",
            "content": content,
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def generate_text_response(self, text_prompt,
                               model="anthropic/claude-3-5-haiku",
                               prompt_dev="",
                               temperature=0.7):
        """
        Generate responses based on text input using OpenRouter models

        Args:
            text_prompt (str): The text to analyze or respond to
            model (str): Model to use (e.g., 'anthropic/claude-3-5-haiku', 'openai/gpt-4o-mini', 'google/gemini-pro')
            prompt_dev (str): System prompt or developer prompt
            temperature (float): Sampling temperature for generation

        Returns:
            str: Generated response
        """
        # Prepare messages based on OpenRouter's format
        messages = []

        # Add system message if prompt_dev is provided
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # Add user message with text
        messages.append({
            "role": "user",
            "content": text_prompt
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def classify_objs(self, image_path,
                      categories=["Painting/sketch", "Statue/Bust", "Clothing", "Porcelain/Ceramic tableware", "Text based Document", "Other"],
                      model="openai/gpt-4o-mini",
                      detail="low",
                      max_image_size=512): # Using smaller size for classification since less detail is needed
        """
        Classify objects in an image into predefined categories

        Args:
            image_path (str): Path to the image file
            categories (list): List of categories for classification. NOTE: a
                mutable default; safe only because the method never mutates it.
            model (str): Model to use for classification
            detail (str): Level of detail for image analysis ('low', 'high') - only applies to OpenAI models
            max_image_size (int): Maximum dimension for the image. Can be smaller for classification tasks.

        Returns:
            str: Classification result
        """
        prompt = f"This is an image of a museum object. Classify it into one of these categories: {categories}. Only classify it if you are confident it belongs in that category and the category represents the main portion of the image, otherwise return 'Other'. Respond with only the category name."
        return self.generate_caption(image_path, model=model, prompt=prompt, detail=detail, max_image_size=max_image_size)

    def estimate_cost(self, model, tokens_in=1000, tokens_out=200, image=False, detail="low"):
        """
        Estimate the cost of using a specific model based on input/output tokens

        Args:
            model (str): Model identifier
            tokens_in (int): Number of input tokens
            tokens_out (int): Number of output tokens
            image (bool): Whether the request includes an image
            detail (str): Image detail level ('low', 'high')

        Returns:
            dict: Cost estimate information
        """
        # This is a simplified approach - in a real implementation,
        # you might want to use OpenRouter's pricing API or maintain
        # a more complete pricing table

        # Simplified pricing mapping (in USD per 1M tokens)
        # These are example values - please update with actual OpenRouter pricing
        pricing = {
            "anthropic/claude-3-7-sonnet": {"input": 15.0, "output": 75.0},
            "anthropic/claude-3-5-haiku": {"input": 1.0, "output": 5.0},
            "openai/gpt-4o": {"input": 10.0, "output": 30.0},
            "openai/gpt-4o-mini": {"input": 0.2, "output": 0.6},
            "google/gemini-pro": {"input": 0.5, "output": 1.5},
        }

        # Default to a moderate pricing if model not found
        model_pricing = pricing.get(model, {"input": 5.0, "output": 15.0})

        # Image tokens estimation (rough example figures, not measured)
        image_tokens = 0
        if image:
            if detail == "low":
                image_tokens = 1200
            else: # high
                image_tokens = 4000

        # Calculate costs (pricing table is per 1M tokens)
        input_cost = (tokens_in + image_tokens) * model_pricing["input"] / 1000000
        output_cost = tokens_out * model_pricing["output"] / 1000000
        total_cost = input_cost + output_cost

        return {
            "model": model,
            "input_tokens": tokens_in + image_tokens,
            "output_tokens": tokens_out,
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": total_cost
        }
+ }
298
+
library/.ipynb_checkpoints/utils_prompt-checkpoint.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def prompt_new(title=None):
    """Build a short framing sentence for a museum-object prompt.

    Args:
        title (str, optional): Object title; rendered as "(titled: ...)" when given.

    Returns:
        str: One-sentence prompt preamble.
    """
    # Bug fix: the original assigned an empty dict ({}) when no title was
    # given, which the f-string rendered literally as "{}" in the output.
    if title is None:
        return "This object is part of a museum collection."
    return f"This object (titled: {title}) is part of a museum collection."
8
def prompt_1(title=None):
    """Build a concise alt-text generation prompt for a museum image.

    Args:
        title (str, optional): Image title; rendered as "(titled: ...)" when given.

    Returns:
        str: Full prompt instructing a short, non-interpretive object description.
    """
    # Bug fix: the original assigned an empty dict ({}) when no title was
    # given, which the f-string rendered literally as "{}" in the output.
    # A trailing space is kept inside title_info so spacing stays correct
    # in both cases.
    title_info = "" if title is None else f"(titled: {title}) "
    return (
        f"The image {title_info}is part of a museum collection. "
        "Please give a short description of the museum object that will be used as alt-text. "
        "Do not discuss historical context other than what is included in the image. "
        "Don't mention the background, simply focus on describing the object in the image itself "
        "and try to avoid artistic interpretations and evaluative descriptions."
    )
15
def context_prompt():
    """Return an add-on prompt instructing the model to append an
    'Additional context:' paragraph after the main alt-text.

    The returned text specifies the expected output format (main text, blank
    line, then the context paragraph) and character budgets: max 300
    characters for the main text, max 120 for the additional context.

    Returns:
        str: The instruction text (leading/trailing whitespace from the
        triple-quoted literal is preserved).
    """

    prompt = """
    Additionally add a paragraph below starting with 'Additional context:' where you mention the broader context of the image. Mention additional information that will be useful when viewing this image considering race, sexuality, gender and class where relevant. This is a museum artifact and the description will be used for educational purposes.
    So it should have this format:
    main text

    Additional context: additional text

    The main text should be a maximum of 300 characters and the additional context should be a maximum of 120.
    """
    return prompt
+
28
+
29
def generate_prompt(title, category='Other', max_characters=450):
    """Assemble an alt-text generation prompt tailored to a museum object category.

    Args:
        title (str): Object title, embedded into the prompt.
        category (str): One of 'Clothing', 'Statue/Bust', 'Painting/sketch',
            'Porcelain/Ceramic tableware', 'Text based document'; anything else
            falls through to a generic prompt.
        max_characters (int): Character budget stated in the prompt preamble.

    Returns:
        str: The full prompt string (preamble + category-specific body).
    """
    pre_prompt = f"Keep language simple, plain and informative and limit responses to a maximum of {max_characters} characters. "

    def main_prompt(image='image', title=''):
        # Shared opening sentence; `image` is the noun used for the object.
        return f"This {image} (titled: {title}) is part of a museum collection. Please give a concise description of the {image} that will be used as alt-text for accessibility purposes. "

    # Reusable instruction fragments appended per category.
    extra_prompt = "Do not explicitly state the title in the description. Do not discuss historical context other than what is included in the image. Avoid concluding statements. "
    extra_prompt2 = "Avoid artistic interpretations and evaluative descriptions. "
    background_prompt = "Do not mention the background or setting, simply focus on describing the item itself. "

    if category == 'Clothing':
        prompt = main_prompt(image='garment', title=title)
        prompt += "Provide a concise, factual description of the garment, including its type, material, color, shape, notable design features, and any visible embellishments. "
        prompt += extra_prompt
        prompt += extra_prompt2

    elif category == 'Statue/Bust':
        prompt = main_prompt(image='sculpture', title=title)
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category == 'Painting/sketch':
        prompt = main_prompt(image='artwork', title=title)
        prompt += extra_prompt
        prompt += "Focus on providing a description of the artwork including its content and also briefly its style. "
        prompt += extra_prompt2

    elif category == 'Porcelain/Ceramic tableware':
        prompt = main_prompt(image='tablewear', title=title)
        prompt += "Describe its type (e.g., plate, bowl, teacup) and notable elements of it's appearance. "
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category == 'Text based document':
        prompt = main_prompt(image='image', title=title)
        # Bug fix: this line previously used `prompt =`, discarding the
        # main_prompt opening sentence; append instead.
        prompt += "If the text is long do not include the whole text but summarise it. "
        prompt += extra_prompt
        prompt += extra_prompt2
    else:
        # Generic fallback for uncategorised objects.
        prompt = f"This image is titled: {title} and is part of a museum collection. Please give a concise description of the museum object that will be used as alt-text. Do not discuss historical context other than what is included in the image. Don't mention the background, simply focus on describing the object in the image itself and try to avoid artistic interpretations and evaluative descriptions."

    return pre_prompt + prompt
library/__pycache__/utils.cpython-311.pyc ADDED
Binary file (7.82 kB). View file
 
library/__pycache__/utils_html.cpython-311.pyc ADDED
Binary file (32.9 kB). View file
 
library/__pycache__/utils_model.cpython-311.pyc ADDED
Binary file (11.6 kB). View file
 
library/__pycache__/utils_prompt.cpython-311.pyc ADDED
Binary file (4.77 kB). View file
 
library/utils_html.py ADDED
@@ -0,0 +1,840 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_slides_html(image_paths, image_ids, desc1, desc2, desc3, output_file='gallery_with_descriptions.html'):
    """Write a standalone HTML slideshow comparing three model descriptions per image.

    One slide is built per image, showing the image, its ID, and the three
    descriptions. Navigation (buttons + arrow keys) picks a *random* unseen
    slide for "Next" and walks back through visit history for "Previous".

    Args:
        image_paths (list[str]): Paths/URLs used as <img src> values.
        image_ids (list): Identifiers shown as each slide's title; parallel
            to image_paths.
        desc1, desc2, desc3 (list[str]): Per-image descriptions from three
            models, parallel to image_paths; newlines become <br>.
        output_file (str): Destination HTML file.

    Returns:
        None. Side effect: writes output_file and prints a confirmation.

    NOTE(review): descriptions and IDs are interpolated into the HTML without
    escaping, so markup characters in them will be rendered as HTML.
    """
    # Start of HTML content
    html_content = '''<!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Image Gallery</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 900px;
                margin: 0 auto;
                padding: 20px;
                background: #f5f5f5;
            }
            .gallery-container {
                position: relative;
                background: white;
                border-radius: 8px;
                padding: 30px;
                margin-bottom: 30px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
                min-height: 700px;
            }
            .gallery-container img {
                max-width: 700px;
                max-height: 500px;
                height: auto;
                border-radius: 4px;
                display: block;
                margin: 0 auto 20px;
            }
            .slide {
                display: none;
            }
            .slide.active {
                display: block;
                animation: fadeIn 0.5s;
            }
            @keyframes fadeIn {
                from { opacity: 0; }
                to { opacity: 1; }
            }
            .nav-buttons {
                display: flex;
                justify-content: space-between;
                margin: 20px 0;
            }
            .nav-button {
                padding: 10px 20px;
                background: #007bff;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
                font-size: 16px;
            }
            .nav-button:disabled {
                background: #cccccc;
                cursor: not-allowed;
            }
            .image-counter {
                text-align: center;
                font-weight: bold;
                margin-bottom: 20px;
                color: #555;
            }
            .description {
                background: #f8f8f8;
                padding: 15px;
                margin: 15px 0;
                border-radius: 4px;
                white-space: pre-line;
            }
            .image-title {
                font-size: 1.5em;
                font-weight: bold;
                margin-bottom: 15px;
                color: #333;
                text-align: center;
            }
            .model-title {
                font-weight: bold;
                color: #666;
                margin-bottom: 5px;
            }
            .random-button {
                padding: 10px 20px;
                background: #28a745;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
                font-size: 16px;
            }
            .random-button:hover {
                background: #218838;
            }
        </style>
    </head>
    <body>
        <div class="gallery-container" id="gallery">
            <div class="image-counter">Image <span id="current-index">1</span> of <span id="total-images">0</span></div>

            <!-- Slides will be generated here -->
    '''

    # Generate content for each image
    for i in range(len(image_paths)):
        # Process descriptions to handle line breaks
        desc1_html = desc1[i].replace('\n', '<br>')
        desc2_html = desc2[i].replace('\n', '<br>')
        desc3_html = desc3[i].replace('\n', '<br>')

        # One hidden .slide div per image; the JS below toggles .active.
        html_content += f'''
        <div class="slide" data-image-id="{image_ids[i]}" id="slide-{i}">
            <div class="image-title">Image ID: {image_ids[i]}</div>
            <img src="{image_paths[i]}" alt="Image {image_ids[i]}">

            <div class="description">
                <div class="model-title">Model 1</div>
                {desc1_html}
            </div>

            <div class="description">
                <div class="model-title">Model 2</div>
                {desc2_html}
            </div>

            <div class="description">
                <div class="model-title">Model 3</div>
                {desc3_html}
            </div>
        </div>
    '''

    # Closing markup plus the navigation script (plain string, no f-string).
    html_content += '''
        <div class="nav-buttons">
            <button id="prev-button" class="nav-button">Previous</button>
            <button id="next-button" class="nav-button">Next</button>
        </div>
    </div>

    <script>
        // Variables to track current slide and history
        let currentSlide = 0;
        const slides = document.querySelectorAll('.slide');
        const totalSlides = slides.length;
        const viewedSlides = new Set([0]); // Track which slides have been viewed
        const slideHistory = [0]; // Track navigation history
        let historyPosition = 0; // Current position in history

        // Update total images counter
        document.getElementById('total-images').textContent = totalSlides;

        // Function to show a specific slide
        function goToSlide(index) {
            // Hide all slides
            slides.forEach(slide => {
                slide.classList.remove('active');
            });

            // Show the selected slide
            slides[index].classList.add('active');
            currentSlide = index;

            // Add to viewed slides
            viewedSlides.add(index);

            // Update the counter
            document.getElementById('current-index').textContent = index + 1;

            // Update button states
            document.getElementById('prev-button').disabled = slideHistory.length <= 1;
        }

        // Function to go to a random slide and track in history
        function goToRandomSlide() {
            // Get array of unviewed slide indices
            const unviewedSlides = Array.from(Array(totalSlides).keys())
                .filter(index => !viewedSlides.has(index) && index !== currentSlide);

            // If we've seen all slides except the current one, reset
            if (unviewedSlides.length === 0) {
                viewedSlides.clear();
                // Don't add current slide to viewed set so we don't repeat it immediately

                // Recalculate unviewed slides (now all except current)
                const allSlides = Array.from(Array(totalSlides).keys())
                    .filter(index => index !== currentSlide);

                // Select a random slide from all slides except current
                const randomIndex = Math.floor(Math.random() * allSlides.length);
                const newSlideIndex = allSlides[randomIndex];

                // Add to history and update position
                slideHistory.push(newSlideIndex);
                historyPosition = slideHistory.length - 1;

                goToSlide(newSlideIndex);
            } else {
                // Select a random unviewed slide
                const randomIndex = Math.floor(Math.random() * unviewedSlides.length);
                const newSlideIndex = unviewedSlides[randomIndex];

                // Add to history and update position
                slideHistory.push(newSlideIndex);
                historyPosition = slideHistory.length - 1;

                goToSlide(newSlideIndex);
            }
        }

        // Function to go to previous slide in history
        function goToPreviousSlide() {
            if (slideHistory.length > 1 && historyPosition > 0) {
                historyPosition--;
                goToSlide(slideHistory[historyPosition]);
            }
        }

        // Function for next slide (completely random, no repeats until all seen)
        function goToNextSlide() {
            goToRandomSlide();
        }

        // Function for previous slide (removed history navigation)
        function goToPrevSlide() {
            // No longer tracking history - just go to a random slide
            goToRandomSlide();
        }

        // Initialize the first slide
        goToSlide(0);

        // Event listeners for navigation buttons
        document.getElementById('next-button').addEventListener('click', goToRandomSlide);
        document.getElementById('prev-button').addEventListener('click', goToPreviousSlide);

        // Add keyboard navigation
        document.addEventListener('keydown', (e) => {
            if (e.key === 'ArrowRight' || e.key === ' ' || e.key === 'Enter') {
                goToRandomSlide(); // Right arrow, space, or enter goes to next random
            } else if (e.key === 'ArrowLeft') {
                goToPreviousSlide(); // Left arrow goes to previous
            }
        });

        // Initialize - disable previous button at start
        document.getElementById('prev-button').disabled = true;
    </script>
    </body>
    </html>
    '''

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"Gallery with three model outputs has been generated as {output_file}")
262
+
263
+
264
def generate_rating_html(image_paths, image_ids, desc1, desc2, desc3, desc4, desc5, output_file='gallery_with_ratings.html'):
    """Write a standalone HTML rating form for five model descriptions per image.

    The generated page shuffles the image order client-side and, for each
    image, shuffles the display order of the five model descriptions; each
    description gets a 1-5 radio rating. A "Save Ratings as CSV" button
    (enabled once a labeller ID is entered) downloads
    ``ratings_<labeller_id>.csv`` with columns model,image_id,rating.

    Args:
        image_paths (list[str]): Paths/URLs used as <img src> values.
        image_ids (list): Identifiers shown with each image; parallel to
            image_paths.
        desc1..desc5 (list[str]): Per-image descriptions from five models,
            parallel to image_paths; newlines become <br>.
        output_file (str): Destination HTML file.

    Returns:
        None. Side effect: writes output_file and prints a confirmation.

    NOTE(review): descriptions are embedded into JS string literals with only
    double-quote escaping; embedded backslashes or backticks may break the
    generated script.
    """
    # Start of HTML content with linebreaks
    html_content = '''<!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Image Gallery with Ratings</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 800px;
                margin: 0 auto;
                padding: 20px;
                background: #f5f5f5;
            }
            .image-container {
                background: white;
                border-radius: 8px;
                padding: 20px;
                margin-bottom: 30px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            }
            .image-container img {
                max-width: 500px;
                height: auto;
                border-radius: 4px;
                display: block;
                margin: 0 auto;
            }
            .description {
                background: #f8f8f8;
                padding: 15px;
                margin: 10px 0;
                border-radius: 4px;
                white-space: pre-line; /* This helps preserve line breaks */
            }
            .image-title {
                font-size: 1.5em;
                font-weight: bold;
                margin-bottom: 15px;
                color: #333;
                text-align: center;
            }
            .model-title {
                font-weight: bold;
                color: #666;
                margin-bottom: 5px;
            }
            .rating {
                display: flex;
                align-items: center;
                margin-top: 10px;
                padding: 10px;
                background: #fff;
                border-radius: 4px;
            }
            .rating-label {
                margin-right: 10px;
                font-weight: bold;
            }
            .rating-group {
                display: flex;
                gap: 10px;
            }
            .rating-radio {
                display: none;
            }
            .rating-button {
                padding: 8px 12px;
                border: 1px solid #ccc;
                border-radius: 4px;
                cursor: pointer;
                transition: all 0.2s;
            }
            .rating-radio:checked + .rating-button {
                background: #007bff;
                color: white;
                border-color: #0056b3;
            }
            .save-button {
                position: fixed;
                bottom: 20px;
                right: 20px;
                padding: 10px 20px;
                background: #007bff;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
            }
            .save-button:disabled {
                background: #cccccc;
                cursor: not-allowed;
            }
            #images-container {
                /* Container for all image blocks */
            }
        </style>
    </head>
    <body>
        <div id="rater-name-container">
            <label for="rater-name" style="font-weight: bold;">Labeller id:</label>
            <input type="text" id="rater-name" style="margin: 10px 0; padding: 5px; width: 200px;">
        </div>

        <div id="images-container">
            <!-- Image containers will be inserted here dynamically -->
        </div>
    '''

    # Create JavaScript arrays for each piece of data
    js_image_paths = []
    js_image_ids = []
    js_desc1 = []
    js_desc2 = []
    js_desc3 = []
    js_desc4 = []
    js_desc5 = []

    for i in range(len(image_paths)):
        js_image_paths.append(f'"{image_paths[i]}"')
        js_image_ids.append(f'"{image_ids[i]}"')

        # Process descriptions to properly handle line breaks - replace \n with <br> for HTML rendering
        desc1_html = desc1[i].replace('"', '\\"').replace('\n', '<br>')
        desc2_html = desc2[i].replace('"', '\\"').replace('\n', '<br>')
        desc3_html = desc3[i].replace('"', '\\"').replace('\n', '<br>')
        desc4_html = desc4[i].replace('"', '\\"').replace('\n', '<br>')
        desc5_html = desc5[i].replace('"', '\\"').replace('\n', '<br>')

        js_desc1.append(f'"{desc1_html}"')
        js_desc2.append(f'"{desc2_html}"')
        js_desc3.append(f'"{desc3_html}"')
        js_desc4.append(f'"{desc4_html}"')
        js_desc5.append(f'"{desc5_html}"')

    # Add JavaScript to handle randomization and image display
    # (f-string: JS braces are doubled to escape them).
    html_content += f'''
        <button onclick="saveRatings()" class="save-button" id="save-button">Save Ratings as CSV</button>

        <script>
            // Store all image data as separate arrays
            const imagePaths = [{', '.join(js_image_paths)}];
            const imageIds = [{', '.join(js_image_ids)}];
            const desc1 = [{', '.join(js_desc1)}];
            const desc2 = [{', '.join(js_desc2)}];
            const desc3 = [{', '.join(js_desc3)}];
            const desc4 = [{', '.join(js_desc4)}];
            const desc5 = [{', '.join(js_desc5)}];

            // Create an array of indices to shuffle for images
            let indices = [];
            for (let i = 0; i < imageIds.length; i++) {{
                indices.push(i);
            }}

            // Function to render all images in randomized order with randomized model order
            function renderImages() {{
                const container = document.getElementById('images-container');

                // Shuffle the indices (this randomizes image order)
                shuffleArray(indices);

                // Clear the container
                container.innerHTML = '';

                // Add each image in shuffled order
                indices.forEach((originalIndex, newIndex) => {{
                    // For each image, we'll create a different random order for the models
                    let modelOrder = [1, 2, 3, 4, 5];
                    shuffleArray(modelOrder);

                    // Start building the image container HTML
                    let imageHtml = `
                    <div class="image-container" data-image-id="${{imageIds[originalIndex]}}">
                        <div class="image-title">Image ID: ${{imageIds[originalIndex]}}</div>
                        <img src="${{imagePaths[originalIndex]}}" alt="Image ${{imageIds[originalIndex]}}">
                    `;

                    // Add descriptions and rating UI for each model in the randomized order
                    modelOrder.forEach((modelNum, modelIndex) => {{
                        // Get the description data for this model
                        let descData;
                        if (modelNum === 1) descData = desc1[originalIndex];
                        else if (modelNum === 2) descData = desc2[originalIndex];
                        else if (modelNum === 3) descData = desc3[originalIndex];
                        else if (modelNum === 4) descData = desc4[originalIndex];
                        else if (modelNum === 5) descData = desc5[originalIndex];

                        // Create HTML for this model's description and rating
                        imageHtml += `
                        <div class="description">
                            <div class="model-title">Model ${{modelNum}}</div>
                            ${{descData}}
                            <div class="rating" data-model="${{modelNum}}">
                                <span class="rating-label">Rating:</span>
                                <div class="rating-group">
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="1" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-1">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-1">1</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="2" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-2">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-2">2</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="3" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-3">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-3">3</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="4" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-4">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-4">4</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="5" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-5">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-5">5</label>
                                </div>
                            </div>
                        </div>`;
                    }});

                    // Close the image container div
                    imageHtml += `</div>`;

                    // Add the complete HTML for this image to the page
                    container.innerHTML += imageHtml;
                }});
            }}

            // Fisher-Yates shuffle algorithm
            function shuffleArray(array) {{
                for (let i = array.length - 1; i > 0; i--) {{
                    const j = Math.floor(Math.random() * (i + 1));
                    [array[i], array[j]] = [array[j], array[i]];
                }}
                return array;
            }}

            function saveRatings() {{
                const labellerId = document.getElementById('rater-name').value.trim();

                if (!labellerId) {{
                    alert('Please enter a Labeller ID before saving');
                    return;
                }}

                const ratings = [];

                // Collect all ratings
                document.querySelectorAll('.rating-radio:checked').forEach(radio => {{
                    ratings.push({{
                        model: radio.dataset.model,
                        image_id: radio.dataset.image,
                        rating: radio.value
                    }});
                }});

                // Convert to CSV
                const headers = ['model', 'image_id', 'rating'];
                const csvContent = [
                    headers.join(','),
                    ...ratings.map(row => [
                        row.model,
                        row.image_id,
                        row.rating
                    ].join(','))
                ].join('\\n');

                // Create and trigger download with labeller ID in filename
                const blob = new Blob([csvContent], {{ type: 'text/csv;charset=utf-8;' }});
                const link = document.createElement('a');
                link.href = URL.createObjectURL(blob);
                link.download = `ratings_${{labellerId}}.csv`;
                link.click();
            }}

            // Add event listener to enable/disable save button based on labeller ID
            document.getElementById('rater-name').addEventListener('input', function() {{
                const saveButton = document.getElementById('save-button');
                saveButton.disabled = !this.value.trim();
            }});

            // Initially disable save button
            document.getElementById('save-button').disabled = true;

            // Render images when the page loads
            document.addEventListener('DOMContentLoaded', function() {{
                renderImages();
            }});
        </script>
    </body>
    </html>
    '''

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"Rating form with 5 models has been generated as {output_file}")
554
+
555
+
556
def generate_rating_html4(image_paths, image_ids, desc1, desc2, desc3, desc4, output_file='gallery_with_ratings.html'):
    """Write a standalone HTML rating form for four model descriptions per image.

    Four-model variant of ``generate_rating_html``: the page shuffles image
    order client-side and, per image, shuffles the display order of the four
    model descriptions; each description gets a 1-5 radio rating. A
    "Save Ratings as CSV" button (enabled once a labeller ID is entered)
    downloads ``ratings_<labeller_id>.csv`` with columns model,image_id,rating.

    Args:
        image_paths (list[str]): Paths/URLs used as <img src> values.
        image_ids (list): Identifiers shown with each image; parallel to
            image_paths.
        desc1..desc4 (list[str]): Per-image descriptions from four models,
            parallel to image_paths; newlines become <br>.
        output_file (str): Destination HTML file.

    Returns:
        None. Side effect: writes output_file and prints a confirmation.

    NOTE(review): descriptions are embedded into JS string literals with only
    double-quote escaping; embedded backslashes or backticks may break the
    generated script.
    """
    # Start of HTML content
    html_content = '''<!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Image Gallery with Ratings</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 800px;
                margin: 0 auto;
                padding: 20px;
                background: #f5f5f5;
            }
            .image-container {
                background: white;
                border-radius: 8px;
                padding: 20px;
                margin-bottom: 30px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            }
            .image-container img {
                max-width: 500px;
                height: auto;
                border-radius: 4px;
                display: block;
                margin: 0 auto;
            }
            .description {
                background: #f8f8f8;
                padding: 15px;
                margin: 10px 0;
                border-radius: 4px;
                white-space: pre-line; /* This helps preserve line breaks */
            }
            .image-title {
                font-size: 1.5em;
                font-weight: bold;
                margin-bottom: 15px;
                color: #333;
                text-align: center;
            }
            .model-title {
                font-weight: bold;
                color: #666;
                margin-bottom: 5px;
            }
            .rating {
                display: flex;
                align-items: center;
                margin-top: 10px;
                padding: 10px;
                background: #fff;
                border-radius: 4px;
            }
            .rating-label {
                margin-right: 10px;
                font-weight: bold;
            }
            .rating-group {
                display: flex;
                gap: 10px;
            }
            .rating-radio {
                display: none;
            }
            .rating-button {
                padding: 8px 12px;
                border: 1px solid #ccc;
                border-radius: 4px;
                cursor: pointer;
                transition: all 0.2s;
            }
            .rating-radio:checked + .rating-button {
                background: #007bff;
                color: white;
                border-color: #0056b3;
            }
            .save-button {
                position: fixed;
                bottom: 20px;
                right: 20px;
                padding: 10px 20px;
                background: #007bff;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
            }
            .save-button:disabled {
                background: #cccccc;
                cursor: not-allowed;
            }
            #images-container {
                /* Container for all image blocks */
            }
        </style>
    </head>
    <body>
        <div id="rater-name-container">
            <label for="rater-name" style="font-weight: bold;">Labeller id:</label>
            <input type="text" id="rater-name" style="margin: 10px 0; padding: 5px; width: 200px;">
        </div>

        <div id="images-container">
            <!-- Image containers will be inserted here dynamically -->
        </div>
    '''

    # Create JavaScript arrays for each piece of data
    js_image_paths = []
    js_image_ids = []
    js_desc1 = []
    js_desc2 = []
    js_desc3 = []
    js_desc4 = []

    for i in range(len(image_paths)):
        js_image_paths.append(f'"{image_paths[i]}"')
        js_image_ids.append(f'"{image_ids[i]}"')

        # Process descriptions to properly handle line breaks - replace \n with <br> for HTML rendering
        desc1_html = desc1[i].replace('"', '\\"').replace('\n', '<br>')
        desc2_html = desc2[i].replace('"', '\\"').replace('\n', '<br>')
        desc3_html = desc3[i].replace('"', '\\"').replace('\n', '<br>')
        desc4_html = desc4[i].replace('"', '\\"').replace('\n', '<br>')

        js_desc1.append(f'"{desc1_html}"')
        js_desc2.append(f'"{desc2_html}"')
        js_desc3.append(f'"{desc3_html}"')
        js_desc4.append(f'"{desc4_html}"')

    # Add JavaScript to handle randomization and image display
    # (f-string: JS braces are doubled to escape them).
    html_content += f'''
        <button onclick="saveRatings()" class="save-button" id="save-button">Save Ratings as CSV</button>

        <script>
            // Store all image data as separate arrays
            const imagePaths = [{', '.join(js_image_paths)}];
            const imageIds = [{', '.join(js_image_ids)}];
            const desc1 = [{', '.join(js_desc1)}];
            const desc2 = [{', '.join(js_desc2)}];
            const desc3 = [{', '.join(js_desc3)}];
            const desc4 = [{', '.join(js_desc4)}];

            // Create an array of indices to shuffle for images
            let indices = [];
            for (let i = 0; i < imageIds.length; i++) {{
                indices.push(i);
            }}

            // Function to render all images in randomized order with randomized model order
            function renderImages() {{
                const container = document.getElementById('images-container');

                // Shuffle the indices (this randomizes image order)
                shuffleArray(indices);

                // Clear the container
                container.innerHTML = '';

                // Add each image in shuffled order
                indices.forEach((originalIndex, newIndex) => {{
                    // For each image, we'll create a different random order for the models
                    let modelOrder = [1, 2, 3, 4];
                    shuffleArray(modelOrder);

                    // Start building the image container HTML
                    let imageHtml = `
                    <div class="image-container" data-image-id="${{imageIds[originalIndex]}}">
                        <div class="image-title">Image ID: ${{imageIds[originalIndex]}}</div>
                        <img src="${{imagePaths[originalIndex]}}" alt="Image ${{imageIds[originalIndex]}}">
                    `;

                    // Add descriptions and rating UI for each model in the randomized order
                    modelOrder.forEach((modelNum, modelIndex) => {{
                        // Get the description data for this model
                        let descData;
                        if (modelNum === 1) descData = desc1[originalIndex];
                        else if (modelNum === 2) descData = desc2[originalIndex];
                        else if (modelNum === 3) descData = desc3[originalIndex];
                        else if (modelNum === 4) descData = desc4[originalIndex];

                        // Create HTML for this model's description and rating
                        imageHtml += `
                        <div class="description">
                            <div class="model-title">Model ${{modelNum}}</div>
                            ${{descData}}
                            <div class="rating" data-model="${{modelNum}}">
                                <span class="rating-label">Rating:</span>
                                <div class="rating-group">
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="1" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-1">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-1">1</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="2" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-2">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-2">2</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="3" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-3">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-3">3</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="4" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-4">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-4">4</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="5" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-5">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-5">5</label>
                                </div>
                            </div>
                        </div>`;
                    }});

                    // Close the image container div
                    imageHtml += `</div>`;

                    // Add the complete HTML for this image to the page
                    container.innerHTML += imageHtml;
                }});
            }}

            // Fisher-Yates shuffle algorithm
            function shuffleArray(array) {{
                for (let i = array.length - 1; i > 0; i--) {{
                    const j = Math.floor(Math.random() * (i + 1));
                    [array[i], array[j]] = [array[j], array[i]];
                }}
                return array;
            }}

            function saveRatings() {{
                const labellerId = document.getElementById('rater-name').value.trim();

                if (!labellerId) {{
                    alert('Please enter a Labeller ID before saving');
                    return;
                }}

                const ratings = [];

                // Collect all ratings
                document.querySelectorAll('.rating-radio:checked').forEach(radio => {{
                    ratings.push({{
                        model: radio.dataset.model,
                        image_id: radio.dataset.image,
                        rating: radio.value
                    }});
                }});

                // Convert to CSV
                const headers = ['model', 'image_id', 'rating'];
                const csvContent = [
                    headers.join(','),
                    ...ratings.map(row => [
                        row.model,
                        row.image_id,
                        row.rating
                    ].join(','))
                ].join('\\n');

                // Create and trigger download with labeller ID in filename
                const blob = new Blob([csvContent], {{ type: 'text/csv;charset=utf-8;' }});
                const link = document.createElement('a');
                link.href = URL.createObjectURL(blob);
                link.download = `ratings_${{labellerId}}.csv`;
                link.click();
            }}

            // Add event listener to enable/disable save button based on labeller ID
            document.getElementById('rater-name').addEventListener('input', function() {{
                const saveButton = document.getElementById('save-button');
                saveButton.disabled = !this.value.trim();
            }});

            // Initially disable save button
            document.getElementById('save-button').disabled = true;

            // Render images when the page loads
            document.addEventListener('DOMContentLoaded', function() {{
                renderImages();
            }});
        </script>
    </body>
    </html>
    '''

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"Rating form with 4 models has been generated as {output_file}")
library/utils_model.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import OpenAI
3
+ from PIL import Image
4
+ from io import BytesIO
5
+ import os
6
+
7
def resize_image(image_input, max_size=1024, preserve_aspect_ratio=True):
    """
    Resize an image so its largest dimension does not exceed max_size.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        max_size (int): Maximum width or height (whichever is larger)
        preserve_aspect_ratio (bool): If True, scale both dimensions by the
            same factor; if False, force the image to max_size x max_size.

    Returns:
        PIL.Image: Resized image. The input image object is returned
        unchanged if it already fits within max_size.

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    # Accept either a file path or an already-loaded PIL image.
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        img = Image.open(image_input)
    else:
        img = image_input

    width, height = img.size

    # Nothing to do if the image already fits.
    if width <= max_size and height <= max_size:
        return img

    if preserve_aspect_ratio:
        # Scale the longer edge down to max_size. Clamp the short edge to
        # >= 1 pixel: for extremely elongated images the scaled dimension
        # could round down to 0, which would make Image.resize raise.
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * (max_size / width)))
        else:
            new_height = max_size
            new_width = max(1, int(width * (max_size / height)))
    else:
        new_width = max_size
        new_height = max_size

    # LANCZOS gives the best quality for downscaling.
    return img.resize((new_width, new_height), Image.LANCZOS)
50
+
51
def encode_image(image_input, format="JPEG", max_size=None):
    """
    Convert an image to a base64 encoded string, with optional resizing.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        format: Image format for saving (default: "JPEG")
        max_size: Maximum size for the image (width or height). If None, no
            resizing is done and file paths are encoded byte-for-byte.

    Returns:
        str: base64 encoded string of the image

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        if not max_size:
            # Fast path: encode the raw file bytes without re-compressing.
            with open(image_input, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        img = Image.open(image_input)
    else:
        img = image_input

    if max_size:
        img = resize_image(img, max_size=max_size)

    # JPEG cannot store an alpha channel or a palette; convert such images
    # to RGB so save() does not raise "cannot write mode RGBA as JPEG"
    # (e.g. for a resized transparent PNG).
    if format.upper() in ("JPEG", "JPG") and img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    buffered = BytesIO()
    img.save(buffered, format=format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
88
+
89
class OpenRouterAPI:
    """
    Thin wrapper around the OpenRouter chat-completions API, using the
    OpenAI SDK pointed at OpenRouter's OpenAI-compatible endpoint.
    """

    # Immutable default for classify_objs. Kept as a class-level tuple so
    # the default argument cannot be shared and mutated across calls
    # (mutable-default-argument pitfall).
    _DEFAULT_CATEGORIES = (
        "Painting/sketch",
        "Statue/Bust",
        "Clothing",
        "Porcelain/Ceramic tableware",
        "Text based Document",
        "Other",
    )

    def __init__(self, api_key=None, base_url="https://openrouter.ai/api/v1"):
        """
        Initialize the OpenRouter client.

        Args:
            api_key (str, optional): OpenRouter API key. If None, will try to
                get it from the OPENROUTER_API_KEY environment variable.
            base_url (str): Endpoint URL (OpenRouter's OpenAI-compatible API).

        Raises:
            ValueError: If no API key is supplied or found in the environment.
        """
        api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("OpenRouter API key not provided and not found in environment variables")

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def list_models(self):
        """
        List all available models on OpenRouter.

        Returns:
            list: List of model ID strings.
        """
        models = self.client.models.list()
        return [model.id for model in models.data]

    def generate_caption(self, image_path,
                         model="anthropic/claude-3-7-sonnet",
                         prompt_dev="",
                         prompt="Give a very brief description of this image.",
                         detail="high",
                         temperature=0.7,
                         max_image_size=1024):
        """
        Generate a caption for an image using OpenRouter models.

        Args:
            image_path: Path to the image file (or a PIL image; it is passed
                straight to encode_image, which accepts both).
            model (str): Model to use (e.g., 'anthropic/claude-3-7-sonnet', 'openai/gpt-4o')
            prompt_dev (str): System prompt or developer prompt
            prompt (str): Text prompt to guide caption generation
            detail (str): Level of detail for image analysis ('low', 'high') -
                only attached for OpenAI models
            temperature (float): Sampling temperature for generation
            max_image_size (int): Maximum dimension of the image before
                encoding. Set to None to disable resizing.

        Returns:
            str: Generated caption
        """
        # Base64-encode (and optionally downscale) the image.
        base64_image = encode_image(image_path, max_size=max_image_size)

        messages = []

        # Optional system/developer message.
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # User message: text part first, then the image part.
        content = [
            {
                "type": "text",
                "text": prompt,
            }
        ]

        # The 'detail' field is an OpenAI-specific extension, so it is only
        # attached when routing to an OpenAI model.
        image_url = {"url": f"data:image/jpeg;base64,{base64_image}"}
        if "openai" in model.lower():
            image_url["detail"] = detail
        content.append({
            "type": "image_url",
            "image_url": image_url,
        })

        messages.append({
            "role": "user",
            "content": content,
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def generate_text_response(self, text_prompt,
                               model="anthropic/claude-3-5-haiku",
                               prompt_dev="",
                               temperature=0.7):
        """
        Generate a response for a text-only prompt using OpenRouter models.

        Args:
            text_prompt (str): The text to analyze or respond to
            model (str): Model to use (e.g., 'anthropic/claude-3-5-haiku', 'openai/gpt-4o-mini', 'google/gemini-pro')
            prompt_dev (str): System prompt or developer prompt
            temperature (float): Sampling temperature for generation

        Returns:
            str: Generated response
        """
        messages = []

        # Optional system/developer message.
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        messages.append({
            "role": "user",
            "content": text_prompt
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def classify_objs(self, image_path,
                      categories=None,
                      model="openai/gpt-4o-mini",
                      detail="low",
                      max_image_size=512):
        """
        Classify the object in an image into predefined categories.

        Args:
            image_path (str): Path to the image file
            categories (list, optional): Categories for classification.
                Defaults to _DEFAULT_CATEGORIES; a None sentinel is used
                instead of a mutable list default.
            model (str): Model to use for classification
            detail (str): Level of detail for image analysis ('low', 'high') -
                only applies to OpenAI models
            max_image_size (int): Maximum dimension for the image. Smaller
                than for captioning, since classification needs less detail.

        Returns:
            str: Model response (expected to be one of the category names).
        """
        if categories is None:
            # Convert to list so the prompt renders exactly as before
            # (list repr, not tuple repr).
            categories = list(self._DEFAULT_CATEGORIES)
        prompt = f"This is an image of a museum object. Classify it into one of these categories: {categories}. Only classify it if you are confident it belongs in that category and the category represents the main portion of the image, otherwise return 'Other'. Respond with only the category name."
        return self.generate_caption(image_path, model=model, prompt=prompt, detail=detail, max_image_size=max_image_size)

    def estimate_cost(self, model, tokens_in=1000, tokens_out=200, image=False, detail="low"):
        """
        Estimate the cost of using a specific model based on input/output tokens.

        This is a simplified approach using a hard-coded pricing table; the
        values are examples and should be kept in sync with actual
        OpenRouter pricing.

        Args:
            model (str): Model identifier
            tokens_in (int): Number of input tokens
            tokens_out (int): Number of output tokens
            image (bool): Whether the request includes an image
            detail (str): Image detail level ('low', 'high')

        Returns:
            dict: Cost estimate with keys 'model', 'input_tokens',
            'output_tokens', 'input_cost', 'output_cost', 'total_cost'
            (costs in USD).
        """
        # Simplified pricing mapping (USD per 1M tokens).
        pricing = {
            "anthropic/claude-3-7-sonnet": {"input": 15.0, "output": 75.0},
            "anthropic/claude-3-5-haiku": {"input": 1.0, "output": 5.0},
            "openai/gpt-4o": {"input": 10.0, "output": 30.0},
            "openai/gpt-4o-mini": {"input": 0.2, "output": 0.6},
            "google/gemini-pro": {"input": 0.5, "output": 1.5},
        }

        # Fall back to a moderate price if the model is not in the table.
        model_pricing = pricing.get(model, {"input": 5.0, "output": 15.0})

        # Rough token surcharge for an attached image, by detail level.
        image_tokens = 0
        if image:
            if detail == "low":
                image_tokens = 1200
            else:  # high
                image_tokens = 4000

        input_cost = (tokens_in + image_tokens) * model_pricing["input"] / 1000000
        output_cost = tokens_out * model_pricing["output"] / 1000000

        return {
            "model": model,
            "input_tokens": tokens_in + image_tokens,
            "output_tokens": tokens_out,
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": input_cost + output_cost,
        }
297
+ }
298
+
library/utils_prompt.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def prompt_new(title=None):
    """
    Build a minimal one-sentence prompt describing a museum object.

    Args:
        title (str, optional): Title of the object; included in parentheses
            when given.

    Returns:
        str: The prompt sentence.
    """
    # Bug fix: the original assigned an empty dict when no title was given,
    # which rendered a literal "{}" inside the sentence.
    if title is None:
        return "This object is part of a museum collection."
    return f"This object (titled: {title}) is part of a museum collection."
7
+
8
def prompt_1(title=None):
    """
    Build an alt-text generation prompt for a museum image.

    Args:
        title (str, optional): Title of the image; included in parentheses
            when given.

    Returns:
        str: The full prompt.
    """
    # Bug fix: the original assigned an empty dict when no title was given,
    # which rendered a literal "{}" inside the sentence.
    if title is None:
        intro = "The image is part of a museum collection."
    else:
        intro = f"The image (titled: {title}) is part of a museum collection."
    return (
        intro
        + " Please give a short description of the museum object that will be used as alt-text."
        + " Do not discuss historical context other than what is included in the image."
        + " Don't mention the background, simply focus on describing the object in the image"
        + " itself and try to avoid artistic interpretations and evaluative descriptions."
    )
14
+
15
def context_prompt():
    """Return the instruction block that asks the model to append an
    'Additional context:' paragraph after the main alt-text, with the
    character budgets for each part."""
    return """
    Additionally add a paragraph below starting with 'Additional context:' where you mention the broader context of the image. Mention additional information that will be useful when viewing this image considering race, sexuality, gender and class where relevant. This is a museum artifact and the description will be used for educational purposes.
    So it should have this format:
    main text

    Additional context: additional text

    The main text should be a maximum of 300 characters and the additional context should be a maximum of 120.
    """
27
+
28
+
29
def generate_prompt(title, category='Other', max_characters=450):
    """
    Build a category-specific alt-text prompt for a museum object image.

    Args:
        title (str): Title of the object, interpolated into the prompt.
        category (str): One of the classifier categories (e.g. 'Clothing',
            'Statue/Bust', 'Text based Document'); anything unrecognised
            falls back to a generic museum-object prompt.
        max_characters (int): Character budget stated in the prompt preamble.

    Returns:
        str: The assembled prompt string.
    """
    pre_prompt = f"Keep language simple, plain and informative and limit responses to a maximum of {max_characters} characters. "

    def main_prompt(image='image', title=''):
        # Shared opening sentence; `image` names the object type in context.
        return f"This {image} (titled: {title}) is part of a museum collection. Please give a concise description of the {image} that will be used as alt-text for accessibility purposes. "

    extra_prompt = "Do not explicitly state the title in the description. Do not discuss historical context other than what is included in the image. Avoid concluding statements. "
    extra_prompt2 = "Avoid artistic interpretations and evaluative descriptions. "
    background_prompt = "Do not mention the background or setting, simply focus on describing the item itself. "

    if category == 'Clothing':
        prompt = main_prompt(image='garment', title=title)
        prompt += "Provide a concise, factual description of the garment, including its type, material, color, shape, notable design features, and any visible embellishments. "
        prompt += extra_prompt
        prompt += extra_prompt2

    elif category == 'Statue/Bust':
        prompt = main_prompt(image='sculpture', title=title)
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category == 'Painting/sketch':
        prompt = main_prompt(image='artwork', title=title)
        prompt += extra_prompt
        prompt += "Focus on providing a description of the artwork including its content and also briefly its style. "
        prompt += extra_prompt2

    elif category == 'Porcelain/Ceramic tableware':
        prompt = main_prompt(image='tableware', title=title)
        prompt += "Describe its type (e.g., plate, bowl, teacup) and notable elements of its appearance. "
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category in ('Text based Document', 'Text based document'):
        # Accept both capitalisations: the classifier emits
        # 'Text based Document', but this branch previously only matched
        # the lowercase form and so was never taken for classifier output.
        prompt = main_prompt(image='image', title=title)
        # Bug fix: this line used to *assign* instead of append, silently
        # discarding the main_prompt opening sentence.
        prompt += "If the text is long do not include the whole text but summarise it. "
        prompt += extra_prompt
        prompt += extra_prompt2
    else:
        prompt = f"This image is titled: {title} and is part of a museum collection. Please give a concise description of the museum object that will be used as alt-text. Do not discuss historical context other than what is included in the image. Don't mention the background, simply focus on describing the object in the image itself and try to avoid artistic interpretations and evaluative descriptions."

    return pre_prompt + prompt
requirements.txt CHANGED
@@ -1,2 +1,6 @@
1
  gradio==5.24.0
2
- pillow
 
 
 
 
 
1
  gradio==5.24.0
2
+ numpy>=1.24.0
3
+ Pillow>=10.0.0
4
+ requests>=2.28.0
5
+ python-dotenv>=1.0.0
6
+ openai>=1.0.0