File size: 11,087 Bytes
9883bdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a3681c
 
 
 
 
 
9883bdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46f2212
 
9883bdb
 
 
46f2212
9883bdb
 
 
 
 
 
 
 
 
 
 
 
3a3681c
9883bdb
46f2212
9883bdb
 
46f2212
9883bdb
46f2212
 
 
 
4fc1a2f
9883bdb
46f2212
 
 
 
 
9883bdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
import base64
from openai import OpenAI
from PIL import Image
from io import BytesIO
import os

def resize_image(image_input, max_size=1024, preserve_aspect_ratio=True):
    """
    Resize an image, optionally preserving its aspect ratio.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        max_size (int): Maximum width or height (whichever is larger)
        preserve_aspect_ratio (bool): If True, scale the longer side down to
            max_size and the other side proportionally; if False, force an
            exact max_size x max_size output.

    Returns:
        PIL.Image: Resized image (the original object is returned unchanged
        when no resizing is needed).

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    # Load the image if it's a file path
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        img = Image.open(image_input)
    else:
        # Already a PIL Image
        img = image_input

    # Get original dimensions
    width, height = img.size

    if preserve_aspect_ratio:
        # Skip if already within bounds. This short-circuit is only valid in
        # aspect-preserving mode: with preserve_aspect_ratio=False the caller
        # asked for an exact max_size x max_size image, so a small input must
        # still be resized (the original code returned it unchanged — bug).
        if width <= max_size and height <= max_size:
            return img
        # Scale the longer side to max_size; max(1, ...) guards against a
        # zero dimension for extreme aspect ratios (int() truncates).
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * (max_size / width)))
        else:
            new_height = max_size
            new_width = max(1, int(width * (max_size / height)))
    else:
        new_width = max_size
        new_height = max_size

    # Resize the image with a high-quality downsampling filter
    resized_img = img.resize((new_width, new_height), Image.LANCZOS)
    return resized_img

def encode_image(image_input, format="JPEG", max_size=None):
    """
    Convert an image to a base64 encoded string, with optional resizing.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        format: Image format for re-encoding (default: "JPEG"). Only used when
            the image is (re-)saved; the no-resize file path case returns the
            file's original bytes untouched.
        max_size: Maximum size for the image (width or height). If None, no
            resizing is done.

    Returns:
        str: base64 encoded string of the image.

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    if isinstance(image_input, str):
        # Input is a file path
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")

        if not max_size:
            # Fast path: encode the raw file bytes directly. This deliberately
            # preserves the file's original container format (e.g. a PNG stays
            # a PNG) instead of re-encoding through `format`.
            with open(image_input, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")

        # Fall through to the common resize-and-encode path below.
        image_input = Image.open(image_input)

    # Input is now a PIL Image object (loaded or passed in directly).
    if max_size:
        image_input = resize_image(image_input, max_size=max_size)

    buffered = BytesIO()
    image_input.save(buffered, format=format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

class OpenRouterAPI:
    """Thin wrapper around the OpenAI SDK pointed at the OpenRouter API.

    Provides helpers for listing models, image captioning, text generation,
    simple image classification, and rough cost estimation.
    """

    def __init__(self, api_key=None, base_url="https://openrouter.ai/api/v1"):
        """
        Initialize the OpenRouter client.

        Args:
            api_key (str, optional): OpenRouter API key. If None, will try to
                get from the OPENROUTER_API_KEY environment variable.
            base_url (str): API endpoint; defaults to OpenRouter's v1 URL.

        Raises:
            ValueError: If no API key is provided or found in the environment.
        """
        api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("OpenRouter API key not provided and not found in environment variables")

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def list_models(self):
        """
        List all available models on OpenRouter.

        Returns:
            list: List of model ID strings.
        """
        models = self.client.models.list()
        return [model.id for model in models.data]

    def generate_caption(self, image_path,
                     model="anthropic/claude-3-7-sonnet",
                     prompt_dev="",
                     prompt="Give a very brief description of this image.",
                     detail="high",
                     temperature=0.7,
                     max_image_size=1024):
        """
        Generate captions for an image using OpenRouter models.

        Args:
            image_path (str): Path to the image file
            model (str): Model to use (e.g., 'anthropic/claude-3-7-sonnet', 'openai/gpt-4o')
            prompt_dev (str): System prompt or developer prompt
            prompt (str): Text prompt to guide caption generation
            detail (str): Level of detail for image analysis ('low', 'high', etc.) - only applies to OpenAI models
            temperature (float): Sampling temperature for generation
            max_image_size (int): Maximum dimension of the image before encoding. Set to None to disable resizing.

        Returns:
            str: Generated caption
        """
        # Getting the Base64 string with optional resizing
        base64_image = encode_image(image_path, max_size=max_image_size)

        # Prepare messages based on OpenRouter's format
        messages = []

        # Add system message if prompt_dev is provided
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # Add user message with text and image.
        # Fix: `detail` was previously accepted but never sent; it belongs
        # inside the image_url object per the OpenAI vision message format.
        content = [
            {
                "type": "text",
                "text": prompt,
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}",
                    "detail": detail,
                },
            },
        ]

        messages.append({
            "role": "user",
            "content": content
        })

        # Fix: OpenRouter provider preferences are a top-level request-body
        # field, not a message field (where they were silently ignored).
        # extra_body merges them into the JSON payload via the OpenAI SDK.
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            extra_body={"provider": {"data_collection": "deny"}}
        )

        return response.choices[0].message.content

    def generate_text_response(self, text_prompt,
                              model="anthropic/claude-3-5-haiku",
                              prompt_dev="",
                              temperature=0.7):
        """
        Generate responses based on text input using OpenRouter models.

        Args:
            text_prompt (str): The text to analyze or respond to
            model (str): Model to use (e.g., 'anthropic/claude-3-5-haiku', 'openai/gpt-4o-mini', 'google/gemini-pro')
            prompt_dev (str): System prompt or developer prompt
            temperature (float): Sampling temperature for generation

        Returns:
            str: Generated response
        """
        # Prepare messages based on OpenRouter's format
        messages = []

        # Add system message if prompt_dev is provided
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # Add user message with text
        messages.append({
            "role": "user",
            "content": text_prompt
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def classify_objs(self, image_path,
                     categories=["Painting/sketch", "Statue/Bust", "Clothing", "Porcelain/Ceramic tableware", "Text based Document", "Other"],
                     model="openai/gpt-4o-mini",
                     detail="low",
                     max_image_size=512):  # Smaller size for classification since less detail is needed
        """
        Classify objects in an image into predefined categories.

        Note: `categories` uses a mutable (list) default; it is never mutated
        here, and the list repr is baked into the prompt, so it is kept as-is
        for backward compatibility.

        Args:
            image_path (str): Path to the image file
            categories (list): List of categories for classification
            model (str): Model to use for classification
            detail (str): Level of detail for image analysis ('low', 'high') - only applies to OpenAI models
            max_image_size (int): Maximum dimension for the image. Can be smaller for classification tasks.

        Returns:
            str: Classification result (one of `categories`, per the prompt contract)
        """
        prompt = f"This is an image of a museum object. Classify it into one of these categories: {categories}. Only classify it if you are confident it belongs in that category and the category represents the main portion of the image, otherwise return 'Other'. Respond with only the category name."
        return self.generate_caption(image_path, model=model, prompt=prompt, detail=detail, max_image_size=max_image_size)

    def estimate_cost(self, model, tokens_in=1000, tokens_out=200, image=False, detail="low"):
        """
        Estimate the cost of using a specific model based on input/output tokens.

        Args:
            model (str): Model identifier
            tokens_in (int): Number of input tokens
            tokens_out (int): Number of output tokens
            image (bool): Whether the request includes an image
            detail (str): Image detail level ('low', 'high')

        Returns:
            dict: Cost estimate with keys model, input_tokens, output_tokens,
                input_cost, output_cost, total_cost (costs in USD).
        """
        # This is a simplified approach - in a real implementation,
        # you might want to use OpenRouter's pricing API or maintain
        # a more complete pricing table.

        # Simplified pricing mapping (in USD per 1M tokens).
        # These are example values - please update with actual OpenRouter pricing.
        pricing = {
            "anthropic/claude-3-7-sonnet": {"input": 15.0, "output": 75.0},
            "anthropic/claude-3-5-haiku": {"input": 1.0, "output": 5.0},
            "openai/gpt-4o": {"input": 10.0, "output": 30.0},
            "openai/gpt-4o-mini": {"input": 0.2, "output": 0.6},
            "google/gemini-pro": {"input": 0.5, "output": 1.5},
        }

        # Default to a moderate pricing if model not found
        model_pricing = pricing.get(model, {"input": 5.0, "output": 15.0})

        # Rough image token estimate; counts toward input tokens
        image_tokens = 0
        if image:
            if detail == "low":
                image_tokens = 1200
            else:  # high
                image_tokens = 4000

        # Calculate costs (pricing table is per 1M tokens)
        input_cost = (tokens_in + image_tokens) * model_pricing["input"] / 1000000
        output_cost = tokens_out * model_pricing["output"] / 1000000
        total_cost = input_cost + output_cost

        return {
            "model": model,
            "input_tokens": tokens_in + image_tokens,
            "output_tokens": tokens_out,
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": total_cost
        }