| | import base64 |
| | from openai import OpenAI |
| | from PIL import Image |
| | from io import BytesIO |
| | import os |
| |
|
def resize_image(image_input, max_size=1024, preserve_aspect_ratio=True):
    """
    Resize an image so neither dimension exceeds max_size.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        max_size (int): Maximum width or height (whichever is larger)
        preserve_aspect_ratio (bool): Whether to preserve the aspect ratio.
            If False, the image is forced to max_size x max_size.

    Returns:
        PIL.Image: Resized image (the original object if it already fits)

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        img = Image.open(image_input)
    else:
        img = image_input

    width, height = img.size

    # Nothing to do if the image already fits within the limit.
    if width <= max_size and height <= max_size:
        return img

    if preserve_aspect_ratio:
        # Scale the longer side down to max_size; max(1, ...) guards against a
        # zero-pixel dimension for extreme aspect ratios (e.g. 10000x1), which
        # would make img.resize() raise.
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * (max_size / width)))
        else:
            new_height = max_size
            new_width = max(1, int(width * (max_size / height)))
    else:
        new_width = max_size
        new_height = max_size

    # LANCZOS gives the best quality for downscaling.
    return img.resize((new_width, new_height), Image.LANCZOS)
| |
|
def encode_image(image_input, format="JPEG", max_size=None):
    """
    Convert an image to a base64 encoded string, with optional resizing.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        format: Image format for saving (default: "JPEG")
        max_size: Maximum size for the image (width or height). If None, no resizing is done.

    Returns:
        base64 encoded string of the image

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")

        if not max_size:
            # Fast path: stream the raw file bytes without decoding/re-encoding.
            with open(image_input, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")

        img = Image.open(image_input)
    else:
        img = image_input

    if max_size:
        img = resize_image(img, max_size=max_size)

    # JPEG cannot store alpha or palette data; converting avoids
    # "OSError: cannot write mode RGBA as JPEG" on PNG-sourced images.
    if format.upper() in ("JPEG", "JPG") and img.mode in ("RGBA", "P", "LA"):
        img = img.convert("RGB")

    buffered = BytesIO()
    img.save(buffered, format=format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
| |
|
class OpenRouterAPI:
    """Thin wrapper around the OpenAI SDK pointed at OpenRouter's
    OpenAI-compatible endpoint, with helpers for image captioning,
    text generation, classification, and cost estimation."""

    def __init__(self, api_key=None, base_url="https://openrouter.ai/api/v1"):
        """
        Initialize the OpenRouter client

        Args:
            api_key (str, optional): OpenRouter API key. If None, will try to get
                it from the OPENROUTER_API_KEY environment variable.
            base_url (str): API endpoint; defaults to OpenRouter's OpenAI-compatible URL.

        Raises:
            ValueError: If no API key is provided or found in the environment.
        """
        api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("OpenRouter API key not provided and not found in environment variables")

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def list_models(self):
        """
        List all available models on OpenRouter

        Returns:
            list: List of model IDs
        """
        models = self.client.models.list()
        return [model.id for model in models.data]

    def generate_caption(self, image_path,
                         model="anthropic/claude-3-7-sonnet",
                         prompt_dev="",
                         prompt="Give a very brief description of this image.",
                         detail="high",
                         temperature=0.7,
                         max_image_size=1024):
        """
        Generate captions for an image using OpenRouter models

        Args:
            image_path (str): Path to the image file
            model (str): Model to use (e.g., 'anthropic/claude-3-7-sonnet', 'openai/gpt-4o')
            prompt_dev (str): System prompt or developer prompt
            prompt (str): Text prompt to guide caption generation
            detail (str): Level of detail for image analysis ('low', 'high') - only applies to OpenAI models
            temperature (float): Sampling temperature for generation
            max_image_size (int): Maximum dimension of the image before encoding. Set to None to disable resizing.

        Returns:
            str: Generated caption
        """
        base64_image = encode_image(image_path, max_size=max_image_size)

        messages = []

        # Optional system/developer prompt goes first.
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # Multimodal user turn: text prompt plus the inline base64 image.
        # BUGFIX: "detail" was previously accepted but never sent; it belongs
        # inside the image_url object per the chat-completions vision API.
        content = [
            {
                "type": "text",
                "text": prompt,
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}",
                    "detail": detail,
                },
            },
        ]

        messages.append({
            "role": "user",
            "content": content,
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            # BUGFIX: OpenRouter provider preferences are a top-level request
            # field, not a message attribute (where they were silently ignored).
            # The OpenAI SDK forwards unknown body fields via extra_body.
            extra_body={"provider": {"data_collection": "deny"}},
        )

        return response.choices[0].message.content

    def generate_text_response(self, text_prompt,
                               model="anthropic/claude-3-5-haiku",
                               prompt_dev="",
                               temperature=0.7):
        """
        Generate responses based on text input using OpenRouter models

        Args:
            text_prompt (str): The text to analyze or respond to
            model (str): Model to use (e.g., 'anthropic/claude-3-5-haiku', 'openai/gpt-4o-mini', 'google/gemini-pro')
            prompt_dev (str): System prompt or developer prompt
            temperature (float): Sampling temperature for generation

        Returns:
            str: Generated response
        """
        messages = []

        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        messages.append({
            "role": "user",
            "content": text_prompt
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def classify_objs(self, image_path,
                      categories=["Painting/sketch", "Statue/Bust", "Clothing", "Porcelain/Ceramic tableware", "Text based Document", "Other"],
                      model="openai/gpt-4o-mini",
                      detail="low",
                      max_image_size=512):
        """
        Classify objects in an image into predefined categories

        Args:
            image_path (str): Path to the image file
            categories (list): List of categories for classification.
                NOTE: mutable default is safe here because it is never mutated.
            model (str): Model to use for classification
            detail (str): Level of detail for image analysis ('low', 'high') - only applies to OpenAI models
            max_image_size (int): Maximum dimension for the image. Can be smaller for classification tasks.

        Returns:
            str: Classification result
        """
        prompt = f"This is an image of a museum object. Classify it into one of these categories: {categories}. Only classify it if you are confident it belongs in that category and the category represents the main portion of the image, otherwise return 'Other'. Respond with only the category name."
        return self.generate_caption(image_path, model=model, prompt=prompt, detail=detail, max_image_size=max_image_size)

    def estimate_cost(self, model, tokens_in=1000, tokens_out=200, image=False, detail="low"):
        """
        Estimate the cost of using a specific model based on input/output tokens

        Args:
            model (str): Model identifier
            tokens_in (int): Number of input tokens
            tokens_out (int): Number of output tokens
            image (bool): Whether the request includes an image
            detail (str): Image detail level ('low', 'high')

        Returns:
            dict: Cost estimate information (token counts and USD costs)
        """
        # Hard-coded USD prices per million tokens; these are estimates only
        # and may drift from OpenRouter's live pricing — verify periodically.
        pricing = {
            "anthropic/claude-3-7-sonnet": {"input": 15.0, "output": 75.0},
            "anthropic/claude-3-5-haiku": {"input": 1.0, "output": 5.0},
            "openai/gpt-4o": {"input": 10.0, "output": 30.0},
            "openai/gpt-4o-mini": {"input": 0.2, "output": 0.6},
            "google/gemini-pro": {"input": 0.5, "output": 1.5},
        }

        # Fall back to a mid-range estimate for unknown models.
        model_pricing = pricing.get(model, {"input": 5.0, "output": 15.0})

        # Rough token cost of an attached image, by detail level.
        image_tokens = 0
        if image:
            image_tokens = 1200 if detail == "low" else 4000

        # Prices are per 1M tokens.
        input_cost = (tokens_in + image_tokens) * model_pricing["input"] / 1000000
        output_cost = tokens_out * model_pricing["output"] / 1000000

        return {
            "model": model,
            "input_tokens": tokens_in + image_tokens,
            "output_tokens": tokens_out,
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": input_cost + output_cost,
        }
| |
|
| |
|