Spaces:
Sleeping
Sleeping
"""
GPT Image (OpenAI Image Generation) API Adapter

Adapter for OpenAI's GPT Image models (GPT-Image-1 and GPT-Image-1.5).
These models support high-quality image generation and image editing using
text prompts and reference images. This adapter provides a clean, unified
interface for the following workflows:

- Text-to-Image: Generate images from text descriptions
- Image + Text-to-Image (Editing): Modify existing images using prompts
- Multi-Image Editing / Composition: Use multiple input images for edits
- Mask-based editing: Apply changes to specific regions of an image
- Output control: Size, quality, background, and multiple image generation

The adapter returns raw image bytes, allowing callers to save, transform,
or convert outputs (e.g., to PIL Images) as needed.

Reference:
    https://platform.openai.com/docs/guides/image-generation

Models:
    - GPT-Image-1 (gpt-image-1): High-quality image generation and editing
    - GPT-Image-1.5 (gpt-image-1.5): Enhanced quality, better prompt
      understanding, improved consistency (default)

Examples:
    Text-to-image with the latest model:

    >>> from tryon.api.openAI.image_adapter import GPTImageAdapter
    >>> adapter = GPTImageAdapter()  # Uses gpt-image-1.5 by default
    >>> images = adapter.generate_text_to_image(
    ...     prompt="A cinematic portrait of a person wearing a futuristic jacket",
    ...     size="1024x1024",
    ...     quality="high"
    ... )
    >>> with open("result.png", "wb") as f:
    ...     f.write(images[0])

    Using GPT-Image-1 specifically:

    >>> adapter = GPTImageAdapter(model_version="gpt-image-1")
    >>> images = adapter.generate_text_to_image(
    ...     prompt="A fashion model in elegant attire",
    ...     size="1024x1024"
    ... )

    Image editing:

    >>> adapter = GPTImageAdapter()
    >>> images = adapter.generate_image_edit(
    ...     images="person.jpg",
    ...     prompt="Change the jacket color to black leather"
    ... )
    >>> with open("edited.png", "wb") as f:
    ...     f.write(images[0])

    Mask-based editing:

    >>> images = adapter.generate_image_edit(
    ...     images="scene.png",
    ...     mask="mask.png",
    ...     prompt="Replace the masked area with a swimming pool"
    ... )

    Multi-image composition:

    >>> images = adapter.generate_image_edit(
    ...     images=["shirt.png", "logo.png"],
    ...     prompt="Add the logo to the shirt fabric naturally"
    ... )
"""
| import base64 | |
| import io | |
| import os | |
| from typing import Optional, Union, List | |
| from PIL import Image | |
| try: | |
| from openai import OpenAI | |
| OPENAI_API_KEY = True | |
| except ImportError: | |
| OPENAI_API_KEY = False | |
| OpenAI = None | |
| VALID_SIZES = {"1024x1024", "1536x1024", "1024x1536", "auto"} | |
| VALID_QUALITY = {"low", "high", "medium", "auto"} | |
| INPUT_FIDELITY = {"low", "high"} | |
| VALID_MODELS = {"gpt-image-1", "gpt-image-1.5"} | |
| class GPTImageAdapter: | |
| """ | |
| Adapter for OpenAI GPT Image API (supports both GPT-Image-1 and GPT-Image-1.5). | |
| Args: | |
| api_key (str, optional): OpenAI API key. If not provided, reads from OPENAI_API_KEY environment variable. | |
| model_version (str, optional): Model version to use. Options: "gpt-image-1", "gpt-image-1.5". | |
| Defaults to "gpt-image-1.5" (latest and recommended). | |
| Examples: | |
| >>> # Use latest model (GPT-Image-1.5) | |
| >>> adapter = GPTImageAdapter() | |
| >>> # Use specific model version | |
| >>> adapter = GPTImageAdapter(model_version="gpt-image-1") | |
| >>> # With explicit API key | |
| >>> adapter = GPTImageAdapter(api_key="sk-...", model_version="gpt-image-1.5") | |
| """ | |
| def __init__(self, api_key: Optional[str] = None, model_version: str = "gpt-image-1.5"): | |
| if not OPENAI_API_KEY: | |
| raise ImportError( | |
| "OpenAI SDK is not available. " \ | |
| "Please install it with 'pip install openai'." | |
| ) | |
| if model_version not in VALID_MODELS: | |
| raise ValueError( | |
| f"Invalid model_version: {model_version}. " | |
| f"Supported models: {VALID_MODELS}" | |
| ) | |
| self.api_key = api_key or os.getenv("OPENAI_API_KEY") | |
| if not self.api_key: | |
| raise ValueError("OpenAI API key must be provided either as a parameter or through the OPENAI_API_KEY environment variable.") | |
| self.model_version = model_version | |
| self.client = OpenAI(api_key=self.api_key) | |
| def prepare_image(self, image: Union[str, io.BytesIO, Image.Image]): | |
| if isinstance(image, str): | |
| return open(image, "rb") | |
| if isinstance(image, Image.Image): | |
| buffer = io.BytesIO() | |
| image.save(buffer, format="PNG") | |
| buffer.seek(0) | |
| buffer.name = "image.png" | |
| return buffer | |
| if isinstance(image, io.BytesIO): | |
| image.seek(0) | |
| image.name = "image.png" | |
| return image | |
| raise TypeError(f"Unsupported image type: {type(image)}") | |
| def generate_text_to_image( | |
| self, | |
| prompt: str, | |
| size: str = "auto", | |
| quality: str = "auto", | |
| background: str = "auto", | |
| n: int = 1, | |
| ) -> List[bytes]: | |
| """ | |
| Generate images from a text prompt using OpenAI's GPT Image 1 model. | |
| This method performs text-to-image generation and returns the generated | |
| images as raw byte data. The caller is responsible for saving or converting | |
| the returned bytes (e.g., into PIL Images). | |
| Args: | |
| prompt (str): | |
| Text description used to generate the image(s). | |
| Must be a non-empty string. | |
| size (str, optional): | |
| Output image resolution. | |
| Allowed values: {"1024x1024", "1536x1024", "1024x1536", "auto"}. | |
| Defaults to "auto". | |
| quality (str, optional): | |
| Image generation quality. | |
| Allowed values: {"low", "medium", "high", "auto"}. | |
| Defaults to "auto". | |
| background (str, optional): | |
| Background mode for the generated image. | |
| Common values include "auto" and "transparent". | |
| Defaults to "auto". | |
| n (int, optional): | |
| Number of images to generate. | |
| Must be >= 1. | |
| Defaults to 1. | |
| Returns: | |
| List[bytes]: | |
| A list of generated images as raw bytes. | |
| Each element represents a single image. | |
| Raises: | |
| ValueError: | |
| - If the prompt is empty | |
| - If `n` is less than 1 | |
| - If `size` or `quality` is not a supported value | |
| Example: | |
| >>> adapter = GPTImageAdapter() | |
| >>> images = adapter.generate_text_to_image( | |
| ... prompt="A cinematic portrait of a person wearing a leather jacket", | |
| ... size="1024x1024", | |
| ... quality="high", | |
| ... n=2 | |
| ... ) | |
| >>> with open("image.png", "wb") as f: | |
| ... f.write(images[0]) | |
| """ | |
| if not prompt: | |
| raise ValueError("Prompt is required for Text to Image generation.") | |
| if n < 1: | |
| raise ValueError("n must be >= 1.") | |
| if size not in VALID_SIZES: | |
| raise ValueError(f"Invalid Size: {size}, Available Options are: {VALID_SIZES}") | |
| if quality not in VALID_QUALITY: | |
| raise ValueError(f"Invalid quality: {quality}, Available Options are: {VALID_QUALITY}") | |
| response = self.client.images.generate( | |
| model=self.model_version, | |
| prompt=prompt, | |
| size=size, | |
| quality=quality, | |
| background=background, | |
| n=n, | |
| ) | |
| output_images = [] | |
| for item in response.data: | |
| image_bytes = base64.b64decode(item.b64_json) | |
| output_images.append(image_bytes) | |
| return output_images | |
| def generate_image_edit( | |
| self, | |
| images: List[Union[str, io.BytesIO, Image.Image]], | |
| prompt: Optional[str] = None, | |
| mask: Optional[Union[str, io.BytesIO, Image.Image]] = None, | |
| size: str = "auto", | |
| quality: str = "auto", | |
| background: str = "auto", | |
| input_fidelity: str = "low", | |
| n: int = 1, | |
| ) -> List[bytes]: | |
| """ | |
| Edit or transform existing images using OpenAI's GPT Image 1 model. | |
| This method performs image-to-image generation by applying a text prompt | |
| to one or more input images. It supports multi-image editing, optional | |
| mask-based edits, and controlled output parameters such as size and quality. | |
| Input images may be provided as file paths, file-like objects, PIL Images, | |
| or base64-encoded strings. The method always returns raw image bytes. | |
| Args: | |
| images (Union[str, io.BytesIO, PIL.Image.Image, List[...]]): | |
| One or more input images to be edited. | |
| A single image may be passed directly or as a list. | |
| prompt (str, optional): | |
| Text instruction describing how the image(s) should be edited. | |
| If omitted, the model performs a minimal transformation. | |
| mask (Union[str, io.BytesIO, PIL.Image.Image], optional): | |
| Optional mask image defining the region to be edited. | |
| Masked areas will be modified while unmasked areas remain unchanged. | |
| size (str, optional): | |
| Output image resolution. | |
| Allowed values: {"1024x1024", "1536x1024", "1024x1536", "auto"}. | |
| Defaults to "auto". | |
| quality (str, optional): | |
| Image generation quality. | |
| Allowed values: {"low", "medium", "high", "auto"}. | |
| Defaults to "auto". | |
| background (str, optional): | |
| Background mode for the generated image. | |
| Common values include "auto" and "transparent". | |
| Defaults to "auto". | |
| input_fidelity (str, optional): | |
| Controls how strictly the model preserves the original image(s). | |
| Allowed values are defined in INPUT_FIDELITY. | |
| Defaults to "low". | |
| n (int, optional): | |
| Number of edited images to generate. | |
| Must be >= 1. | |
| Defaults to 1. | |
| Returns: | |
| List[bytes]: | |
| A list of edited images as raw byte data. | |
| Raises: | |
| ValueError: | |
| - If no images are provided | |
| - If `n` is less than 1 | |
| - If `size`, `quality`, or `input_fidelity` is invalid | |
| Example: | |
| >>> adapter = GPTImageAdapter() | |
| >>> images = adapter.generate_image_edit( | |
| ... images="person.jpg", | |
| ... prompt="Change the jacket color to black leather", | |
| ... quality="high" | |
| ... ) | |
| >>> with open("edited.png", "wb") as f: | |
| ... f.write(images[0]) | |
| >>> images = adapter.generate_image_edit( | |
| ... images=["scene.png"], | |
| ... mask="mask.png", | |
| ... prompt="Replace the masked area with a swimming pool" | |
| ... ) | |
| """ | |
| # If a single image is passed | |
| if not isinstance(images, list): | |
| images = [images] | |
| # Validation Check | |
| if len(images) == 0: | |
| raise ValueError("At least one image is required for Image Edit.") | |
| if n < 1: | |
| raise ValueError("n must be >= 1.") | |
| if size not in VALID_SIZES: | |
| raise ValueError(f"Invalid Size: {size}, Available Options are: {VALID_SIZES}") | |
| if quality not in VALID_QUALITY: | |
| raise ValueError(f"Invalid quality: {quality}, Available Options are: {VALID_QUALITY}") | |
| if input_fidelity not in INPUT_FIDELITY: | |
| raise ValueError(f"input_fidelity can only be {INPUT_FIDELITY}") | |
| # Prepare base images | |
| image_files = [self.prepare_image(img) for img in images] | |
| # Build request dynamically | |
| kwargs = { | |
| "model": self.model_version, | |
| "image": image_files, | |
| "prompt": prompt, | |
| "size": size, | |
| "quality": quality, | |
| "background": background, | |
| "input_fidelity": input_fidelity, | |
| "n": n, | |
| } | |
| # Attach mask only if it exists | |
| if mask: | |
| kwargs["mask"] = self.prepare_image(mask) | |
| response = self.client.images.edit(**kwargs) | |
| output_images = [] | |
| for item in response.data: | |
| output_images.append(base64.b64decode(item.b64_json)) | |
| return output_images |