# Virtual-Try-on/tryon/api/openAI/image_adapter.py
"""
GPT Image (OpenAI Image Generation) API Adapter
Adapter for OpenAI's GPT Image models (GPT-Image-1 and GPT-Image-1.5).
These models support high-quality image generation and image editing using
text prompts and reference images. This adapter provides a clean, unified
interface for the following workflows:
- Text-to-Image: Generate images from text descriptions
- Image + Text-to-Image (Editing): Modify existing images using prompts
- Multi-Image Editing / Composition: Use multiple input images for edits
- Mask-based editing: Apply changes to specific regions of an image
- Output control: Size, quality, background, and multiple image generation
The adapter returns raw image bytes, allowing callers to save, transform,
or convert outputs (e.g., to PIL Images) as needed.
Reference:
https://platform.openai.com/docs/guides/image-generation
Models:
- GPT-Image-1 (gpt-image-1): High-quality image generation and editing
- GPT-Image-1.5 (gpt-image-1.5): Enhanced quality, better prompt understanding, improved consistency (default)
Examples:
Text-to-image with latest model:
>>> from tryon.api.openAI.image_adapter import GPTImageAdapter
>>> adapter = GPTImageAdapter() # Uses gpt-image-1.5 by default
>>> images = adapter.generate_text_to_image(
... prompt="A cinematic portrait of a person wearing a futuristic jacket",
... size="1024x1024",
... quality="high"
... )
>>> with open("result.png", "wb") as f:
... f.write(images[0])
Using GPT-Image-1 specifically:
>>> adapter = GPTImageAdapter(model_version="gpt-image-1")
>>> images = adapter.generate_text_to_image(
... prompt="A fashion model in elegant attire",
... size="1024x1024"
... )
Image editing:
>>> adapter = GPTImageAdapter()
>>> images = adapter.generate_image_edit(
... images="person.jpg",
... prompt="Change the jacket color to black leather"
... )
>>> with open("edited.png", "wb") as f:
... f.write(images[0])
Mask-based editing:
>>> images = adapter.generate_image_edit(
... images="scene.png",
... mask="mask.png",
... prompt="Replace the masked area with a swimming pool"
... )
Multi-image composition:
>>> images = adapter.generate_image_edit(
... images=["shirt.png", "logo.png"],
... prompt="Add the logo to the shirt fabric naturally"
... )
"""
import base64
import io
import os
from typing import Optional, Union, List
from PIL import Image
# Import guard: the OpenAI SDK is an optional dependency. The flag records
# whether the `openai` package imported successfully — it does NOT hold an
# API key (the actual key is read in GPTImageAdapter.__init__).
try:
    from openai import OpenAI
    OPENAI_SDK_AVAILABLE = True
except ImportError:
    OPENAI_SDK_AVAILABLE = False
    OpenAI = None

# Backward-compatible alias: the availability flag was historically (mis)named
# after the environment variable. Keep the old name so existing code that
# imports or checks it keeps working.
OPENAI_API_KEY = OPENAI_SDK_AVAILABLE

# Parameter values accepted by the GPT Image endpoints.
VALID_SIZES = {"1024x1024", "1536x1024", "1024x1536", "auto"}  # output resolutions
VALID_QUALITY = {"low", "high", "medium", "auto"}              # rendering quality
INPUT_FIDELITY = {"low", "high"}                               # how strictly edits preserve the input
VALID_MODELS = {"gpt-image-1", "gpt-image-1.5"}                # supported model versions
class GPTImageAdapter:
    """
    Adapter for OpenAI GPT Image API (supports both GPT-Image-1 and GPT-Image-1.5).

    Args:
        api_key (str, optional): OpenAI API key. If not provided, reads from the
            OPENAI_API_KEY environment variable.
        model_version (str, optional): Model version to use. Options:
            "gpt-image-1", "gpt-image-1.5". Defaults to "gpt-image-1.5"
            (latest and recommended).

    Examples:
        >>> # Use latest model (GPT-Image-1.5)
        >>> adapter = GPTImageAdapter()
        >>> # Use specific model version
        >>> adapter = GPTImageAdapter(model_version="gpt-image-1")
        >>> # With explicit API key
        >>> adapter = GPTImageAdapter(api_key="sk-...", model_version="gpt-image-1.5")
    """

    def __init__(self, api_key: Optional[str] = None, model_version: str = "gpt-image-1.5"):
        # Fail fast with an actionable message if the optional SDK is missing.
        # (The module-level flag records SDK import success, not a key.)
        if not OPENAI_API_KEY:
            raise ImportError(
                "OpenAI SDK is not available. " \
                "Please install it with 'pip install openai'."
            )
        if model_version not in VALID_MODELS:
            raise ValueError(
                f"Invalid model_version: {model_version}. "
                f"Supported models: {VALID_MODELS}"
            )
        # Explicit argument wins over the environment variable.
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key must be provided either as a parameter or through the OPENAI_API_KEY environment variable.")
        self.model_version = model_version
        self.client = OpenAI(api_key=self.api_key)

    def prepare_image(self, image: Union[str, io.BytesIO, Image.Image]) -> io.BytesIO:
        """
        Normalize a supported image input into a named, file-like buffer.

        Args:
            image: A file path, an in-memory ``io.BytesIO`` buffer, or a
                ``PIL.Image.Image`` instance.

        Returns:
            io.BytesIO: A buffer positioned at offset 0 with a ``name``
            attribute set (the OpenAI SDK uses the name's extension to infer
            the upload's MIME type).

        Raises:
            TypeError: If ``image`` is not one of the supported types.
        """
        if isinstance(image, str):
            # Read the file eagerly so no OS file handle is leaked (the old
            # implementation returned an open handle that was never closed),
            # and keep the original basename so the extension survives.
            with open(image, "rb") as fh:
                buffer = io.BytesIO(fh.read())
            buffer.name = os.path.basename(image)
            return buffer
        # Handle in-memory buffers before the PIL branch so they never depend
        # on PIL internals.
        if isinstance(image, io.BytesIO):
            image.seek(0)
            image.name = "image.png"
            return image
        if isinstance(image, Image.Image):
            buffer = io.BytesIO()
            image.save(buffer, format="PNG")
            buffer.seek(0)
            buffer.name = "image.png"
            return buffer
        raise TypeError(f"Unsupported image type: {type(image)}")

    def generate_text_to_image(
        self,
        prompt: str,
        size: str = "auto",
        quality: str = "auto",
        background: str = "auto",
        n: int = 1,
    ) -> List[bytes]:
        """
        Generate images from a text prompt using the configured GPT Image model.

        This method performs text-to-image generation and returns the generated
        images as raw byte data. The caller is responsible for saving or converting
        the returned bytes (e.g., into PIL Images).

        Args:
            prompt (str):
                Text description used to generate the image(s).
                Must be a non-empty string.
            size (str, optional):
                Output image resolution.
                Allowed values: {"1024x1024", "1536x1024", "1024x1536", "auto"}.
                Defaults to "auto".
            quality (str, optional):
                Image generation quality.
                Allowed values: {"low", "medium", "high", "auto"}.
                Defaults to "auto".
            background (str, optional):
                Background mode for the generated image.
                Common values include "auto" and "transparent".
                Defaults to "auto".
            n (int, optional):
                Number of images to generate.
                Must be >= 1.
                Defaults to 1.

        Returns:
            List[bytes]:
                A list of generated images as raw bytes.
                Each element represents a single image.

        Raises:
            ValueError:
                - If the prompt is empty
                - If `n` is less than 1
                - If `size` or `quality` is not a supported value

        Example:
            >>> adapter = GPTImageAdapter()
            >>> images = adapter.generate_text_to_image(
            ...     prompt="A cinematic portrait of a person wearing a leather jacket",
            ...     size="1024x1024",
            ...     quality="high",
            ...     n=2
            ... )
            >>> with open("image.png", "wb") as f:
            ...     f.write(images[0])
        """
        if not prompt:
            raise ValueError("Prompt is required for Text to Image generation.")
        if n < 1:
            raise ValueError("n must be >= 1.")
        if size not in VALID_SIZES:
            raise ValueError(f"Invalid Size: {size}, Available Options are: {VALID_SIZES}")
        if quality not in VALID_QUALITY:
            raise ValueError(f"Invalid quality: {quality}, Available Options are: {VALID_QUALITY}")

        response = self.client.images.generate(
            model=self.model_version,
            prompt=prompt,
            size=size,
            quality=quality,
            background=background,
            n=n,
        )
        # GPT Image responses carry base64-encoded payloads; decode each one
        # into raw bytes for the caller.
        return [base64.b64decode(item.b64_json) for item in response.data]

    def generate_image_edit(
        self,
        images: Union[str, io.BytesIO, Image.Image, List[Union[str, io.BytesIO, Image.Image]]],
        prompt: Optional[str] = None,
        mask: Optional[Union[str, io.BytesIO, Image.Image]] = None,
        size: str = "auto",
        quality: str = "auto",
        background: str = "auto",
        input_fidelity: str = "low",
        n: int = 1,
    ) -> List[bytes]:
        """
        Edit or transform existing images using the configured GPT Image model.

        This method performs image-to-image generation by applying a text prompt
        to one or more input images. It supports multi-image editing, optional
        mask-based edits, and controlled output parameters such as size and quality.

        Input images may be provided as file paths, in-memory ``io.BytesIO``
        buffers, or PIL Images. The method always returns raw image bytes.

        Args:
            images (Union[str, io.BytesIO, PIL.Image.Image, List[...]]):
                One or more input images to be edited.
                A single image may be passed directly or as a list.
            prompt (str, optional):
                Text instruction describing how the image(s) should be edited.
                Forwarded to the API as-is (may be ``None``).
            mask (Union[str, io.BytesIO, PIL.Image.Image], optional):
                Optional mask image defining the region to be edited.
                Masked areas will be modified while unmasked areas remain unchanged.
            size (str, optional):
                Output image resolution.
                Allowed values: {"1024x1024", "1536x1024", "1024x1536", "auto"}.
                Defaults to "auto".
            quality (str, optional):
                Image generation quality.
                Allowed values: {"low", "medium", "high", "auto"}.
                Defaults to "auto".
            background (str, optional):
                Background mode for the generated image.
                Common values include "auto" and "transparent".
                Defaults to "auto".
            input_fidelity (str, optional):
                Controls how strictly the model preserves the original image(s).
                Allowed values are defined in INPUT_FIDELITY.
                Defaults to "low".
            n (int, optional):
                Number of edited images to generate.
                Must be >= 1.
                Defaults to 1.

        Returns:
            List[bytes]:
                A list of edited images as raw byte data.

        Raises:
            ValueError:
                - If no images are provided
                - If `n` is less than 1
                - If `size`, `quality`, or `input_fidelity` is invalid

        Example:
            >>> adapter = GPTImageAdapter()
            >>> images = adapter.generate_image_edit(
            ...     images="person.jpg",
            ...     prompt="Change the jacket color to black leather",
            ...     quality="high"
            ... )
            >>> with open("edited.png", "wb") as f:
            ...     f.write(images[0])
            >>> images = adapter.generate_image_edit(
            ...     images=["scene.png"],
            ...     mask="mask.png",
            ...     prompt="Replace the masked area with a swimming pool"
            ... )
        """
        # Normalize the single-image convenience form into a list.
        if not isinstance(images, list):
            images = [images]

        # Validation checks
        if len(images) == 0:
            raise ValueError("At least one image is required for Image Edit.")
        if n < 1:
            raise ValueError("n must be >= 1.")
        if size not in VALID_SIZES:
            raise ValueError(f"Invalid Size: {size}, Available Options are: {VALID_SIZES}")
        if quality not in VALID_QUALITY:
            raise ValueError(f"Invalid quality: {quality}, Available Options are: {VALID_QUALITY}")
        if input_fidelity not in INPUT_FIDELITY:
            raise ValueError(f"input_fidelity can only be {INPUT_FIDELITY}")

        # Normalize every base image into a named in-memory buffer (no OS
        # handles to leak; the SDK reads each buffer during the request).
        image_files = [self.prepare_image(img) for img in images]

        # Build request dynamically
        kwargs = {
            "model": self.model_version,
            "image": image_files,
            "prompt": prompt,
            "size": size,
            "quality": quality,
            "background": background,
            "input_fidelity": input_fidelity,
            "n": n,
        }
        # Attach mask only if it exists. Use an identity check: truthiness is
        # unreliable for arbitrary image-like objects (and "" would be a bug
        # anyway — better to let prepare_image raise on it explicitly).
        if mask is not None:
            kwargs["mask"] = self.prepare_image(mask)

        response = self.client.images.edit(**kwargs)
        # Decode the base64 payloads into raw bytes for the caller.
        return [base64.b64decode(item.b64_json) for item in response.data]