# Virtual-Try-on/tryon/api/openAI/image_adapter.py
"""
GPT Image (OpenAI Image Generation) API Adapter
Adapter for OpenAI's GPT Image models (GPT-Image-1 and GPT-Image-1.5).
These models support high-quality image generation and image editing using
text prompts and reference images. This adapter provides a clean, unified
interface for the following workflows:
- Text-to-Image: Generate images from text descriptions
- Image + Text-to-Image (Editing): Modify existing images using prompts
- Multi-Image Editing / Composition: Use multiple input images for edits
- Mask-based editing: Apply changes to specific regions of an image
- Output control: Size, quality, background, and multiple image generation
The adapter returns raw image bytes, allowing callers to save, transform,
or convert outputs (e.g., to PIL Images) as needed.
Reference:
https://platform.openai.com/docs/guides/image-generation
Models:
- GPT-Image-1 (gpt-image-1): High-quality image generation and editing
- GPT-Image-1.5 (gpt-image-1.5): Enhanced quality, better prompt understanding, improved consistency (default)
Examples:
Text-to-image with latest model:
>>> from tryon.api.openAI.image_adapter import GPTImageAdapter
>>> adapter = GPTImageAdapter() # Uses gpt-image-1.5 by default
>>> images = adapter.generate_text_to_image(
... prompt="A cinematic portrait of a person wearing a futuristic jacket",
... size="1024x1024",
... quality="high"
... )
>>> with open("result.png", "wb") as f:
... f.write(images[0])
Using GPT-Image-1 specifically:
>>> adapter = GPTImageAdapter(model_version="gpt-image-1")
>>> images = adapter.generate_text_to_image(
... prompt="A fashion model in elegant attire",
... size="1024x1024"
... )
Image editing:
>>> adapter = GPTImageAdapter()
>>> images = adapter.generate_image_edit(
... images="person.jpg",
... prompt="Change the jacket color to black leather"
... )
>>> with open("edited.png", "wb") as f:
... f.write(images[0])
Mask-based editing:
>>> images = adapter.generate_image_edit(
... images="scene.png",
... mask="mask.png",
... prompt="Replace the masked area with a swimming pool"
... )
Multi-image composition:
>>> images = adapter.generate_image_edit(
... images=["shirt.png", "logo.png"],
... prompt="Add the logo to the shirt fabric naturally"
... )
"""
import base64
import io
import os
from typing import Optional, Union, List
from PIL import Image
# Import guard: the OpenAI SDK is an optional dependency. The flag records
# whether the `openai` package imported successfully — it does NOT hold an
# API key (the actual key is read in GPTImageAdapter.__init__).
try:
    from openai import OpenAI
    OPENAI_SDK_AVAILABLE = True
except ImportError:
    OPENAI_SDK_AVAILABLE = False
    OpenAI = None

# Backward-compatible alias: the availability flag was historically (mis)named
# after the environment variable. Keep the old name so existing code that
# imports or checks it keeps working.
OPENAI_API_KEY = OPENAI_SDK_AVAILABLE

# Parameter values accepted by the GPT Image endpoints.
VALID_SIZES = {"1024x1024", "1536x1024", "1024x1536", "auto"}  # output resolutions
VALID_QUALITY = {"low", "high", "medium", "auto"}              # rendering quality
INPUT_FIDELITY = {"low", "high"}                               # how strictly edits preserve the input
VALID_MODELS = {"gpt-image-1", "gpt-image-1.5"}                # supported model versions
class GPTImageAdapter:
    """
    Adapter for OpenAI GPT Image API (supports both GPT-Image-1 and GPT-Image-1.5).

    Args:
        api_key (str, optional): OpenAI API key. If not provided, reads from the
            OPENAI_API_KEY environment variable.
        model_version (str, optional): Model version to use. Options:
            "gpt-image-1", "gpt-image-1.5". Defaults to "gpt-image-1.5"
            (latest and recommended).

    Examples:
        >>> # Use latest model (GPT-Image-1.5)
        >>> adapter = GPTImageAdapter()
        >>> # Use specific model version
        >>> adapter = GPTImageAdapter(model_version="gpt-image-1")
        >>> # With explicit API key
        >>> adapter = GPTImageAdapter(api_key="sk-...", model_version="gpt-image-1.5")
    """

    def __init__(self, api_key: Optional[str] = None, model_version: str = "gpt-image-1.5"):
        # Fail fast with an actionable message if the optional SDK is missing.
        # (The module-level flag records SDK import success, not a key.)
        if not OPENAI_API_KEY:
            raise ImportError(
                "OpenAI SDK is not available. " \
                "Please install it with 'pip install openai'."
            )
        if model_version not in VALID_MODELS:
            raise ValueError(
                f"Invalid model_version: {model_version}. "
                f"Supported models: {VALID_MODELS}"
            )
        # Explicit argument wins over the environment variable.
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key must be provided either as a parameter or through the OPENAI_API_KEY environment variable.")
        self.model_version = model_version
        self.client = OpenAI(api_key=self.api_key)

    def prepare_image(self, image: Union[str, io.BytesIO, Image.Image]) -> io.BytesIO:
        """
        Normalize a supported image input into a named, file-like buffer.

        Args:
            image: A file path, an in-memory ``io.BytesIO`` buffer, or a
                ``PIL.Image.Image`` instance.

        Returns:
            io.BytesIO: A buffer positioned at offset 0 with a ``name``
            attribute set (the OpenAI SDK uses the name's extension to infer
            the upload's MIME type).

        Raises:
            TypeError: If ``image`` is not one of the supported types.
        """
        if isinstance(image, str):
            # Read the file eagerly so no OS file handle is leaked (the old
            # implementation returned an open handle that was never closed),
            # and keep the original basename so the extension survives.
            with open(image, "rb") as fh:
                buffer = io.BytesIO(fh.read())
            buffer.name = os.path.basename(image)
            return buffer
        # Handle in-memory buffers before the PIL branch so they never depend
        # on PIL internals.
        if isinstance(image, io.BytesIO):
            image.seek(0)
            image.name = "image.png"
            return image
        if isinstance(image, Image.Image):
            buffer = io.BytesIO()
            image.save(buffer, format="PNG")
            buffer.seek(0)
            buffer.name = "image.png"
            return buffer
        raise TypeError(f"Unsupported image type: {type(image)}")

    def generate_text_to_image(
        self,
        prompt: str,
        size: str = "auto",
        quality: str = "auto",
        background: str = "auto",
        n: int = 1,
    ) -> List[bytes]:
        """
        Generate images from a text prompt using the configured GPT Image model.

        This method performs text-to-image generation and returns the generated
        images as raw byte data. The caller is responsible for saving or converting
        the returned bytes (e.g., into PIL Images).

        Args:
            prompt (str):
                Text description used to generate the image(s).
                Must be a non-empty string.
            size (str, optional):
                Output image resolution.
                Allowed values: {"1024x1024", "1536x1024", "1024x1536", "auto"}.
                Defaults to "auto".
            quality (str, optional):
                Image generation quality.
                Allowed values: {"low", "medium", "high", "auto"}.
                Defaults to "auto".
            background (str, optional):
                Background mode for the generated image.
                Common values include "auto" and "transparent".
                Defaults to "auto".
            n (int, optional):
                Number of images to generate.
                Must be >= 1.
                Defaults to 1.

        Returns:
            List[bytes]:
                A list of generated images as raw bytes.
                Each element represents a single image.

        Raises:
            ValueError:
                - If the prompt is empty
                - If `n` is less than 1
                - If `size` or `quality` is not a supported value

        Example:
            >>> adapter = GPTImageAdapter()
            >>> images = adapter.generate_text_to_image(
            ...     prompt="A cinematic portrait of a person wearing a leather jacket",
            ...     size="1024x1024",
            ...     quality="high",
            ...     n=2
            ... )
            >>> with open("image.png", "wb") as f:
            ...     f.write(images[0])
        """
        if not prompt:
            raise ValueError("Prompt is required for Text to Image generation.")
        if n < 1:
            raise ValueError("n must be >= 1.")
        if size not in VALID_SIZES:
            raise ValueError(f"Invalid Size: {size}, Available Options are: {VALID_SIZES}")
        if quality not in VALID_QUALITY:
            raise ValueError(f"Invalid quality: {quality}, Available Options are: {VALID_QUALITY}")

        response = self.client.images.generate(
            model=self.model_version,
            prompt=prompt,
            size=size,
            quality=quality,
            background=background,
            n=n,
        )
        # GPT Image responses carry base64-encoded payloads; decode each one
        # into raw bytes for the caller.
        return [base64.b64decode(item.b64_json) for item in response.data]

    def generate_image_edit(
        self,
        images: Union[str, io.BytesIO, Image.Image, List[Union[str, io.BytesIO, Image.Image]]],
        prompt: Optional[str] = None,
        mask: Optional[Union[str, io.BytesIO, Image.Image]] = None,
        size: str = "auto",
        quality: str = "auto",
        background: str = "auto",
        input_fidelity: str = "low",
        n: int = 1,
    ) -> List[bytes]:
        """
        Edit or transform existing images using the configured GPT Image model.

        This method performs image-to-image generation by applying a text prompt
        to one or more input images. It supports multi-image editing, optional
        mask-based edits, and controlled output parameters such as size and quality.

        Input images may be provided as file paths, in-memory ``io.BytesIO``
        buffers, or PIL Images. The method always returns raw image bytes.

        Args:
            images (Union[str, io.BytesIO, PIL.Image.Image, List[...]]):
                One or more input images to be edited.
                A single image may be passed directly or as a list.
            prompt (str, optional):
                Text instruction describing how the image(s) should be edited.
                Forwarded to the API as-is (may be ``None``).
            mask (Union[str, io.BytesIO, PIL.Image.Image], optional):
                Optional mask image defining the region to be edited.
                Masked areas will be modified while unmasked areas remain unchanged.
            size (str, optional):
                Output image resolution.
                Allowed values: {"1024x1024", "1536x1024", "1024x1536", "auto"}.
                Defaults to "auto".
            quality (str, optional):
                Image generation quality.
                Allowed values: {"low", "medium", "high", "auto"}.
                Defaults to "auto".
            background (str, optional):
                Background mode for the generated image.
                Common values include "auto" and "transparent".
                Defaults to "auto".
            input_fidelity (str, optional):
                Controls how strictly the model preserves the original image(s).
                Allowed values are defined in INPUT_FIDELITY.
                Defaults to "low".
            n (int, optional):
                Number of edited images to generate.
                Must be >= 1.
                Defaults to 1.

        Returns:
            List[bytes]:
                A list of edited images as raw byte data.

        Raises:
            ValueError:
                - If no images are provided
                - If `n` is less than 1
                - If `size`, `quality`, or `input_fidelity` is invalid

        Example:
            >>> adapter = GPTImageAdapter()
            >>> images = adapter.generate_image_edit(
            ...     images="person.jpg",
            ...     prompt="Change the jacket color to black leather",
            ...     quality="high"
            ... )
            >>> with open("edited.png", "wb") as f:
            ...     f.write(images[0])
            >>> images = adapter.generate_image_edit(
            ...     images=["scene.png"],
            ...     mask="mask.png",
            ...     prompt="Replace the masked area with a swimming pool"
            ... )
        """
        # Normalize the single-image convenience form into a list.
        if not isinstance(images, list):
            images = [images]

        # Validation checks
        if len(images) == 0:
            raise ValueError("At least one image is required for Image Edit.")
        if n < 1:
            raise ValueError("n must be >= 1.")
        if size not in VALID_SIZES:
            raise ValueError(f"Invalid Size: {size}, Available Options are: {VALID_SIZES}")
        if quality not in VALID_QUALITY:
            raise ValueError(f"Invalid quality: {quality}, Available Options are: {VALID_QUALITY}")
        if input_fidelity not in INPUT_FIDELITY:
            raise ValueError(f"input_fidelity can only be {INPUT_FIDELITY}")

        # Normalize every base image into a named in-memory buffer (no OS
        # handles to leak; the SDK reads each buffer during the request).
        image_files = [self.prepare_image(img) for img in images]

        # Build request dynamically
        kwargs = {
            "model": self.model_version,
            "image": image_files,
            "prompt": prompt,
            "size": size,
            "quality": quality,
            "background": background,
            "input_fidelity": input_fidelity,
            "n": n,
        }
        # Attach mask only if it exists. Use an identity check: truthiness is
        # unreliable for arbitrary image-like objects (and "" would be a bug
        # anyway — better to let prepare_image raise on it explicitly).
        if mask is not None:
            kwargs["mask"] = self.prepare_image(mask)

        response = self.client.images.edit(**kwargs)
        # Decode the base64 payloads into raw bytes for the caller.
        return [base64.b64decode(item.b64_json) for item in response.data]