import io
from inspect import cleandoc

import numpy as np
import torch
from PIL import Image

from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict
from comfy_api_nodes.apis import (
    OpenAIImageGenerationRequest,
    OpenAIImageEditRequest,
    OpenAIImageGenerationResponse,
)
from comfy_api_nodes.apis.client import (
    ApiEndpoint,
    HttpMethod,
    SynchronousOperation,
)
from comfy_api_nodes.apinode_utils import (
    downscale_image_tensor,
    validate_and_cast_response,
    validate_string,
)


class OpenAIDalle2(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 2 endpoint.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt for DALL·E",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 2**31 - 1,
                        "step": 1,
                        "display": "number",
                        "control_after_generate": True,
                        "tooltip": "Not yet implemented in the backend.",
                    },
                ),
                "size": (
                    IO.COMBO,
                    {
                        "options": ["256x256", "512x512", "1024x1024"],
                        "default": "1024x1024",
                        "tooltip": "Image size",
                    },
                ),
                "n": (
                    IO.INT,
                    {
                        "default": 1,
                        "min": 1,
                        "max": 8,
                        "step": 1,
                        "display": "number",
                        "tooltip": "How many images to generate",
                    },
                ),
                "image": (
                    IO.IMAGE,
                    {
                        "default": None,
                        "tooltip": "Optional reference image for image editing.",
                    },
                ),
                "mask": (
                    IO.MASK,
                    {
                        "default": None,
                        "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node/image/OpenAI"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(
        self,
        prompt,
        seed=0,
        image=None,
        mask=None,
        n=1,
        size="1024x1024",
        unique_id=None,
        **kwargs
    ):
        validate_string(prompt, strip_whitespace=False)
        model = "dall-e-2"
        path = "/proxy/openai/images/generations"
        content_type = "application/json"
        request_class = OpenAIImageGenerationRequest
        img_binary = None

        if image is not None and mask is not None:
            # An image plus a mask switches the request to the multipart edit endpoint.
            path = "/proxy/openai/images/edits"
            content_type = "multipart/form-data"
            request_class = OpenAIImageEditRequest

            input_tensor = image.squeeze().cpu()
            height, width, channels = input_tensor.shape
            rgba_tensor = torch.ones(height, width, 4, device="cpu")
            rgba_tensor[:, :, :channels] = input_tensor

            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            # ComfyUI masks are 1 where content should be replaced; the edits
            # endpoint treats transparent pixels as editable, so invert the
            # mask into the alpha channel.
            rgba_tensor[:, :, 3] = 1 - mask.squeeze().cpu()

            rgba_tensor = downscale_image_tensor(rgba_tensor.unsqueeze(0)).squeeze()

            # Encode the RGBA tensor as an in-memory PNG for the multipart upload.
            image_np = (rgba_tensor.numpy() * 255).astype(np.uint8)
            img = Image.fromarray(image_np)
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format="PNG")
            img_byte_arr.seek(0)
            img_binary = img_byte_arr
            img_binary.name = "image.png"
        elif image is not None or mask is not None:
            raise Exception("DALL·E 2 image editing requires an image AND a mask")

        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse,
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                n=n,
                size=size,
                seed=seed,
            ),
            files=(
                {
                    "image": img_binary,
                }
                if img_binary
                else None
            ),
            content_type=content_type,
            auth_kwargs=kwargs,
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response, node_id=unique_id)
        return (img_tensor,)

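# A minimal usage sketch (illustrative only, not part of the node API). Inside
# ComfyUI the hidden auth inputs are injected by the frontend; the auth keyword
# below is an assumption for standalone experimentation, forwarded via **kwargs
# to auth_kwargs:
#
#   node = OpenAIDalle2()
#   (images,) = node.api_call(
#       "a watercolor fox in the snow",
#       n=1,
#       size="512x512",
#       auth_token="...",  # hypothetical credential, normally supplied by ComfyUI
#   )
#
# For inpainting, pass both `image` (a [B, H, W, C] float tensor in [0, 1]) and
# `mask` (a [B, H, W] tensor; white/1 areas are replaced).
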
class OpenAIDalle3(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 3 endpoint.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt for DALL·E",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 2**31 - 1,
                        "step": 1,
                        "display": "number",
                        "control_after_generate": True,
                        "tooltip": "Not yet implemented in the backend.",
                    },
                ),
                "quality": (
                    IO.COMBO,
                    {
                        "options": ["standard", "hd"],
                        "default": "standard",
                        "tooltip": "Image quality",
                    },
                ),
                "style": (
                    IO.COMBO,
                    {
                        "options": ["natural", "vivid"],
                        "default": "natural",
                        "tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.",
                    },
                ),
                "size": (
                    IO.COMBO,
                    {
                        "options": ["1024x1024", "1024x1792", "1792x1024"],
                        "default": "1024x1024",
                        "tooltip": "Image size",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node/image/OpenAI"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(
        self,
        prompt,
        seed=0,
        style="natural",
        quality="standard",
        size="1024x1024",
        unique_id=None,
        **kwargs
    ):
        validate_string(prompt, strip_whitespace=False)
        model = "dall-e-3"

        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path="/proxy/openai/images/generations",
                method=HttpMethod.POST,
                request_model=OpenAIImageGenerationRequest,
                response_model=OpenAIImageGenerationResponse,
            ),
            request=OpenAIImageGenerationRequest(
                model=model,
                prompt=prompt,
                quality=quality,
                size=size,
                style=style,
                seed=seed,
            ),
            auth_kwargs=kwargs,
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response, node_id=unique_id)
        return (img_tensor,)

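# Usage sketch (illustrative; same auth assumptions as the DALL·E 2 example).
# DALL·E 3 is generation-only, so there is no image/mask input:
#
#   node = OpenAIDalle3()
#   (images,) = node.api_call(
#       "a city skyline at dusk", style="vivid", quality="hd", size="1792x1024"
#   )
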
class OpenAIGPTImage1(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's GPT Image 1 endpoint.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text prompt for GPT Image 1",
                    },
                ),
            },
            "optional": {
                "seed": (
                    IO.INT,
                    {
                        "default": 0,
                        "min": 0,
                        "max": 2**31 - 1,
                        "step": 1,
                        "display": "number",
                        "control_after_generate": True,
                        "tooltip": "Not yet implemented in the backend.",
                    },
                ),
                "quality": (
                    IO.COMBO,
                    {
                        "options": ["low", "medium", "high"],
                        "default": "low",
                        "tooltip": "Image quality, affects cost and generation time.",
                    },
                ),
                "background": (
                    IO.COMBO,
                    {
                        "options": ["opaque", "transparent"],
                        "default": "opaque",
                        "tooltip": "Return image with or without background",
                    },
                ),
                "size": (
                    IO.COMBO,
                    {
                        "options": ["auto", "1024x1024", "1024x1536", "1536x1024"],
                        "default": "auto",
                        "tooltip": "Image size",
                    },
                ),
                "n": (
                    IO.INT,
                    {
                        "default": 1,
                        "min": 1,
                        "max": 8,
                        "step": 1,
                        "display": "number",
                        "tooltip": "How many images to generate",
                    },
                ),
                "image": (
                    IO.IMAGE,
                    {
                        "default": None,
                        "tooltip": "Optional reference image for image editing.",
                    },
                ),
                "mask": (
                    IO.MASK,
                    {
                        "default": None,
                        "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node/image/OpenAI"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(
        self,
        prompt,
        seed=0,
        quality="low",
        background="opaque",
        image=None,
        mask=None,
        n=1,
        size="1024x1024",
        unique_id=None,
        **kwargs
    ):
        validate_string(prompt, strip_whitespace=False)
        model = "gpt-image-1"
        path = "/proxy/openai/images/generations"
        content_type = "application/json"
        request_class = OpenAIImageGenerationRequest
        img_binaries = []
        mask_binary = None
        files = []

        if image is not None:
            # An input image switches the request to the multipart edit endpoint.
            path = "/proxy/openai/images/edits"
            request_class = OpenAIImageEditRequest
            content_type = "multipart/form-data"

            batch_size = image.shape[0]

            for i in range(batch_size):
                single_image = image[i : i + 1]
                scaled_image = downscale_image_tensor(single_image).squeeze()

                image_np = (scaled_image.numpy() * 255).astype(np.uint8)
                img = Image.fromarray(image_np)
                img_byte_arr = io.BytesIO()
                img.save(img_byte_arr, format="PNG")
                img_byte_arr.seek(0)
                img_binary = img_byte_arr
                img_binary.name = f"image_{i}.png"

                img_binaries.append(img_binary)
                # A single image is sent as the "image" field; batches use the
                # array-style "image[]" field name for multiple files.
                if batch_size == 1:
                    files.append(("image", img_binary))
                else:
                    files.append(("image[]", img_binary))

        if mask is not None:
            if image is None:
                raise Exception("Cannot use a mask without an input image")
            if image.shape[0] != 1:
                raise Exception("Cannot use a mask with multiple images")
            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            batch, height, width = mask.shape
            # As with DALL·E 2 editing, invert the mask into the alpha channel:
            # transparent (alpha=0) pixels mark the regions to be replaced.
            rgba_mask = torch.zeros(height, width, 4, device="cpu")
            rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()

            scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0)).squeeze()

            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
            mask_img = Image.fromarray(mask_np)
            mask_img_byte_arr = io.BytesIO()
            mask_img.save(mask_img_byte_arr, format="PNG")
            mask_img_byte_arr.seek(0)
            mask_binary = mask_img_byte_arr
            mask_binary.name = "mask.png"
            files.append(("mask", mask_binary))

        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse,
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                quality=quality,
                background=background,
                n=n,
                seed=seed,
                size=size,
            ),
            files=files if files else None,
            content_type=content_type,
            auth_kwargs=kwargs,
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response, node_id=unique_id)
        return (img_tensor,)

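# Usage sketch (illustrative; auth assumptions as above). Editing accepts a
# batch of reference images, but a mask is only valid with a single image:
#
#   node = OpenAIGPTImage1()
#   (images,) = node.api_call(
#       "replace the sky with aurora",
#       image=image_batch,  # hypothetical [B, H, W, C] float tensor in [0, 1]
#       quality="high",
#   )
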
NODE_CLASS_MAPPINGS = {
    "OpenAIDalle2": OpenAIDalle2,
    "OpenAIDalle3": OpenAIDalle3,
    "OpenAIGPTImage1": OpenAIGPTImage1,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "OpenAIDalle2": "OpenAI DALL·E 2",
    "OpenAIDalle3": "OpenAI DALL·E 3",
    "OpenAIGPTImage1": "OpenAI GPT Image 1",
}