Spaces:

JasonSmithSO
/

FooocusEnhanced

Configuration error

App Files Files Community

FooocusEnhanced / ComfyUI-Easy-Use /py /brushnet /__init__.py

JasonSmithSO

Upload 578 files

8866644 verified 6 months ago

raw

history blame contribute delete

35.3 kB

	#credit to nullquant for this module
	#from https://github.com/nullquant/ComfyUI-BrushNet

	import os
	import types

	import torch
	from accelerate import init_empty_weights, load_checkpoint_and_dispatch

	import comfy

	from .model import BrushNetModel, PowerPaintModel
	from .model_patch import add_model_patch_option, patch_model_function_wrapper
	from .powerpaint_utils import TokenizerWrapper, add_tokens

	# Directory that holds this module; the bundled model configs live in its
	# ``config`` sub-directory.
	cwd_path = os.path.dirname(os.path.realpath(__file__))
	_config_dir = os.path.join(cwd_path, 'config')
	brushnet_config_file = os.path.join(_config_dir, 'brushnet.json')
	brushnet_xl_config_file = os.path.join(_config_dir, 'brushnet_xl.json')
	powerpaint_config_file = os.path.join(_config_dir, 'powerpaint.json')

	# Fallback latent scaling factors, used when the base model config does not
	# expose ``latent_format.scale_factor``.
	sd15_scaling_factor = 0.18215
	sdxl_scaling_factor = 0.13025

	# Wrapped model classes that are unloaded once the conditioning latents
	# (and, for PowerPaint, the prompt embeddings) have been computed.
	ModelsToUnload = [
	    comfy.sd1_clip.SD1ClipModel,
	    comfy.ldm.models.autoencoder.AutoencoderKL,
	]

	class BrushNet:
	    """Helpers for loading BrushNet / PowerPaint checkpoints and patching a model with them."""

	    # Check models compatibility
	    def check_compatibilty(self, model, brushnet):
	        """Check that the base model and the loaded checkpoint belong to the same family.

	        Args:
	            model: object whose ``model.model.model_config`` identifies the base model.
	            brushnet: dict with boolean ``"SDXL"`` and ``"PP"`` entries, as returned by
	                ``load_brushnet_model``.

	        Returns:
	            Tuple ``(is_SDXL, is_PP)``. ``is_PP`` can only be True for an SD1.5 base model.

	        Raises:
	            Exception: if base model and checkpoint types differ, or the base
	                model is neither SD15 nor SDXL.
	        """
	        model_config = model.model.model_config
	        is_SDXL = False
	        is_PP = False

	        if isinstance(model_config, comfy.supported_models.SD15):
	            print('Base model type: SD1.5')
	            if brushnet["SDXL"]:
	                raise Exception("Base model is SD15, but BrushNet is SDXL type")
	            if brushnet["PP"]:
	                is_PP = True
	        elif isinstance(model_config, comfy.supported_models.SDXL):
	            print('Base model type: SDXL')
	            is_SDXL = True
	            if not brushnet["SDXL"]:
	                raise Exception("Base model is SDXL, but BrushNet is SD15 type")
	        else:
	            print('Base model type: ', type(model_config))
	            raise Exception("Unsupported model type: " + str(type(model_config)))

	        return (is_SDXL, is_PP)

	    def check_image_mask(self, image, mask, name):
	        """Bring ``image`` to 4 dims and ``mask`` to 3 dims with equal batch sizes.

	        Args:
	            image: tensor expected as [B, H, W, C]; a missing batch dim is added.
	            mask: tensor expected as [B, H, W]; a missing batch dim is added and
	                a trailing channel dim is reduced to its first channel.
	            name: prefix used in the printed diagnostics.

	        Returns:
	            Tuple ``(image, mask)``. If there are fewer masks than images, a
	            single mask is repeated, otherwise all-zero masks are appended;
	            surplus masks are dropped.
	        """
	        if len(image.shape) < 4:
	            # image tensor shape should be [B, H, W, C], but batch somehow is missing
	            image = image[None, :, :, :]

	        if len(mask.shape) > 3:
	            # mask tensor shape should be [B, H, W] but we got [B, H, W, C]
	            # (probably an image): keep the first (red) channel
	            mask = mask[:, :, :, 0]
	        elif len(mask.shape) < 3:
	            # mask tensor shape should be [B, H, W] but batch somehow is missing
	            mask = mask[None, :, :]

	        images = image.shape[0]
	        masks = mask.shape[0]

	        if images > masks:
	            print(name, "gets batch of images (%d) but only %d masks" % (images, masks))
	            if masks == 1:
	                print(name, "will copy the mask to fill batch")
	                mask = torch.cat([mask] * images, dim=0)
	            else:
	                print(name, "will add empty masks to fill batch")
	                # Allocate the padding with the mask's own dtype and device so the
	                # concatenation neither promotes the dtype nor fails on a
	                # device mismatch.
	                empty_mask = torch.zeros(
	                    [images - masks, mask.shape[1], mask.shape[2]],
	                    dtype=mask.dtype,
	                    device=mask.device,
	                )
	                mask = torch.cat([mask, empty_mask], dim=0)
	        elif images < masks:
	            print(name, "gets batch of images (%d) but too many (%d) masks" % (images, masks))
	            mask = mask[:images, :, :]

	        return (image, mask)

	    # Prepare image and mask
	    def prepare_image(self, image, mask):
	        """Binarise the mask and blank out the masked pixels of the image.

	        Args:
	            image: image tensor, normalised by ``check_image_mask``.
	            mask: mask tensor, normalised by ``check_image_mask``.

	        Returns:
	            Tuple ``(masked_image, mask)`` where ``mask`` has been rounded and
	            ``masked_image = image * (1 - mask)``.

	        Raises:
	            Exception: if image and mask differ in height or width.
	        """
	        image, mask = self.check_image_mask(image, mask, 'BrushNet')

	        print("BrushNet image.shape =", image.shape, "mask.shape =", mask.shape)

	        if mask.shape[2] != image.shape[2] or mask.shape[1] != image.shape[1]:
	            raise Exception("Image and mask should be the same size")

	        # As a suggestion of inferno46n2 (https://github.com/nullquant/ComfyUI-BrushNet/issues/64)
	        mask = mask.round()

	        masked_image = image * (1.0 - mask[:, :, :, None])

	        return (masked_image, mask)

	    @staticmethod
	    def _window_origin(center, extent, window, last_index):
	        """Place a window of ``window`` pixels along one axis of length ``extent``.

	        The window is centred on ``center``, clamped to the axis, and - when
	        ``last_index`` is given - shifted so that this index stays inside it.

	        Returns:
	            Tuple ``(origin, size)``.
	        """
	        if extent <= window:
	            # the whole axis fits into the window
	            return 0, extent

	        origin = max(0, center - window / 2.0)
	        if origin + window > extent:
	            origin = extent - window
	        origin = int(origin)

	        # Truncation can leave the last masked pixel just outside the window
	        # when the mask span equals the window size; shift to include it.
	        if last_index is not None and origin + window <= last_index:
	            origin = last_index - window + 1

	        return origin, window

	    # Get origin of the mask
	    def cut_with_mask(self, mask, width, height):
	        """Compute a ``width`` x ``height`` crop window that contains the mask.

	        Args:
	            mask: 2-D tensor; pixels equal to 1 are treated as masked.
	            width: requested window width.
	            height: requested window height.

	        Returns:
	            Tuple ``(x0, y0, w, h)`` of ints. The window is centred on the mask
	            bounding box (or on the image when nothing is masked), kept inside
	            the mask tensor, and never larger than it.

	        Raises:
	            Exception: if the mask bounding box does not fit into the window.
	        """
	        iy, ix = (mask == 1).nonzero(as_tuple=True)

	        h0, w0 = mask.shape

	        if iy.numel() == 0:
	            # nothing is masked: centre the window on the image
	            x_c = w0 / 2.0
	            y_c = h0 / 2.0
	            x_max = None
	            y_max = None
	        else:
	            x_min = ix.min().item()
	            x_max = ix.max().item()
	            y_min = iy.min().item()
	            y_max = iy.max().item()

	            # the bounding box spans (max - min + 1) pixels on each axis
	            if x_max - x_min + 1 > width or y_max - y_min + 1 > height:
	                raise Exception("Mask is bigger than provided dimensions")

	            x_c = (x_min + x_max) / 2.0
	            y_c = (y_min + y_max) / 2.0

	        x0, w = BrushNet._window_origin(x_c, w0, width, x_max)
	        y0, h = BrushNet._window_origin(y_c, h0, height, y_max)

	        return (int(x0), int(y0), int(w), int(h))

	    # Prepare conditioning_latents
	    @torch.inference_mode()
	    def get_image_latents(self, masked_image, mask, vae, scaling_factor):
	        """Encode the masked image and resize the inverted mask to the latent size.

	        Args:
	            masked_image: image tensor; only its first three channels are encoded.
	            mask: mask tensor indexed as [B, H, W].
	            vae: object providing ``device`` and ``encode``.
	            scaling_factor: multiplier applied to the encoded latents.

	        Returns:
	            List ``[image_latents, interpolated_mask]`` where the second item
	            is ``1 - mask`` interpolated to the last two dims of the latents.
	        """
	        processed_image = masked_image.to(vae.device)
	        image_latents = vae.encode(processed_image[:, :, :, :3]) * scaling_factor

	        processed_mask = 1. - mask[:, None, :, :]
	        latent_size = (image_latents.shape[-2], image_latents.shape[-1])
	        interpolated_mask = torch.nn.functional.interpolate(processed_mask, size=latent_size)
	        interpolated_mask = interpolated_mask.to(image_latents.device)

	        conditioning_latents = [image_latents, interpolated_mask]

	        print('BrushNet CL: image_latents shape =', image_latents.shape, 'interpolated_mask shape =',
	              interpolated_mask.shape)

	        return conditioning_latents

	    def brushnet_blocks(self, sd):
	        """Count the state-dict keys that mention each kind of BrushNet block.

	        Args:
	            sd: mapping (or iterable with ``len``) of parameter names.

	        Returns:
	            Tuple ``(down, mid, up, total)`` with the number of keys containing
	            ``brushnet_down_block``, ``brushnet_mid_block``,
	            ``brushnet_up_block`` and the total number of keys.
	        """
	        down = 0
	        mid = 0
	        up = 0
	        for key in sd:
	            if 'brushnet_down_block' in key:
	                down += 1
	            if 'brushnet_mid_block' in key:
	                mid += 1
	            if 'brushnet_up_block' in key:
	                up += 1
	        return (down, mid, up, len(sd))

	    def get_model_type(self, brushnet_file):
	        """Identify the checkpoint type from the key counts of its state dict.

	        Args:
	            brushnet_file: path of the checkpoint to inspect.

	        Returns:
	            Tuple ``(is_SDXL, is_PP)``.

	        Raises:
	            Exception: if the block counts match no known layout.
	        """
	        sd = comfy.utils.load_torch_file(brushnet_file)
	        down, mid, up, keys = self.brushnet_blocks(sd)
	        # only the counts are needed; release the weights right away
	        del sd

	        if down == 24 and mid == 2 and up == 30:
	            is_SDXL = False
	            # SD1.5 layout: 322 keys identify BrushNet, anything else is
	            # treated as PowerPaint
	            if keys == 322:
	                is_PP = False
	                print('BrushNet model type: SD1.5')
	            else:
	                is_PP = True
	                print('PowerPaint model type: SD1.5')
	        elif down == 18 and mid == 2 and up == 22:
	            print('BrushNet model type: Loading SDXL')
	            is_SDXL = True
	            is_PP = False
	        else:
	            raise Exception("Unknown BrushNet model")

	        return is_SDXL, is_PP

	    def load_brushnet_model(self, brushnet_file, dtype='float16'):
	        """Instantiate the matching model class and load the checkpoint into it.

	        Args:
	            brushnet_file: path of the checkpoint.
	            dtype: ``'float16'``, ``'bfloat16'`` or ``'float32'``; any other
	                value selects ``torch.float64``.

	        Returns:
	            One-element tuple holding a dict with keys ``"brushnet"`` (the
	            model), ``"SDXL"``, ``"PP"`` and ``"dtype"`` (the torch dtype).
	        """
	        is_SDXL, is_PP = self.get_model_type(brushnet_file)

	        if is_SDXL:
	            model_class, config_file = BrushNetModel, brushnet_xl_config_file
	        elif is_PP:
	            model_class, config_file = PowerPaintModel, powerpaint_config_file
	        else:
	            model_class, config_file = BrushNetModel, brushnet_config_file

	        # build the model under accelerate's init_empty_weights context;
	        # the checkpoint is loaded into it below
	        with init_empty_weights():
	            brushnet_config = model_class.load_config(config_file)
	            brushnet_model = model_class.from_config(brushnet_config)

	        if is_PP:
	            print("PowerPaint model file:", brushnet_file)
	        else:
	            print("BrushNet model file:", brushnet_file)

	        known_dtypes = {
	            'float16': torch.float16,
	            'bfloat16': torch.bfloat16,
	            'float32': torch.float32,
	        }
	        torch_dtype = known_dtypes.get(dtype, torch.float64)

	        brushnet_model = load_checkpoint_and_dispatch(
	            brushnet_model,
	            brushnet_file,
	            device_map="sequential",
	            max_memory=None,
	            offload_folder=None,
	            offload_state_dict=False,
	            dtype=torch_dtype,
	            force_hooks=False,
	        )

	        if is_PP:
	            print("PowerPaint model is loaded")
	        elif is_SDXL:
	            print("BrushNet SDXL model is loaded")
	        else:
	            print("BrushNet SD1.5 model is loaded")

	        return ({"brushnet": brushnet_model, "SDXL": is_SDXL, "PP": is_PP, "dtype": torch_dtype},)

	    def _latent_scaling_factor(self, model, default):
	        """Return ``latent_format.scale_factor`` of the model config, or ``default`` if absent."""
	        latent_format = getattr(model.model.model_config, 'latent_format', None)
	        return getattr(latent_format, 'scale_factor', default)

	    def _unload_aux_models(self):
	        """Unload every loaded model whose wrapped type is listed in ``ModelsToUnload``."""
	        # Iterate over a snapshot: removing entries from the list that is being
	        # iterated would skip the element following each removed one.
	        for loaded_model in list(comfy.model_management.current_loaded_models):
	            if type(loaded_model.model.model) in ModelsToUnload:
	                comfy.model_management.current_loaded_models.remove(loaded_model)
	                loaded_model.model_unload()

	    def brushnet_model_update(self, model, vae, image, mask, brushnet, positive, negative, scale, start_at, end_at):
	        """Clone ``model`` and patch the clone so BrushNet conditions its sampling.

	        Args:
	            model: base model; a clone is patched, the argument is left as is.
	            vae: VAE used to encode the masked image.
	            image: source image tensor.
	            mask: inpainting mask tensor.
	            brushnet: dict returned by ``load_brushnet_model``.
	            positive: conditioning; ``positive[0][0]`` is the embedding tensor and
	                ``positive[0][1]`` may hold ``'pooled_output'``.
	            negative: negative conditioning with the same layout.
	            scale: BrushNet conditioning scale.
	            start_at: first step at which the conditioning is applied.
	            end_at: last step at which the conditioning is applied.

	        Returns:
	            Tuple ``(model, positive, negative, {"samples": latent})`` where
	            ``latent`` is an all-zero tensor with the conditioning latent size.

	        Raises:
	            Exception: if the models are incompatible or a PowerPaint checkpoint
	                was supplied.
	        """
	        is_SDXL, is_PP = self.check_compatibilty(model, brushnet)

	        if is_PP:
	            raise Exception("PowerPaint model was loaded, please use PowerPaint node")

	        # Make a copy of the model so that we're not patching it everywhere in the workflow.
	        model = model.clone()

	        # prepare image and mask
	        # no batches for original image and mask
	        masked_image, mask = self.prepare_image(image, mask)

	        batch = masked_image.shape[0]
	        width = masked_image.shape[2]
	        height = masked_image.shape[1]

	        scaling_factor = self._latent_scaling_factor(
	            model, sdxl_scaling_factor if is_SDXL else sd15_scaling_factor)

	        torch_dtype = brushnet['dtype']

	        def to_brushnet(tensor):
	            # cast to the checkpoint dtype, then move to the BrushNet device
	            return tensor.to(dtype=torch_dtype).to(brushnet['brushnet'].device)

	        # prepare conditioning latents
	        conditioning_latents = self.get_image_latents(masked_image, mask, vae, scaling_factor)
	        conditioning_latents[0] = to_brushnet(conditioning_latents[0])
	        conditioning_latents[1] = to_brushnet(conditioning_latents[1])

	        # unload vae
	        del vae
	        self._unload_aux_models()

	        # prepare embeddings
	        prompt_embeds = to_brushnet(positive[0][0])
	        negative_prompt_embeds = to_brushnet(negative[0][0])

	        # Pad the shorter prompt with copies of its last 77-token chunk so
	        # both embeddings end up with the same number of chunks.
	        max_tokens = max(prompt_embeds.shape[1], negative_prompt_embeds.shape[1])
	        if prompt_embeds.shape[1] < max_tokens:
	            multiplier = max_tokens // 77 - prompt_embeds.shape[1] // 77
	            prompt_embeds = torch.concat([prompt_embeds] + [prompt_embeds[:, -77:, :]] * multiplier, dim=1)
	            print('BrushNet: negative prompt more than 75 tokens:', negative_prompt_embeds.shape,
	                  'multiplying prompt_embeds')
	        if negative_prompt_embeds.shape[1] < max_tokens:
	            multiplier = max_tokens // 77 - negative_prompt_embeds.shape[1] // 77
	            negative_prompt_embeds = torch.concat(
	                [negative_prompt_embeds] + [negative_prompt_embeds[:, -77:, :]] * multiplier, dim=1)
	            print('BrushNet: positive prompt more than 75 tokens:', prompt_embeds.shape,
	                  'multiplying negative_prompt_embeds')

	        if len(positive[0]) > 1 and 'pooled_output' in positive[0][1] and positive[0][1]['pooled_output'] is not None:
	            pooled_prompt_embeds = to_brushnet(positive[0][1]['pooled_output'])
	        else:
	            print('BrushNet: positive conditioning has not pooled_output')
	            if is_SDXL:
	                print('BrushNet will not produce correct results')
	            # zeros rather than uninitialised memory, so the placeholder is deterministic
	            pooled_prompt_embeds = torch.zeros([2, 1280], device=brushnet['brushnet'].device).to(dtype=torch_dtype)

	        if len(negative[0]) > 1 and 'pooled_output' in negative[0][1] and negative[0][1]['pooled_output'] is not None:
	            negative_pooled_prompt_embeds = to_brushnet(negative[0][1]['pooled_output'])
	        else:
	            print('BrushNet: negative conditioning has not pooled_output')
	            if is_SDXL:
	                print('BrushNet will not produce correct results')
	            negative_pooled_prompt_embeds = torch.zeros([1, pooled_prompt_embeds.shape[1]],
	                                                        device=brushnet['brushnet'].device).to(dtype=torch_dtype)

	        time_ids = to_brushnet(
	            torch.tensor([[height, width, 0., 0., height, width]], dtype=torch.float32))

	        if not is_SDXL:
	            # the additional embeddings are only passed on for SDXL
	            pooled_prompt_embeds = None
	            negative_pooled_prompt_embeds = None
	            time_ids = None

	        # apply patch to model
	        add_brushnet_patch(model,
	                           brushnet['brushnet'],
	                           torch_dtype,
	                           conditioning_latents,
	                           (scale, start_at, end_at),
	                           prompt_embeds, negative_prompt_embeds,
	                           pooled_prompt_embeds, negative_pooled_prompt_embeds, time_ids,
	                           False)

	        latent = torch.zeros([batch, 4, conditioning_latents[0].shape[2], conditioning_latents[0].shape[3]],
	                             device=brushnet['brushnet'].device)

	        return (model, positive, negative, {"samples": latent},)

	    # PowerPaint
	    def load_powerpaint_clip(self, base_clip_file, pp_clip_file):
	        """Load a CLIP model and extend it with the PowerPaint placeholder tokens.

	        Args:
	            base_clip_file: path of the base CLIP checkpoint.
	            pp_clip_file: path of the PowerPaint text-encoder weights, loaded
	                non-strictly on top of the base encoder.

	        Returns:
	            One-element tuple with the modified CLIP object.
	        """
	        pp_clip = comfy.sd.load_clip(ckpt_paths=[base_clip_file])

	        print('PowerPaint base CLIP file: ', base_clip_file)

	        pp_tokenizer = TokenizerWrapper(pp_clip.tokenizer.clip_l.tokenizer)
	        pp_text_encoder = pp_clip.patcher.model.clip_l.transformer

	        # register the PowerPaint task tokens, 10 vectors each, initialised from "a"
	        add_tokens(
	            tokenizer=pp_tokenizer,
	            text_encoder=pp_text_encoder,
	            placeholder_tokens=["P_ctxt", "P_shape", "P_obj"],
	            initialize_tokens=["a", "a", "a"],
	            num_vectors_per_token=10,
	        )

	        pp_weights = comfy.utils.load_torch_file(pp_clip_file)
	        pp_text_encoder.load_state_dict(pp_weights, strict=False)

	        print('PowerPaint CLIP file: ', pp_clip_file)

	        pp_clip.tokenizer.clip_l.tokenizer = pp_tokenizer
	        pp_clip.patcher.model.clip_l.transformer = pp_text_encoder

	        return (pp_clip,)

	    def powerpaint_model_update(self, model, vae, image, mask, powerpaint, clip, positive, negative, fitting, function, scale, start_at, end_at, save_memory):
	        """Clone ``model`` and patch the clone so PowerPaint conditions its sampling.

	        Args:
	            model: base model; a clone is patched, the argument is left as is.
	            vae: VAE used to encode the masked image.
	            image: source image tensor.
	            mask: inpainting mask tensor.
	            powerpaint: dict returned by ``load_brushnet_model`` for a PowerPaint checkpoint.
	            clip: CLIP object used to encode the task prompts.
	            positive: conditioning, returned unchanged.
	            negative: negative conditioning, returned unchanged.
	            fitting: blend weight between the "A" and "B" task prompt embeddings.
	            function: task name (``"object removal"``, ``"context aware"``,
	                ``"shape guided"``, ``"image outpainting"``); any other value
	                uses the ``P_obj`` prompts.
	            scale: conditioning scale.
	            start_at: first step at which the conditioning is applied.
	            end_at: last step at which the conditioning is applied.
	            save_memory: attention slice setting, or ``'none'`` to leave it untouched.

	        Returns:
	            Tuple ``(model, positive, negative, {"samples": latent})`` where
	            ``latent`` is an all-zero tensor with the conditioning latent size.

	        Raises:
	            Exception: if the models are incompatible or a plain BrushNet
	                checkpoint was supplied.
	        """
	        is_SDXL, is_PP = self.check_compatibilty(model, powerpaint)
	        if not is_PP:
	            raise Exception("BrushNet model was loaded, please use BrushNet node")

	        # Make a copy of the model so that we're not patching it everywhere in the workflow.
	        model = model.clone()

	        # prepare image and mask
	        # no batches for original image and mask
	        masked_image, mask = self.prepare_image(image, mask)

	        batch = masked_image.shape[0]

	        scaling_factor = self._latent_scaling_factor(model, sd15_scaling_factor)

	        torch_dtype = powerpaint['dtype']

	        def to_powerpaint(tensor):
	            # cast to the checkpoint dtype, then move to the PowerPaint device
	            return tensor.to(dtype=torch_dtype).to(powerpaint['brushnet'].device)

	        # prepare conditioning latents
	        conditioning_latents = self.get_image_latents(masked_image, mask, vae, scaling_factor)
	        conditioning_latents[0] = to_powerpaint(conditioning_latents[0])
	        conditioning_latents[1] = to_powerpaint(conditioning_latents[1])

	        # prepare embeddings
	        # task -> (promptA, promptB, negative_promptA, negative_promptB, hint to print)
	        task_prompts = {
	            "object removal": ("P_ctxt", "P_ctxt", "P_obj", "P_obj",
	                               'You should add to positive prompt: "empty scene blur"'),
	            "context aware": ("P_ctxt", "P_ctxt", "", "",
	                              'You should add to positive prompt: "empty scene"'),
	            "shape guided": ("P_shape", "P_ctxt", "P_shape", "P_ctxt", None),
	            "image outpainting": ("P_ctxt", "P_ctxt", "P_obj", "P_obj",
	                                  'You should add to positive prompt: "empty scene"'),
	        }
	        promptA, promptB, negative_promptA, negative_promptB, hint = task_prompts.get(
	            function, ("P_obj", "P_obj", "P_obj", "P_obj", None))
	        if hint is not None:
	            print(hint)

	        def encode(text):
	            return clip.encode_from_tokens(clip.tokenize(text), return_pooled=False)

	        prompt_embedsA = encode(promptA)
	        negative_prompt_embedsA = encode(negative_promptA)
	        prompt_embedsB = encode(promptB)
	        negative_prompt_embedsB = encode(negative_promptB)

	        # blend the A and B embeddings by the fitting degree
	        prompt_embeds_pp = to_powerpaint(prompt_embedsA * fitting + (1.0 - fitting) * prompt_embedsB)
	        negative_prompt_embeds_pp = to_powerpaint(
	            negative_prompt_embedsA * fitting + (1.0 - fitting) * negative_prompt_embedsB)

	        # unload vae and CLIPs
	        del vae
	        del clip
	        self._unload_aux_models()

	        # apply patch to model
	        if save_memory != 'none':
	            powerpaint['brushnet'].set_attention_slice(save_memory)

	        # NOTE(review): the negative task embeddings are passed in the
	        # prompt_embeds position and vice versa, exactly as before - confirm
	        # this ordering is intended for PowerPaint.
	        add_brushnet_patch(model,
	                           powerpaint['brushnet'],
	                           torch_dtype,
	                           conditioning_latents,
	                           (scale, start_at, end_at),
	                           negative_prompt_embeds_pp, prompt_embeds_pp,
	                           None, None, None,
	                           False)

	        latent = torch.zeros([batch, 4, conditioning_latents[0].shape[2], conditioning_latents[0].shape[3]],
	                             device=powerpaint['brushnet'].device)

	        return (model, positive, negative, {"samples": latent},)
	@torch.inference_mode()
	def brushnet_inference(x, timesteps, transformer_options, debug):
	    """Run the BrushNet model stored in ``transformer_options`` for the current step.

	    Args:
	        x: incoming latent tensor; a detached copy is cast to the BrushNet dtype/device.
	        timesteps: timestep tensor; a detached copy is cast likewise.
	        transformer_options: dict that must hold ``'model_patch'`` with a
	            ``'brushnet'`` entry (model, dtype, latents, controls, embeddings,
	            ``latent_id``) plus ``'step'`` and ``'total_steps'``, and a
	            ``'cond_or_uncond'`` list.
	        debug: forwarded to the BrushNet call; also enables a per-step print.

	    Returns:
	        ``([], 0, [])`` when the patch data is missing or the model has an
	        unexpected class, otherwise whatever the BrushNet call returns
	        (the caller unpacks it into three values).
	    """
	    # Bail out with "no extra samples" whenever the patch data is incomplete.
	    if 'model_patch' not in transformer_options:
	        print('BrushNet inference: there is no model_patch key in transformer_options')
	        return ([], 0, [])
	    mp = transformer_options['model_patch']
	    if 'brushnet' not in mp:
	        print('BrushNet inference: there is no brushnet key in mdel_patch')
	        return ([], 0, [])
	    bo = mp['brushnet']
	    if 'model' not in bo:
	        print('BrushNet inference: there is no model key in brushnet')
	        return ([], 0, [])
	    brushnet = bo['model']
	    if not (isinstance(brushnet, BrushNetModel) or isinstance(brushnet, PowerPaintModel)):
	        print('BrushNet model is not a BrushNetModel class')
	        return ([], 0, [])

	    # Unpack what add_brushnet_patch stored.
	    torch_dtype = bo['dtype']
	    cl_list = bo['latents']
	    brushnet_conditioning_scale, control_guidance_start, control_guidance_end = bo['controls']
	    pe = bo['prompt_embeds']
	    npe = bo['negative_prompt_embeds']
	    ppe, nppe, time_ids = bo['add_embeds']

	    #do_classifier_free_guidance = mp['free_guidance']
	    # classifier-free guidance is assumed when more than one cond/uncond entry is present
	    do_classifier_free_guidance = len(transformer_options['cond_or_uncond']) > 1

	    # work on detached copies so the caller's tensors are left untouched
	    x = x.detach().clone()
	    x = x.to(torch_dtype).to(brushnet.device)

	    timesteps = timesteps.detach().clone()
	    timesteps = timesteps.to(torch_dtype).to(brushnet.device)

	    total_steps = mp['total_steps']
	    step = mp['step']

	    added_cond_kwargs = {}

	    if do_classifier_free_guidance and step == 0:
	        print('BrushNet inference: do_classifier_free_guidance is True')

	    # optional AnimateDiff sub-indexes selecting which conditioning frames to use
	    sub_idx = None
	    if 'ad_params' in transformer_options and 'sub_idxs' in transformer_options['ad_params']:
	        sub_idx = transformer_options['ad_params']['sub_idxs']

	    # we have batch input images
	    batch = cl_list[0].shape[0]
	    # we have incoming latents
	    latents_incoming = x.shape[0]
	    # and we already got some
	    latents_got = bo['latent_id']
	    if step == 0 or batch > 1:
	        print('BrushNet inference, step = %d: image batch = %d, got %d latents, starting from %d' \
	                % (step, batch, latents_incoming, latents_got))

	    # Per-latent lists; the negative lists are appended after the positive
	    # ones further below.
	    image_latents = []
	    masks = []
	    prompt_embeds = []
	    negative_prompt_embeds = []
	    pooled_prompt_embeds = []
	    negative_pooled_prompt_embeds = []
	    if sub_idx:
	        # AnimateDiff indexes detected
	        if step == 0:
	            print('BrushNet inference: AnimateDiff indexes detected and applied')

	        batch = len(sub_idx)

	        if do_classifier_free_guidance:
	            # first loop: latents/masks plus both positive and negative embeddings
	            for i in sub_idx:
	                image_latents.append(cl_list[0][i][None,:,:,:])
	                masks.append(cl_list[1][i][None,:,:,:])
	                prompt_embeds.append(pe)
	                negative_prompt_embeds.append(npe)
	                pooled_prompt_embeds.append(ppe)
	                negative_pooled_prompt_embeds.append(nppe)
	            # second loop: the same latents/masks once more for the second half
	            for i in sub_idx:
	                image_latents.append(cl_list[0][i][None,:,:,:])
	                masks.append(cl_list[1][i][None,:,:,:])
	        else:
	            for i in sub_idx:
	                image_latents.append(cl_list[0][i][None,:,:,:])
	                masks.append(cl_list[1][i][None,:,:,:])
	                prompt_embeds.append(pe)
	                pooled_prompt_embeds.append(ppe)
	    else:
	        # do_classifier_free_guidance = 2 passes, 1st pass is cond, 2nd is uncond
	        continue_batch = True
	        for i in range(latents_incoming):
	            # position of this latent counted from the start of the image batch
	            number = latents_got + i
	            if number < batch:
	                # 1st pass, cond
	                image_latents.append(cl_list[0][number][None,:,:,:])
	                masks.append(cl_list[1][number][None,:,:,:])
	                prompt_embeds.append(pe)
	                pooled_prompt_embeds.append(ppe)
	            elif do_classifier_free_guidance and number < batch * 2:
	                # 2nd pass, uncond
	                image_latents.append(cl_list[0][number-batch][None,:,:,:])
	                masks.append(cl_list[1][number-batch][None,:,:,:])
	                negative_prompt_embeds.append(npe)
	                negative_pooled_prompt_embeds.append(nppe)
	            else:
	                # latent batch
	                image_latents.append(cl_list[0][0][None,:,:,:])
	                masks.append(cl_list[1][0][None,:,:,:])
	                prompt_embeds.append(pe)
	                pooled_prompt_embeds.append(ppe)
	                # NOTE(review): latents_got is rewritten to -i, which changes
	                # `number` for the remaining iterations - confirm intended.
	                latents_got = -i
	                continue_batch = False

	        # remember where the next call has to continue inside the image batch
	        if continue_batch:
	            # we don't have full batch yet
	            if do_classifier_free_guidance:
	                if number < batch * 2 - 1:
	                    bo['latent_id'] = number + 1
	                else:
	                    bo['latent_id'] = 0
	            else:
	                if number < batch - 1:
	                    bo['latent_id'] = number + 1
	                else:
	                    bo['latent_id'] = 0
	        else:
	            bo['latent_id'] = 0

	    # join each image latent with its mask along dim 1, then stack along dim 0
	    cl = []
	    for il, m in zip(image_latents, masks):
	        cl.append(torch.concat([il, m], dim=1))
	    cl2apply = torch.concat(cl, dim=0)

	    conditioning_latents = cl2apply.to(torch_dtype).to(brushnet.device)

	    # positive embeddings first, negative ones after them
	    prompt_embeds.extend(negative_prompt_embeds)
	    prompt_embeds = torch.concat(prompt_embeds, dim=0).to(torch_dtype).to(brushnet.device)

	    if ppe is not None:
	        # pooled embeddings are present: also pass time ids and text embeds
	        added_cond_kwargs = {}
	        added_cond_kwargs['time_ids'] = torch.concat([time_ids] * latents_incoming, dim = 0).to(torch_dtype).to(brushnet.device)

	        pooled_prompt_embeds.extend(negative_pooled_prompt_embeds)
	        pooled_prompt_embeds = torch.concat(pooled_prompt_embeds, dim=0).to(torch_dtype).to(brushnet.device)
	        added_cond_kwargs['text_embeds'] = pooled_prompt_embeds
	    else:
	        added_cond_kwargs = None

	    # resize the conditioning to the sampled latent size when they differ
	    if x.shape[2] != conditioning_latents.shape[2] or x.shape[3] != conditioning_latents.shape[3]:
	        if step == 0:
	            print('BrushNet inference: image', conditioning_latents.shape, 'and latent', x.shape, 'have different size, resizing image')
	        conditioning_latents = torch.nn.functional.interpolate(
	            conditioning_latents, size=(
	                x.shape[2],
	                x.shape[3],
	            ), mode='bicubic',
	        ).to(torch_dtype).to(brushnet.device)

	    if step == 0:
	        print('BrushNet inference: sample', x.shape, ', CL', conditioning_latents.shape, 'dtype', torch_dtype)

	    if debug: print('BrushNet: step =', step)

	    # outside the [start, end] step range the conditioning scale is zero
	    if step < control_guidance_start or step > control_guidance_end:
	        cond_scale = 0.0
	    else:
	        cond_scale = brushnet_conditioning_scale

	    return brushnet(x,
	                    encoder_hidden_states=prompt_embeds,
	                    brushnet_cond=conditioning_latents,
	                    timestep = timesteps,
	                    conditioning_scale=cond_scale,
	                    guess_mode=False,
	                    added_cond_kwargs=added_cond_kwargs,
	                    return_dict=False,
	                    debug=debug,
	                    )

	def add_brushnet_patch(model, brushnet, torch_dtype, conditioning_latents,
	                       controls,
	                       prompt_embeds, negative_prompt_embeds,
	                       pooled_prompt_embeds, negative_pooled_prompt_embeds, time_ids,
	                       debug):
	    """Patch ``model`` so BrushNet samples are added to its UNet layer outputs.

	    Three things happen: a function wrapper (``brushnet_forward``) is
	    registered that runs BrushNet and hands one sample to the chosen layer of
	    every block, the BrushNet state is stored under
	    ``model_patch['brushnet']`` in the model options, and the ``forward`` of
	    every layer in the input, middle and output blocks is replaced by a
	    version that adds its pending sample to the layer output.

	    Args:
	        model: model object whose ``model.diffusion_model`` layers are patched.
	        brushnet: BrushNet or PowerPaint model to run.
	        torch_dtype: dtype used for BrushNet inference.
	        conditioning_latents: ``[image_latents, mask]`` list.
	        controls: tuple ``(conditioning_scale, start_step, end_step)``.
	        prompt_embeds: positive prompt embeddings.
	        negative_prompt_embeds: negative prompt embeddings.
	        pooled_prompt_embeds: pooled positive embeddings, or None.
	        negative_pooled_prompt_embeds: pooled negative embeddings, or None.
	        time_ids: additional time ids, or None.
	        debug: forwarded to ``brushnet_inference``.
	    """
	    is_SDXL = isinstance(model.model.model_config, comfy.supported_models.SDXL)

	    Conv2d = comfy.ops.disable_weight_init.Conv2d
	    ResBlock = comfy.ldm.modules.diffusionmodules.openaimodel.ResBlock
	    Downsample = comfy.ldm.modules.diffusionmodules.openaimodel.Downsample
	    Upsample = comfy.ldm.modules.diffusionmodules.openaimodel.Upsample
	    SpatialTransformer = comfy.ldm.modules.attention.SpatialTransformer

	    # Each entry is [block index, layer type]: the last layer of that type in
	    # the block receives the next BrushNet sample.
	    if is_SDXL:
	        input_blocks = [[0, Conv2d],
	                        [1, ResBlock],
	                        [2, ResBlock],
	                        [3, Downsample],
	                        [4, SpatialTransformer],
	                        [5, SpatialTransformer],
	                        [6, Downsample],
	                        [7, SpatialTransformer],
	                        [8, SpatialTransformer]]
	        middle_block = [0, ResBlock]
	        output_blocks = [[0, SpatialTransformer],
	                         [1, SpatialTransformer],
	                         [2, SpatialTransformer],
	                         [2, Upsample],
	                         [3, SpatialTransformer],
	                         [4, SpatialTransformer],
	                         [5, SpatialTransformer],
	                         [5, Upsample],
	                         [6, ResBlock],
	                         [7, ResBlock],
	                         [8, ResBlock]]
	    else:
	        input_blocks = [[0, Conv2d],
	                        [1, SpatialTransformer],
	                        [2, SpatialTransformer],
	                        [3, Downsample],
	                        [4, SpatialTransformer],
	                        [5, SpatialTransformer],
	                        [6, Downsample],
	                        [7, SpatialTransformer],
	                        [8, SpatialTransformer],
	                        [9, Downsample],
	                        [10, ResBlock],
	                        [11, ResBlock]]
	        middle_block = [0, ResBlock]
	        output_blocks = [[0, ResBlock],
	                         [1, ResBlock],
	                         [2, ResBlock],
	                         [2, Upsample],
	                         [3, SpatialTransformer],
	                         [4, SpatialTransformer],
	                         [5, SpatialTransformer],
	                         [5, Upsample],
	                         [6, SpatialTransformer],
	                         [7, SpatialTransformer],
	                         [8, SpatialTransformer],
	                         [8, Upsample],
	                         [9, SpatialTransformer],
	                         [10, SpatialTransformer],
	                         [11, SpatialTransformer]]

	    def last_layer_index(block, tp):
	        """Return ``(index of the last layer of exact type tp, layer types of block)``.

	        The index is -1 when the block has no such layer; the list of layer
	        types is always returned so it can be shown in diagnostics.
	        """
	        layer_types = [type(layer) for layer in block]
	        for idx in range(len(layer_types) - 1, -1, -1):
	            if layer_types[idx] == tp:
	                return idx, layer_types
	        return -1, layer_types

	    def brushnet_forward(model, x, timesteps, transformer_options, control):
	        """Run BrushNet and queue its samples on the layers that have to add them."""
	        if 'brushnet' not in transformer_options['model_patch']:
	            input_samples = []
	            mid_sample = 0
	            output_samples = []
	        else:
	            # brushnet inference
	            input_samples, mid_sample, output_samples = brushnet_inference(x, timesteps, transformer_options, debug)

	        # give additional samples to blocks
	        for i, tp in input_blocks:
	            idx, layer_list = last_layer_index(model.input_blocks[i], tp)
	            if idx < 0:
	                print("BrushNet can't find", tp, "layer in", i, "input block:", layer_list)
	                continue
	            model.input_blocks[i][idx].add_sample_after = input_samples.pop(0) if input_samples else 0

	        idx, layer_list = last_layer_index(model.middle_block, middle_block[1])
	        if idx < 0:
	            print("BrushNet can't find", middle_block[1], "layer in middle block", layer_list)
	        # NOTE(review): there is no `continue` here, so with idx == -1 the
	        # sample goes to the last layer of the middle block - kept as is.
	        model.middle_block[idx].add_sample_after = mid_sample

	        for i, tp in output_blocks:
	            idx, layer_list = last_layer_index(model.output_blocks[i], tp)
	            if idx < 0:
	                print("BrushNet can't find", tp, "layer in", i, "outnput block:", layer_list)
	                continue
	            model.output_blocks[i][idx].add_sample_after = output_samples.pop(0) if output_samples else 0

	    patch_model_function_wrapper(model, brushnet_forward)

	    # store everything brushnet_inference needs in the model options
	    to = add_model_patch_option(model)
	    mp = to['model_patch']
	    if 'brushnet' not in mp:
	        mp['brushnet'] = {}
	    bo = mp['brushnet']

	    bo['model'] = brushnet
	    bo['dtype'] = torch_dtype
	    bo['latents'] = conditioning_latents
	    bo['controls'] = controls
	    bo['prompt_embeds'] = prompt_embeds
	    bo['negative_prompt_embeds'] = negative_prompt_embeds
	    bo['add_embeds'] = (pooled_prompt_embeds, negative_pooled_prompt_embeds, time_ids)
	    bo['latent_id'] = 0

	    # patch layers `forward` so we can apply brushnet
	    def forward_patched_by_brushnet(self, x, *args, **kwargs):
	        """Call the original ``forward`` and add the pending BrushNet sample.

	        Positional and keyword arguments are passed through unchanged, so
	        layers called with only ``x`` or with keyword arguments keep working.
	        """
	        h = self.original_forward(x, *args, **kwargs)
	        if hasattr(self, 'add_sample_after'):
	            to_add = self.add_sample_after
	            if torch.is_tensor(to_add):
	                # interpolate due to RAUNet
	                if h.shape[2] != to_add.shape[2] or h.shape[3] != to_add.shape[3]:
	                    to_add = torch.nn.functional.interpolate(to_add, size=(h.shape[2], h.shape[3]), mode='bicubic')
	                h += to_add.to(h.dtype).to(h.device)
	            else:
	                h += to_add
	            # each sample is consumed exactly once
	            self.add_sample_after = 0
	        return h

	    def patch_layer(layer):
	        # keep the very first forward so repeated patching never wraps a wrapper
	        if not hasattr(layer, 'original_forward'):
	            layer.original_forward = layer.forward
	        layer.forward = types.MethodType(forward_patched_by_brushnet, layer)
	        layer.add_sample_after = 0

	    diffusion_model = model.model.diffusion_model

	    for block in diffusion_model.input_blocks:
	        for layer in block:
	            patch_layer(layer)

	    for layer in diffusion_model.middle_block:
	        patch_layer(layer)

	    for block in diffusion_model.output_blocks:
	        for layer in block:
	            patch_layer(layer)