import sys
import os
import torch
from pathlib import Path
from huggingface_hub import hf_hub_download
from PIL import Image, ImageSequence, ImageOps
from typing import Any, List
import numpy as np
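
# ComfyUI is vendored as a subdirectory. Putting it on sys.path lets ComfyUI's
# own absolute imports (e.g. `import comfy`) resolve when its nodes are loaded.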
sys.path.append("./ComfyUI")

from ComfyUI.nodes import (
    CheckpointLoaderSimple,
    VAEDecode,
    VAEEncode,
    KSampler,
    EmptyLatentImage,
    CLIPTextEncode,
)
from ComfyUI.comfy_extras.nodes_compositing import JoinImageWithAlpha
from ComfyUI.comfy_extras.nodes_mask import InvertMask, MaskToImage
from ComfyUI.comfy import samplers
from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import (
    LayeredDiffusionFG,
    LayeredDiffusionDecode,
    LayeredDiffusionCond,
)

import gradio as gr
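
# Download the SDXL checkpoint from the Hugging Face Hub and expose it to
# ComfyUI by symlinking it into ComfyUI's checkpoints folder.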
MODEL_PATH = hf_hub_download(
    repo_id="lllyasviel/fav_models",
    subfolder="fav",
    filename="juggernautXL_v8Rundiffusion.safetensors",
)
try:
    os.symlink(
        MODEL_PATH,
        Path("./ComfyUI/models/checkpoints/juggernautXL_v8Rundiffusion.safetensors"),
    )
except FileExistsError:
    pass
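
# Instantiate each ComfyUI node once at startup and keep only its callable
# entry point; the checkpoint is loaded a single time and shared by all requests.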
with torch.inference_mode():
    ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
    ckpt = ckpt_load_checkpoint(ckpt_name="juggernautXL_v8Rundiffusion.safetensors")
    cliptextencode = CLIPTextEncode().encode
    emptylatentimage_generate = EmptyLatentImage().generate
    ksampler_sample = KSampler().sample
    vae_decode = VAEDecode().decode
    vae_encode = VAEEncode().encode
    ld_fg_apply_layered_diffusion = LayeredDiffusionFG().apply_layered_diffusion
    ld_cond_apply_layered_diffusion = LayeredDiffusionCond().apply_layered_diffusion
    ld_decode = LayeredDiffusionDecode().decode
    mask_to_image = MaskToImage().mask_to_image
    invert_mask = InvertMask().invert
    join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha
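

# Convert ComfyUI image tensors ([B, H, W, C], floats in 0-1) to PIL images.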
def tensor_to_pil(images: torch.Tensor | List[torch.Tensor]) -> List[Image.Image]:
    if not isinstance(images, list):
        images = [images]
    imgs = []
    for image in images:
        i = 255.0 * image.cpu().numpy()
        img = Image.fromarray(np.clip(np.squeeze(i), 0, 255).astype(np.uint8))
        imgs.append(img)
    return imgs
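

# Pad an image to the next multiple of 64 on each side (replicating edge
# pixels), then letterbox it onto a black square canvas. Assumes an RGB input.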
def pad_image(input_image):
    pad_w, pad_h = (
        np.max(((2, 2), np.ceil(np.array(input_image.size) / 64).astype(int)), axis=0)
        * 64
        - input_image.size
    )
    im_padded = Image.fromarray(
        np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
    )
    w, h = im_padded.size
    if w == h:
        return im_padded
    elif w > h:
        new_image = Image.new(im_padded.mode, (w, w), (0, 0, 0))
        new_image.paste(im_padded, (0, (w - h) // 2))
        return new_image
    else:
        new_image = Image.new(im_padded.mode, (h, h), (0, 0, 0))
        new_image.paste(im_padded, ((h - w) // 2, 0))
        return new_image
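

# PIL image -> (image tensor, mask tensor), mirroring ComfyUI's LoadImage node:
# apply EXIF rotation, normalize to 0-1, and derive an inverted mask from the
# alpha channel when one is present.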
def pil_to_tensor(image: Image.Image) -> tuple[torch.Tensor, torch.Tensor]:
    output_images = []
    output_masks = []
    for i in ImageSequence.Iterator(image):
        i = ImageOps.exif_transpose(i)
        if i.mode == "I":
            i = i.point(lambda px: px * (1 / 255))
        image = i.convert("RGB")
        image = np.array(image).astype(np.float32) / 255.0
        image = torch.from_numpy(image)[None,]
        if "A" in i.getbands():
            mask = np.array(i.getchannel("A")).astype(np.float32) / 255.0
            mask = 1.0 - torch.from_numpy(mask)
        else:
            mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu")
        output_images.append(image)
        output_masks.append(mask.unsqueeze(0))
    if len(output_images) > 1:
        output_image = torch.cat(output_images, dim=0)
        output_mask = torch.cat(output_masks, dim=0)
    else:
        output_image = output_images[0]
        output_mask = output_masks[0]
    return (output_image, output_mask)
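

# Run one generation. With an input image, LayeredDiffusionCond conditions the
# model on it (as foreground or background, per `cond_mode`) and the blended
# RGB result is returned; without one, LayeredDiffusionFG generates a
# transparent foreground, returning the RGBA image, mask, RGB preview, and
# decoded layer.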
def predict(
    prompt: str,
    negative_prompt: str,
    input_image: Image.Image | None,
    cond_mode: str | None,
    seed: int,
    sampler_name: str,
    scheduler: str,
    steps: int,
    cfg: float,
    denoise: float,
):
    with torch.inference_mode():
        cliptextencode_prompt = cliptextencode(
            text=prompt,
            clip=ckpt[1],
        )
        cliptextencode_negative_prompt = cliptextencode(
            text=negative_prompt,
            clip=ckpt[1],
        )
        emptylatentimage_sample = emptylatentimage_generate(
            width=1024, height=1024, batch_size=1
        )
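
        # Image-conditioned path: pad/resize the input, encode it to latents,
        # and wire it in as LayerDiffusion conditioning. Only the blended RGB
        # result is returned here.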
        if input_image is not None:
            img_tensor = pil_to_tensor(pad_image(input_image).resize((1024, 1024)))
            img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
            layereddiffusionapply_sample = ld_cond_apply_layered_diffusion(
                config=cond_mode,
                weight=1,
                model=ckpt[0],
                cond=cliptextencode_prompt[0],
                uncond=cliptextencode_negative_prompt[0],
                latent=img_latent[0],
            )
            ksampler = ksampler_sample(
                steps=steps,
                cfg=cfg,
                sampler_name=sampler_name,
                scheduler=scheduler,
                seed=seed,
                model=layereddiffusionapply_sample[0],
                positive=layereddiffusionapply_sample[1],
                negative=layereddiffusionapply_sample[2],
                latent_image=emptylatentimage_sample[0],
                denoise=denoise,
            )
            vaedecode_sample = vae_decode(
                samples=ksampler[0],
                vae=ckpt[2],
            )
            rgb_img = tensor_to_pil(vaedecode_sample[0])
            return flatten([rgb_img])
        else:
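            # Text-only path: generate a foreground layer with transparency
            # via the "SDXL, Conv Injection" LayerDiffusion patch.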
            layereddiffusionapply_sample = ld_fg_apply_layered_diffusion(
                config="SDXL, Conv Injection", weight=1, model=ckpt[0]
            )
            ksampler = ksampler_sample(
                steps=steps,
                cfg=cfg,
                sampler_name=sampler_name,
                scheduler=scheduler,
                seed=seed,
                model=layereddiffusionapply_sample[0],
                positive=cliptextencode_prompt[0],
                negative=cliptextencode_negative_prompt[0],
                latent_image=emptylatentimage_sample[0],
                denoise=denoise,
            )
            vaedecode_sample = vae_decode(
                samples=ksampler[0],
                vae=ckpt[2],
            )
            layereddiffusiondecode_sample = ld_decode(
                sd_version="SDXL",
                sub_batch_size=16,
                samples=ksampler[0],
                images=vaedecode_sample[0],
            )
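            # Build the outputs: the decoded LayerDiffuse mask is inverted and
            # joined onto the image as its alpha channel.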
            mask = mask_to_image(mask=layereddiffusiondecode_sample[1])
            ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0])
            inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1])
            rgba_img = join_image_with_alpha(
                image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0]
            )
            rgba_img = tensor_to_pil(rgba_img[0])
            mask = tensor_to_pil(mask[0])
            rgb_img = tensor_to_pil(vaedecode_sample[0])
            return flatten([rgba_img, mask, rgb_img, ld_image])
examples = [["An old man sits on a chair looking at the sky", ""]]


def flatten(l: List[List[Any]]) -> List[Any]:
    return [item for sublist in l for item in sublist]
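

# Wrapper for gr.Examples: text-only generation with the default sampler settings.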
def predict_examples(prompt, negative_prompt):
    return predict(
        prompt, negative_prompt, None, None, 0, "euler", "normal", 20, 8.0, 1.0
    )

css = """
.gradio-container{
    max-width: 60rem;
}
"""
with gr.Blocks(css=css) as blocks:
    gr.Markdown("# LayerDiffuse (unofficial)")
    with gr.Row():
        with gr.Column():
            prompt = gr.Text(label="Prompt")
            negative_prompt = gr.Text(label="Negative Prompt")
            button = gr.Button("Generate")
            with gr.Accordion(open=False, label="Input Image (Optional)"):
                cond_mode = gr.Radio(
                    value="SDXL, Foreground",
                    choices=["SDXL, Foreground", "SDXL, Background"],
                    info="Whether to use the input image as foreground or background",
                )
                input_image = gr.Image(label="Input Image", type="pil")
            with gr.Accordion(open=False, label="Advanced Options"):
                seed = gr.Slider(
                    label="Seed",
                    value=0,
                    minimum=-1,
                    maximum=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    randomize=True,
                )
                sampler_name = gr.Dropdown(
                    choices=samplers.KSampler.SAMPLERS,
                    label="Sampler Name",
                    value=samplers.KSampler.SAMPLERS[0],
                )
                scheduler = gr.Dropdown(
                    choices=samplers.KSampler.SCHEDULERS,
                    label="Scheduler",
                    value=samplers.KSampler.SCHEDULERS[0],
                )
                steps = gr.Number(
                    label="Steps", value=20, minimum=1, maximum=10000, step=1
                )
                cfg = gr.Number(
                    label="CFG", value=8.0, minimum=0.0, maximum=100.0, step=0.1
                )
                denoise = gr.Number(
                    label="Denoise", value=1.0, minimum=0.0, maximum=1.0, step=0.01
                )
        with gr.Column(scale=2):
            gallery = gr.Gallery(
                columns=[2], rows=[2], object_fit="contain", height="unset"
            )
    inputs = [
        prompt,
        negative_prompt,
        input_image,
        cond_mode,
        seed,
        sampler_name,
        scheduler,
        steps,
        cfg,
        denoise,
    ]
    outputs = [gallery]
    gr.Examples(
        fn=predict_examples,
        examples=examples,
        inputs=[prompt, negative_prompt],
        outputs=outputs,
        cache_examples=False,
    )
    button.click(fn=predict, inputs=inputs, outputs=outputs)

if __name__ == "__main__":
    blocks.launch()