import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download
from diffusers import (
    AutoencoderKL,
    ControlNetUnionModel,
    DiffusionPipeline,
    StableDiffusionXLPipeline,
    TCDScheduler,
    UNet2DConditionModel,
)


def callback_cfg_cutoff(pipeline, step_index, timestep, callback_kwargs):
    """Disable classifier-free guidance after 20% of the denoising steps.

    At the cutoff step, the negative half of every batched tensor is dropped
    (keeping only the last batch entry, i.e. the conditional slice) and the
    pipeline's guidance scale is forced to 0 so the remaining steps run a
    single, unguided forward pass — saving roughly half the compute for the
    tail of the schedule.

    Registered via ``callback_on_step_end``; must return the (possibly
    modified) ``callback_kwargs`` dict.
    """
    if step_index == int(pipeline.num_timesteps * 0.2):
        # Keep only the conditional slice of each CFG-doubled tensor.
        prompt_embeds = callback_kwargs["prompt_embeds"][-1:]
        add_text_embeds = callback_kwargs["add_text_embeds"][-1:]
        add_time_ids = callback_kwargs["add_time_ids"][-1:]
        control_image = callback_kwargs["control_image"]
        control_image[0] = control_image[0][-1:]
        control_type = callback_kwargs["control_type"][-1:]

        # NOTE: relies on the pipeline's private attribute to switch CFG off.
        pipeline._guidance_scale = 0.0

        callback_kwargs["prompt_embeds"] = prompt_embeds
        callback_kwargs["add_text_embeds"] = add_text_embeds
        callback_kwargs["add_time_ids"] = add_time_ids
        callback_kwargs["control_image"] = control_image
        callback_kwargs["control_type"] = control_type
    return callback_kwargs


MODELS = {
    "DreamShaper XL Turbo": "Lykon/dreamshaper-xl-v2-turbo",
    "RealVisXL V5.0 Lightning": "SG161222/RealVisXL_V5.0_Lightning",
    "Playground v2.5": "playgroundai/playground-v2.5-1024px-aesthetic",
    "Juggernaut XL Lightning": "RunDiffusion/Juggernaut-XL-Lightning",
    "Pixel Party XL": "pixelparty/pixel-party-xl",
    "Fluently XL v3": "fluently/Fluently-XL-v3",
}

# Models that require special UNet loading (value is base model to use)
UNET_MODELS = {
    "Pixel Party XL": "stabilityai/stable-diffusion-xl-base-1.0",
}

# Models that are single safetensors files (value is the repo, filename, and base model)
SINGLE_FILE_MODELS = {
    "Fluently XL v3": {
        "repo_id": "fluently/Fluently-XL-v3",
        "filename": "FluentlyXL-v3.safetensors",
        "base": "stabilityai/stable-diffusion-xl-base-1.0",
    },
}

DEFAULT_MODEL = "DreamShaper XL Turbo"

# The ControlNet and the fp16-safe VAE are shared by every pipeline variant,
# so they are loaded once at import time and reused across model switches.
controlnet_model = ControlNetUnionModel.from_pretrained(
    "OzzyGT/controlnet-union-promax-sdxl-1.0", variant="fp16", torch_dtype=torch.float16
)
controlnet_model.to(device="cuda", dtype=torch.float16)

vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
).to("cuda")


def load_pipeline(model_name):
    """Load a pipeline for the given model name.

    Three loading strategies are supported, chosen by the lookup tables above:
    single-safetensors checkpoints (``SINGLE_FILE_MODELS``), UNet-only repos
    grafted onto an SDXL base (``UNET_MODELS``), and regular diffusers repos.
    All pipelines share the module-level ControlNet and VAE and use the
    TCD scheduler for few-step sampling.
    """
    model_id = MODELS[model_name]

    if model_name in SINGLE_FILE_MODELS:
        # Load single safetensors checkpoint models.
        config = SINGLE_FILE_MODELS[model_name]

        # Download the checkpoint file first.
        checkpoint_path = hf_hub_download(
            repo_id=config["repo_id"],
            filename=config["filename"],
        )

        # Load the single file only to extract its UNet, then discard the rest.
        temp_pipe = StableDiffusionXLPipeline.from_single_file(
            checkpoint_path,
            torch_dtype=torch.float16,
        )
        unet = temp_pipe.unet
        del temp_pipe

        pipeline = DiffusionPipeline.from_pretrained(
            config["base"],
            torch_dtype=torch.float16,
            vae=vae,
            unet=unet,
            controlnet=controlnet_model,
            custom_pipeline="OzzyGT/custom_sdxl_cnet_union",
        ).to("cuda")
    elif model_name in UNET_MODELS:
        # Load UNet-only models (like Pixel Party XL) on top of an SDXL base.
        base_model = UNET_MODELS[model_name]
        unet = UNet2DConditionModel.from_pretrained(model_id, torch_dtype=torch.float16)
        pipeline = DiffusionPipeline.from_pretrained(
            base_model,
            torch_dtype=torch.float16,
            vae=vae,
            unet=unet,
            controlnet=controlnet_model,
            custom_pipeline="OzzyGT/custom_sdxl_cnet_union",
        ).to("cuda")
    else:
        pipeline = DiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            vae=vae,
            controlnet=controlnet_model,
            custom_pipeline="OzzyGT/custom_sdxl_cnet_union",
        ).to("cuda")

    pipeline.scheduler = TCDScheduler.from_config(pipeline.scheduler.config)
    return pipeline


# Mutable app state: the active pipeline, which model it was built from, and
# which LoRA adapters have been attached to it so far.
current_model = DEFAULT_MODEL
pipe = load_pipeline(current_model)
lora_loaded = set()

LORAS = {
    "add-detail-xl": "LyliaEngine/add-detail-xl",
    "pixel-art-xl": "nerijs/pixel-art-xl",
    "wowifier-xl": "frankjoshua/WowifierXL-V2",
}


@spaces.GPU(duration=24)
def fill_image(prompt, negative_prompt, image, model_selection, paste_back,
               guidance_scale, num_steps, use_detail_lora, detail_lora_weight,
               use_pixel_lora, pixel_lora_weight, use_wowifier_lora, wowifier_lora_weight):
    """Inpaint the masked region of ``image`` and yield (source, result).

    ``image`` is the Gradio ImageMask payload: ``image["background"]`` is the
    source picture and ``image["layers"][0]`` carries the drawn mask in its
    alpha channel. Switches pipelines on demand, manages LoRA adapters, and
    runs the ControlNet-union custom pipeline with the CFG-cutoff callback.
    Yields one ``(source, composited_result)`` pair for the ImageSlider.
    """
    global pipe, current_model, lora_loaded

    if model_selection != current_model:
        # Release the previous pipeline *before* building the new one so two
        # full SDXL pipelines never occupy GPU memory at the same time.
        pipe = None
        torch.cuda.empty_cache()
        pipe = load_pipeline(model_selection)
        current_model = model_selection
        lora_loaded = set()  # fresh pipeline has no adapters attached

    # Load any LoRAs that aren't already loaded.
    if use_detail_lora and "add-detail-xl" not in lora_loaded:
        pipe.load_lora_weights(LORAS["add-detail-xl"], adapter_name="add-detail-xl")
        lora_loaded.add("add-detail-xl")
    if use_pixel_lora and "pixel-art-xl" not in lora_loaded:
        pipe.load_lora_weights(LORAS["pixel-art-xl"], adapter_name="pixel-art-xl")
        lora_loaded.add("pixel-art-xl")
    if use_wowifier_lora and "wowifier-xl" not in lora_loaded:
        pipe.load_lora_weights(LORAS["wowifier-xl"], adapter_name="wowifier-xl")
        lora_loaded.add("wowifier-xl")

    # Set adapter weights based on checkboxes. Adapters that were loaded
    # earlier but are now unchecked stay attached with weight 0.0.
    active_adapters = []
    adapter_weights = []
    if "add-detail-xl" in lora_loaded:
        active_adapters.append("add-detail-xl")
        adapter_weights.append(detail_lora_weight if use_detail_lora else 0.0)
    if "pixel-art-xl" in lora_loaded:
        active_adapters.append("pixel-art-xl")
        adapter_weights.append(pixel_lora_weight if use_pixel_lora else 0.0)
    if "wowifier-xl" in lora_loaded:
        active_adapters.append("wowifier-xl")
        adapter_weights.append(wowifier_lora_weight if use_wowifier_lora else 0.0)
    if active_adapters:
        pipe.set_adapters(active_adapters, adapter_weights=adapter_weights)

    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(prompt, device="cuda", negative_prompt=negative_prompt)

    source = image["background"]
    mask = image["layers"][0]

    # The drawn mask lives in the layer's alpha channel; binarize it so any
    # touched pixel (alpha > 0) becomes fully opaque white.
    alpha_channel = mask.split()[3]
    binary_mask = alpha_channel.point(lambda p: 255 if p > 0 else 0)

    # Black out the masked region to build the ControlNet conditioning image.
    cnet_image = source.copy()
    cnet_image.paste(0, (0, 0), binary_mask)

    result_image = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
        control_image=[cnet_image],
        controlnet_conditioning_scale=[1.0],
        control_mode=[7],  # mode 7: inpaint/repaint for the union ControlNet
        num_inference_steps=int(num_steps),
        guidance_scale=guidance_scale,
        callback_on_step_end=callback_cfg_cutoff,
        callback_on_step_end_tensor_inputs=[
            "prompt_embeds",
            "add_text_embeds",
            "add_time_ids",
            "control_image",
            "control_type",
        ],
    ).images[0]

    if paste_back:
        result_image = result_image.convert("RGBA")
        # Resize generated image to match original source size if needed.
        if result_image.size != source.size:
            result_image = result_image.resize(source.size)
        # Composite only the masked region back onto the original.
        cnet_image.paste(result_image, (0, 0), binary_mask)
    else:
        cnet_image = result_image

    yield source, cnet_image


def clear_result():
    """Reset the result slider before a new generation starts."""
    return gr.update(value=None)


title = """

Diffusers Fast Inpaint

Draw the mask over the subject you want to erase or change and write what you want to inpaint it with.
"""

with gr.Blocks() as demo:
    gr.HTML(title)
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                lines=1,
            )
        with gr.Column():
            with gr.Row():
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    lines=1,
                )
    with gr.Row():
        with gr.Column():
            run_button = gr.Button("Generate")
        with gr.Column():
            paste_back = gr.Checkbox(True, label="Paste back original")
    with gr.Row():
        guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=1.5, step=0.1, label="Guidance Scale")
        num_steps = gr.Slider(minimum=1, maximum=50, value=8, step=1, label="Number of Steps")
    with gr.Row():
        use_detail_lora = gr.Checkbox(False, label="Add Detail XL LoRA")
        detail_lora_weight = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Detail LoRA Weight")
    with gr.Row():
        use_pixel_lora = gr.Checkbox(False, label="Pixel Art XL LoRA")
        pixel_lora_weight = gr.Slider(minimum=0.0, maximum=2.0, value=1.2, step=0.1, label="Pixel Art LoRA Weight")
    with gr.Row():
        use_wowifier_lora = gr.Checkbox(False, label="Wowifier XL LoRA")
        wowifier_lora_weight = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Wowifier LoRA Weight")
    with gr.Row():
        input_image = gr.ImageMask(
            type="pil",
            label="Input Image",
            canvas_size=(1024, 1024),
            layers=False,
            height=512,
        )
        result = gr.ImageSlider(
            interactive=False,
            label="Generated Image",
        )
    use_as_input_button = gr.Button("Use as Input Image", visible=False)
    model_selection = gr.Dropdown(choices=list(MODELS.keys()), value=DEFAULT_MODEL, label="Model")

    def use_output_as_input(output_image):
        # The slider holds (source, result); feed the result back as input.
        return gr.update(value=output_image[1])

    use_as_input_button.click(fn=use_output_as_input, inputs=[result], outputs=[input_image])

    # Both the Generate button and pressing Enter in the prompt box run the
    # identical chain: clear the result, hide the reuse button, generate,
    # then reveal the reuse button again.
    generation_inputs = [prompt, negative_prompt, input_image, model_selection, paste_back,
                         guidance_scale, num_steps, use_detail_lora, detail_lora_weight, use_pixel_lora,
                         pixel_lora_weight, use_wowifier_lora, wowifier_lora_weight]

    for trigger in (run_button.click, prompt.submit):
        trigger(
            fn=clear_result,
            inputs=None,
            outputs=result,
        ).then(
            fn=lambda: gr.update(visible=False),
            inputs=None,
            outputs=use_as_input_button,
        ).then(
            fn=fill_image,
            inputs=generation_inputs,
            outputs=result,
        ).then(
            fn=lambda: gr.update(visible=True),
            inputs=None,
            outputs=use_as_input_button,
        )

demo.queue(max_size=12).launch(share=False)