import gradio as gr
import torch
import spaces
import cv2
import numpy as np
from PIL import Image
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler
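# Registry of selectable LoRA styles. Each entry maps a display name to its
# Hugging Face repo, the trigger phrase prepended to the prompt, the LoRA
# scale applied at inference, and (optionally) the .safetensors file to load.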
LORA_REGISTRY = {
    "None (Base SDXL)": {
        "repo": None,
        "trigger": "",
        "weight": 0.0
    },
    "Lego Style XL": {
        "repo": "lordjia/lelo-lego-lora-for-xl-sd1-5",
        "trigger": "LEGO Creator, LEGO MiniFig, ",
        "weight": 0.8,
        "file": "Lego_XL_v2.1.safetensors"
    },
    "Claymation Style XL": {
        "repo": "DoctorDiffusion/doctor-diffusion-s-claymation-style-lora",
        "trigger": "made-of-clay, claymation style, ",
        "weight": 0.9,
        "file": "DD-made-of-clay-XL-v2.safetensors"
    },
    "Pixel Art XL": {
        "repo": "nerijs/pixel-art-xl",
        "trigger": "pixel art, ",
        "weight": 1.0,
        "file": "pixel-art-xl.safetensors"
    }
}
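# Build the SDXL ControlNet (Canny) pipeline with the fp16-fixed VAE and a
# UniPC scheduler; model CPU offload keeps GPU memory usage low on Spaces.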
print("Loading SDXL Pipeline...")
dtype = torch.float16
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=dtype
)
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=dtype,
    use_safetensors=True
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=dtype,
    use_safetensors=True
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
print("Pipeline loaded successfully.")
def get_canny_image(image, low_threshold=100, high_threshold=200):
    """Convert a PIL image into a 3-channel Canny edge map for ControlNet conditioning."""
    image_array = np.array(image.convert("RGB"))
    canny_edges = cv2.Canny(image_array, low_threshold, high_threshold)
    # Stack the single-channel edge map into three channels, since the pipeline expects an RGB image.
    canny_edges = canny_edges[:, :, None]
    canny_edges = np.concatenate([canny_edges, canny_edges, canny_edges], axis=2)
    return Image.fromarray(canny_edges)
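# Main generation entry point, run on Spaces GPU hardware (up to 120 s per call
# via @spaces.GPU): preprocess the input to Canny edges, optionally load the
# selected LoRA, then run the ControlNet-guided SDXL pipeline.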
@spaces.GPU(duration=120)
def generate_controlled_image(
    input_image,
    prompt,
    negative_prompt,
    lora_selection,
    controlnet_conditioning_scale,
    steps,
    seed
):
    if input_image is None:
        raise gr.Error("Please upload an image first!")

    # SDXL is trained at 1024x1024; resize the input to its native resolution.
    width, height = 1024, 1024
    input_image = input_image.resize((width, height))
    canny_image = get_canny_image(input_image)

    # Drop any LoRA left over from a previous request before loading the new one.
    pipe.unload_lora_weights()
    style_config = LORA_REGISTRY[lora_selection]
    repo_id = style_config["repo"]
    trigger_text = style_config["trigger"]
    lora_file = style_config.get("file", None)
    final_prompt = f"{trigger_text}{prompt}"

    lora_loaded = False
    if repo_id:
        try:
            print(f"Loading LoRA: {repo_id}")
            if lora_file:
                pipe.load_lora_weights(repo_id, weight_name=lora_file)
            else:
                pipe.load_lora_weights(repo_id)
            lora_loaded = True
            print("LoRA loaded successfully.")
        except Exception as e:
            print(f"LoRA Load Error: {e}")
            gr.Warning(f"Failed to load LoRA ({e}). Using base model.")

    generator = torch.Generator("cuda").manual_seed(int(seed))
    print(f"Generating: {final_prompt[:100]}...")
    try:
        output = pipe(
            prompt=final_prompt,
            negative_prompt=negative_prompt,
            image=canny_image,
            num_inference_steps=int(steps),
            controlnet_conditioning_scale=float(controlnet_conditioning_scale),
            guidance_scale=7.0,
            generator=generator,
            # Apply the per-style LoRA scale from the registry; skipped when no LoRA is active.
            cross_attention_kwargs={"scale": style_config["weight"]} if lora_loaded else None,
        )
        output_image = output.images[0]
    except Exception as e:
        # Unload the LoRA even if generation fails, then re-raise for Gradio to report.
        pipe.unload_lora_weights()
        raise e

    pipe.unload_lora_weights()
    torch.cuda.empty_cache()
    return canny_image, output_image
css = """
#col-container {max-width: 1200px; margin-left: auto; margin-right: auto;}
.guide-text {font-size: 1.1em; color: #4a5568;}
"""
examples = [
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
        "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece, 8k",
        "blurry, low quality, deformed, illustration",
        "None (Base SDXL)",
        0.8, 30, 42
    ],
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
        "portrait of a girl with a pearl earring, made of plastic blocks, interlocking bricks, toy aesthetic, macro photography",
        "human skin, realistic, painting, blurry, drawing",
        "Lego Style XL",
        0.8, 30, 101
    ],
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_hed.png",
        "pixel art, a cute bird, isometric view, retro game asset, 8-bit graphics",
        "photorealistic, vector, high resolution, smooth, 3d render",
        "Pixel Art XL",
        0.8, 30, 202
    ],
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
        "made-of-clay, claymation style, interior of a modern living room, stop motion animation, plasticine texture",
        "cgi, 3d render, glossy, architectural visualization",
        "Claymation Style XL",
        0.8, 30, 303
    ],
]
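# Gradio UI: image, prompt, and LoRA style selector on the left;
# Canny edge preview and generated result on the right.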
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# 🎨 SDXL ControlNet + LoRA Mixer")
        gr.Markdown(
            """
            <p class='guide-text'>
            <b>SDXL Edition.</b><br>
            Uses ControlNet Canny (SDXL) to preserve the structure of your input image while the selected LoRA applies its style.
            </p>
            """
        )
        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label="Input Image", type="pil", sources=["upload", "clipboard"])
                prompt = gr.Textbox(
                    label="Prompt",
                    value="A house on a hill, sunny day, masterpiece",
                    lines=2
                )
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="blurry, low quality, distorted, ugly, watermark",
                    lines=1
                )
                lora_selection = gr.Dropdown(
                    label="LoRA Style",
                    choices=list(LORA_REGISTRY.keys()),
                    value="None (Base SDXL)"
                )
                with gr.Accordion("Advanced Settings", open=False):
                    controlnet_conditioning_scale = gr.Slider(
                        label="ControlNet Strength",
                        minimum=0.0, maximum=1.5, value=0.8, step=0.1
                    )
                    steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=30, step=1)
                    seed = gr.Number(label="Seed", value=42, precision=0)
                submit_btn = gr.Button("Generate", variant="primary", size="lg")
            with gr.Column(scale=1):
                with gr.Row():
                    output_canny = gr.Image(label="Canny Edges", type="pil")
                    output_result = gr.Image(label="Result", type="pil")
        gr.Examples(
            examples=examples,
            inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
            outputs=[output_canny, output_result],
            fn=generate_controlled_image,
            cache_examples=False
        )
    submit_btn.click(
        fn=generate_controlled_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            lora_selection,
            controlnet_conditioning_scale,
            steps,
            seed
        ],
        outputs=[output_canny, output_result]
    )
if __name__ == "__main__":
demo.launch()