File size: 7,967 Bytes
41789ae
 
 
 
 
 
5ff1093
834f6ad
41789ae
5ff1093
834f6ad
5ff1093
 
 
 
834f6ad
5ff1093
d6df1df
5ff1093
d6df1df
5ff1093
834f6ad
5ff1093
 
834f6ad
 
5ff1093
834f6ad
 
 
 
 
5ff1093
 
 
da2d97d
 
 
41789ae
834f6ad
 
da2d97d
834f6ad
 
41789ae
834f6ad
da2d97d
41789ae
 
 
834f6ad
 
41789ae
834f6ad
da2d97d
41789ae
 
 
 
5ff1093
da2d97d
41789ae
da2d97d
41789ae
 
5ff1093
 
 
 
 
41789ae
 
 
 
 
5ff1093
41789ae
 
 
 
 
 
6580922
18016af
834f6ad
5ff1093
41789ae
 
6580922
 
d6df1df
 
 
834f6ad
d6df1df
 
 
6580922
 
5ff1093
834f6ad
 
 
 
6580922
 
 
 
 
da2d97d
41789ae
6580922
 
 
 
d6df1df
 
 
 
 
65663ad
d6df1df
6580922
 
d6df1df
6580922
d6df1df
6580922
 
da2d97d
41789ae
5ff1093
41789ae
 
6580922
5ff1093
41789ae
 
b53f48b
 
 
834f6ad
 
 
6580922
b53f48b
 
 
834f6ad
 
 
b53f48b
 
 
 
834f6ad
 
 
6580922
b53f48b
 
 
6580922
b53f48b
834f6ad
6580922
b53f48b
 
 
5ff1093
41789ae
 
834f6ad
41789ae
5ff1093
 
834f6ad
6580922
5ff1093
 
41789ae
 
 
 
6580922
41789ae
 
 
 
 
 
 
71a28f3
 
6580922
71a28f3
 
 
41789ae
6580922
5ff1093
6580922
41789ae
 
6580922
41789ae
834f6ad
6580922
41789ae
6580922
41789ae
 
6580922
41789ae
 
 
6580922
 
5ff1093
 
 
 
 
 
6580922
5ff1093
41789ae
 
 
 
71a28f3
 
5ff1093
71a28f3
 
 
 
41789ae
 
 
 
 
d6df1df
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import gradio as gr
import torch
import spaces
import cv2
import numpy as np
from PIL import Image

from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler

# Catalogue of selectable styles, keyed by the label offered in the UI dropdown.
# Each entry holds:
#   "repo"    - repo id passed to pipe.load_lora_weights, or None for no LoRA
#   "trigger" - text prepended verbatim to the user's prompt
#   "weight"  - configured strength for the style
#   "file"    - (optional) weight file name passed as weight_name when loading
LORA_REGISTRY = {
    # Sentinel entry: no repo, so no LoRA is loaded and the prompt is unchanged.
    "None (Base SDXL)": {
        "repo": None,
        "trigger": "",
        "weight": 0.0
    },
    "Lego Style XL": {
        "repo": "lordjia/lelo-lego-lora-for-xl-sd1-5", 
        "trigger": "LEGO Creator, LEGO MiniFig, ", 
        "weight": 0.8,
        "file": "Lego_XL_v2.1.safetensors"
    },
    "Claymation Style XL": {
        "repo": "DoctorDiffusion/doctor-diffusion-s-claymation-style-lora",
        "trigger": "made-of-clay, claymation style, ",
        "weight": 0.9,
        "file": "DD-made-of-clay-XL-v2.safetensors"
    },
    "Pixel Art XL": {
        "repo": "nerijs/pixel-art-xl",
        "trigger": "pixel art, ",
        "weight": 1.0,
        "file": "pixel-art-xl.safetensors"
    }
}

# Build the shared SDXL + ControlNet pipeline once at import time; the
# request handler below reuses the module-level `pipe` for every generation.
print("Loading SDXL Pipeline...")

# All model components are loaded in half precision.
dtype = torch.float16

# Standalone VAE checkpoint, passed to the pipeline in place of the one
# bundled with the base model.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", 
    torch_dtype=dtype
)

# Canny-conditioned ControlNet for SDXL.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=dtype,
    use_safetensors=True
)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=dtype,
    use_safetensors=True
)

# Swap the default scheduler for UniPC, reusing the existing scheduler config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Let diffusers manage device placement instead of calling pipe.to("cuda").
pipe.enable_model_cpu_offload()

print("Pipeline loaded successfully.")

def get_canny_image(image, low_threshold=100, high_threshold=200):
    """Return a three-channel PIL image of the Canny edges found in *image*.

    Args:
        image: Source picture; anything ``np.array`` can turn into an array
            that ``cv2.Canny`` accepts (the app passes a PIL image).
        low_threshold: Lower hysteresis threshold handed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold handed to ``cv2.Canny``.

    Returns:
        A PIL image whose three channels are identical copies of the edge map.
    """
    pixels = np.array(image)
    edge_map = cv2.Canny(pixels, low_threshold, high_threshold)
    # Replicate the single-channel edge map across three channels so the
    # result can be used as an RGB-shaped conditioning image.
    rgb_edges = np.stack((edge_map, edge_map, edge_map), axis=-1)
    return Image.fromarray(rgb_edges)

@spaces.GPU(duration=120)
def generate_controlled_image(
    input_image,
    prompt,
    negative_prompt,
    lora_selection,
    controlnet_conditioning_scale,
    steps,
    seed
):
    """Generate an SDXL image guided by the Canny edges of *input_image*.

    The input is resized to 1024x1024, converted to a Canny edge map, and
    used as the ControlNet conditioning image. If the selected style has a
    LoRA repo, the LoRA is loaded for this call, applied at the strength
    configured in ``LORA_REGISTRY`` and unloaded again afterwards.

    Args:
        input_image: PIL image supplied by the UI, or ``None``.
        prompt: User prompt; the style's trigger text is prepended to it.
        negative_prompt: Negative prompt forwarded to the pipeline.
        lora_selection: Key into ``LORA_REGISTRY``.
        controlnet_conditioning_scale: ControlNet strength (cast to float).
        steps: Number of inference steps (cast to int).
        seed: Seed for the CUDA random generator (cast to int).

    Returns:
        A ``(canny_image, output_image)`` tuple of PIL images.

    Raises:
        gr.Error: If no input image was provided.
        Exception: Any error raised by the pipeline call is propagated after
            the LoRA weights have been unloaded.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first!")

    width, height = 1024, 1024
    input_image = input_image.resize((width, height))
    canny_image = get_canny_image(input_image)

    style_config = LORA_REGISTRY[lora_selection]
    repo_id = style_config["repo"]
    trigger_text = style_config["trigger"]
    lora_file = style_config.get("file", None)
    lora_weight = float(style_config.get("weight", 1.0))

    final_prompt = f"{trigger_text}{prompt}"

    # Start from a clean pipeline in case a previous call left adapters behind.
    pipe.unload_lora_weights()

    lora_loaded = False
    try:
        if repo_id:
            try:
                print(f"Loading LoRA: {repo_id}")
                if lora_file:
                    pipe.load_lora_weights(repo_id, weight_name=lora_file)
                else:
                    pipe.load_lora_weights(repo_id)
                lora_loaded = True
                print("LoRA loaded successfully.")
            except Exception as e:
                # Best effort: fall back to the base model rather than failing
                # the request, and drop anything a partial load left behind.
                print(f"LoRA Load Error: {e}")
                pipe.unload_lora_weights()
                gr.Warning("Failed to load LoRA. Using base model.")

        generator = torch.Generator("cuda").manual_seed(int(seed))

        # Only pass a LoRA scale when an adapter is actually active, so the
        # base-model path is called exactly as before.
        extra_kwargs = {}
        if lora_loaded:
            extra_kwargs["cross_attention_kwargs"] = {"scale": lora_weight}

        print(f"Generating: {final_prompt[:100]}...")

        output = pipe(
            prompt=final_prompt,
            negative_prompt=negative_prompt,
            image=canny_image,
            num_inference_steps=int(steps),
            controlnet_conditioning_scale=float(controlnet_conditioning_scale),
            guidance_scale=7.0,
            generator=generator,
            **extra_kwargs,
        )
        output_image = output.images[0]
    finally:
        # Runs on success and on failure, so the shared pipeline never keeps
        # a style's LoRA attached for the next request.
        pipe.unload_lora_weights()
        torch.cuda.empty_cache()

    return canny_image, output_image

# Custom CSS for the page: constrains and centres the main column
# (#col-container) and styles the intro paragraph (.guide-text).
css = """
#col-container {max-width: 1200px; margin-left: auto; margin-right: auto;}
.guide-text {font-size: 1.1em; color: #4a5568;}
"""

# Example rows for gr.Examples. Column order matches the `inputs` list given
# to gr.Examples below:
#   [image URL, prompt, negative prompt, LoRA style key,
#    ControlNet strength, steps, seed]
# NOTE(review): the image file names end in _canny/_depth/_hed/_mlsd, which
# suggests they are already-processed control maps, while
# generate_controlled_image runs Canny on whatever image it receives -
# confirm these are the intended example inputs.
examples = [
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_canny.png",
        "a colorful exotic bird sitting on a branch, detailed feathers, masterpiece, 8k",
        "blurry, low quality, deformed, illustration",
        "None (Base SDXL)", 
        0.8, 30, 42
    ],
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_vermeer_depth.png",
        "portrait of a girl with a pearl earring, made of plastic blocks, interlocking bricks, toy aesthetic, macro photography",
        "human skin, realistic, painting, blurry, drawing",
        "Lego Style XL",
        0.8, 30, 101
    ],
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_bird_hed.png",
        "pixel art, a cute bird, isometric view, retro game asset, 8-bit graphics",
        "photorealistic, vector, high resolution, smooth, 3d render",
        "Pixel Art XL",
        0.8, 30, 202
    ],
    [
        "https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_room_mlsd.png",
        "made-of-clay, claymation style, interior of a modern living room, stop motion animation, plasticine texture",
        "cgi, 3d render, glossy, architectural visualization",
        "Claymation Style XL",
        0.8, 30, 303
    ],
]

# Gradio UI: a left column of inputs, a right column with the Canny preview
# and the generated result, plus a row of clickable examples.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# 🎨 SDXL ControlNet + LoRA Mixer")
        gr.Markdown(
            """
            <p class='guide-text'>
            <b>SDXL Edition.</b><br>
            Uses ControlNet Canny (SDXL) for structure preservation with LoRA styles.
            </p>
            """
        )

        with gr.Row():
            # Left column: everything the user provides.
            with gr.Column(scale=1):
                input_image = gr.Image(label="Input Image", type="pil", sources=["upload", "clipboard"])
                
                prompt = gr.Textbox(
                    label="Prompt", 
                    value="A house on a hill, sunny day, masterpiece",
                    lines=2
                )
                
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="blurry, low quality, distorted, ugly, watermark",
                    lines=1
                )
                
                # Choices are the LORA_REGISTRY keys, so the selected value can
                # be used directly as a registry lookup key.
                lora_selection = gr.Dropdown(
                    label="LoRA Style",
                    choices=list(LORA_REGISTRY.keys()),
                    value="None (Base SDXL)"
                )

                with gr.Accordion("Advanced Settings", open=False):
                    controlnet_conditioning_scale = gr.Slider(
                        label="ControlNet Strength",
                        minimum=0.0, maximum=1.5, value=0.8, step=0.1
                    )
                    steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=30, step=1)
                    seed = gr.Number(label="Seed", value=42, precision=0)

                submit_btn = gr.Button("Generate", variant="primary", size="lg")

            # Right column: the edge map fed to ControlNet and the final image.
            with gr.Column(scale=1):
                with gr.Row():
                    output_canny = gr.Image(label="Canny Edges", type="pil")
                    output_result = gr.Image(label="Result", type="pil")

        # Example rows fill the same input components, in the same order,
        # that the Generate button passes to generate_controlled_image.
        # cache_examples=False: example outputs are not precomputed.
        gr.Examples(
            examples=examples,
            inputs=[input_image, prompt, negative_prompt, lora_selection, controlnet_conditioning_scale, steps, seed],
            outputs=[output_canny, output_result],
            fn=generate_controlled_image,
            cache_examples=False
        )

    # Wire the Generate button; the input order must match the parameter
    # order of generate_controlled_image.
    submit_btn.click(
        fn=generate_controlled_image,
        inputs=[
            input_image, 
            prompt, 
            negative_prompt, 
            lora_selection, 
            controlnet_conditioning_scale, 
            steps, 
            seed
        ],
        outputs=[output_canny, output_result]
    )

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()