import gradio as gr
import numpy as np
import random
import cv2
import PIL.Image
from controlnet_aux import OpenposeDetector
from transformers import pipeline
from rembg import remove
from diffusers.models import AutoencoderKL


#import spaces #[uncomment to use ZeroGPU]
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionControlNetPipeline, ControlNetModel,
    DPMSolverMultistepScheduler
)
from peft import PeftModel
import torch

from huggingface_hub import HfApi
# Create a Hugging Face Hub API client
api = HfApi()

device = "cuda" if torch.cuda.is_available() else "cpu"

if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

model_names = [
    "sand74/changpu_lora",
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    "stabilityai/sd-turbo",
]


def get_canny_image(image):
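    """Run Canny edge detection and return the edges as a 3-channel PIL image."""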
    image = np.array(image)
    low_threshold = 100
    high_threshold = 200
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    return PIL.Image.fromarray(image)


def get_openpos_image(image):
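    """Extract an OpenPose skeleton map (the detector is re-loaded on every call)."""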
    pil_image = PIL.Image.fromarray(image)
    processor = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
    pil_image = processor(pil_image, hand_and_face=False)
    return pil_image


def get_depth_image(image):
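    """Estimate a depth map and broadcast it to 3 channels as a PIL image."""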
    pil_image = PIL.Image.fromarray(image)
    depth_estimator = pipeline('depth-estimation')
    pil_image = depth_estimator(pil_image)['depth']
    pil_image = np.array(pil_image)
    pil_image = pil_image[:, :, None]
    pil_image = np.concatenate([pil_image, pil_image, pil_image], axis=2)
    return PIL.Image.fromarray(pil_image)


# Map each ControlNet checkpoint to the preprocessor that produces its conditioning image.
control_net_modes = {
    "lllyasviel/sd-controlnet-canny": get_canny_image,
    "lllyasviel/control_v11p_sd15_openpose": get_openpos_image,
    "lllyasviel/control_v11f1p_sd15_depth": get_depth_image,
}


def preview_control_net_image(controlnet_image, controlnet_mode):
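    """Preview the conditioning image produced by the selected ControlNet mode."""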
    return control_net_modes[controlnet_mode](controlnet_image)

    
def is_lora(model_name):
    return model_name == "sand74/changpu_lora"


def remove_background(image):
    image = remove(image)
    return image



#@spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    model_id,
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    lora_scale,
    use_controlnet=False,
    controlnet_image=None,
    controlnet_strength=None,
    controlnet_mode=None,
    use_ip_adapter=False,
    ip_adapter_image=None,
    ip_adapter_scale=None,
    rm_background=True,
    progress=gr.Progress(track_tqdm=True),
):
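    """Assemble a Stable Diffusion pipeline according to the UI settings and run it.

    Returns the generated image and the seed that was actually used.
    """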
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    generator = torch.Generator().manual_seed(seed)

    pipe_params = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    )
    # LoRA checkpoints are applied on top of the SD 1.5 base model.
    if is_lora(model_id):
        base_model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
    else:
        base_model_id = model_id

    # Swap in the fine-tuned MSE VAE, commonly used to improve decoded image quality.
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch_dtype)

    if not use_controlnet:
        pipe = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch_dtype, vae=vae)
    else:
        # Resize the conditioning image to the target resolution, then preprocess it
        # with the detector that matches the selected ControlNet checkpoint.
        controlnet_image = cv2.resize(controlnet_image, (width, height), interpolation=cv2.INTER_AREA)
        controlnet = ControlNetModel.from_pretrained(controlnet_mode, torch_dtype=torch_dtype)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            base_model_id,
            torch_dtype=torch_dtype,
            controlnet=controlnet,
            vae=vae)
        pipe_params["image"] = control_net_modes[controlnet_mode](controlnet_image)
        pipe_params["controlnet_conditioning_scale"] = controlnet_strength

    if is_lora(model_id):
        # PeftModel.from_pretrained injects the LoRA layers into pipe.unet in place,
        # so the returned wrapper does not need to be kept around.
        PeftModel.from_pretrained(pipe.unet, model_id, adapter_name="panda_hqwh")
        pipe.set_adapters(["panda_hqwh"], adapter_weights=[lora_scale])

    if use_ip_adapter:
        ip_adapter_image = cv2.resize(ip_adapter_image, (width, height), interpolation=cv2.INTER_AREA)
        pipe.load_ip_adapter(
            "h94/IP-Adapter",
            subfolder="models",
            weight_name="ip-adapter-plus_sd15.bin",
        )
        pipe_params["ip_adapter_image"] = ip_adapter_image
        pipe.set_ip_adapter_scale(ip_adapter_scale)

    # Disable the NSFW checker so borderline results are not blanked out.
    pipe.safety_checker = None

    if torch_dtype in (torch.float16, torch.bfloat16):
        pipe.unet.half()
        pipe.text_encoder.half()

    pipe.to(device)

    # DPM-Solver++ multistep typically reaches good quality in fewer steps.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

    image = pipe(**pipe_params).images[0]

    if rm_background:
        image = remove_background(image)
        
    return image, seed
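
# Usage sketch (kept commented out): calling infer() directly, outside the UI.
# The argument values below are illustrative, not tuned recommendations.
# image, used_seed = infer(
#     model_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
#     prompt="panda_hqwh walk in a field",
#     negative_prompt="low quality, blurry",
#     seed=42,
#     randomize_seed=False,
#     width=512,
#     height=512,
#     guidance_scale=7,
#     num_inference_steps=30,
#     lora_scale=0.9,
# )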


examples = [
    "Sad panda_hqwh drinking beer",
    "panda_hqwh walk in a field",
    "panda_hqwh play with ball",
]


css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""


with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        title = gr.Markdown(" # Text-to-Image Gradio Template")

        model_id = gr.Dropdown(model_names, value=model_names[0], label="Select model")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
                value="Sad panda_hqwh drinking beer",
            )
            run_button = gr.Button("Run", scale=0, variant="primary")

        result = gr.Image(label="Result", show_label=False)
        rm_background = gr.Checkbox(label="Remove background?", scale=1, value=True)

        with gr.Group(visible=True) as lora_section:
            title = gr.Markdown(" ### LoRA section")
            with gr.Row():
                lora_scale = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.9,
                    step=0.1,
                    label="LoRA Strength"
                )

        # Show or hide the LoRA section depending on the selected model
        model_id.change(
            fn=lambda x: gr.update(visible=is_lora(x)),
            inputs=model_id,
            outputs=lora_section
        )

        with gr.Group():
            title = gr.Markdown(" ### ControlNet section")
            with gr.Column():
                use_controlnet = gr.Checkbox(label="Use ControlNet", value=False)

                # ControlNet settings (hidden until "Use ControlNet" is checked)
                with gr.Column(visible=False) as controlnet_section:
                    controlnet_strength = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.8, step=0.1,
                        label="ControlNet Strength",
                        interactive=True
                    )
                    controlnet_mode = gr.Dropdown(
                        list(control_net_modes.keys()),
                        value=next(iter(control_net_modes.keys())),
                        label="ControlNet mode",
                        interactive=True
                    )
                    with gr.Row():                    
                        controlnet_image = gr.Image(
                            label="ControlNet image",
                            interactive=True
                        )
                        controlnet_view = gr.Image(
                            label="ControlNet preview",
                            interactive=False
                        )

        controlnet_image.change(
            fn=preview_control_net_image,
            inputs=[controlnet_image, controlnet_mode],
            outputs=controlnet_view
        )

        controlnet_mode.change(
            fn=preview_control_net_image,
            inputs=[controlnet_image, controlnet_mode],
            outputs=controlnet_view
        )

        # Show or hide the ControlNet section based on the checkbox
        use_controlnet.change(
            fn=lambda x: gr.update(visible=x),
            inputs=use_controlnet,
            outputs=controlnet_section
        )

        with gr.Group():
            title = gr.Markdown(" ### IP-adapter section")
            with gr.Column():
                use_ip_adapter = gr.Checkbox(label="Use IP-adapter", value=False)

                # IP-adapter settings (hidden until "Use IP-adapter" is checked)
                with gr.Column(visible=False) as ip_adapter_section:
                    ip_adapter_scale = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.5, step=0.1,
                        label="IP-adapter Scale",
                        interactive=True
                    )
                    ip_adapter_image = gr.Image(
                        label="IP-adapter image",
                    )

        # Show or hide the IP-adapter section based on the checkbox
        use_ip_adapter.change(
            fn=lambda x: gr.update(visible=x),
            inputs=use_ip_adapter,
            outputs=ip_adapter_section
        )

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=True,
                value="low quality, blurry, unfinished, text",
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0,
                    maximum=20,
                    step=1,
                    value=7,  # Replace with defaults that work for your model
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=500,
                    step=1,
                    value=30,  # Replace with defaults that work for your model
                )

        gr.Examples(examples=examples, inputs=[prompt])

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            model_id,
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            lora_scale,
            use_controlnet,
            controlnet_image,
            controlnet_strength,
            controlnet_mode,
            use_ip_adapter,
            ip_adapter_image,
            ip_adapter_scale,
            rm_background,
        ],
        outputs=[result, seed],
    )


if __name__ == "__main__":
    demo.launch()
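    # To expose a temporary public URL (e.g., when running on a remote box),
    # share=True can be passed instead: demo.launch(share=True)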