Gerchegg committed on
Commit
e2d1a1c
·
verified ·
1 Parent(s): b445dea

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +693 -659
app.py CHANGED
@@ -1,659 +1,693 @@
1
- import gradio as gr
2
- import numpy as np
3
- import random
4
- import json
5
- import torch
6
- import cv2
7
- from PIL import Image
8
-
9
- # Опциональный импорт spaces - нужен только для HF Spaces
10
- try:
11
- import spaces
12
- HF_SPACES = True
13
- except ImportError:
14
- HF_SPACES = False
15
- class spaces:
16
- @staticmethod
17
- def GPU(duration=None):
18
- def decorator(func):
19
- return func
20
- return decorator
21
-
22
- import os
23
- import time
24
- import logging
25
-
26
- from diffusers import (
27
- DiffusionPipeline,
28
- QwenImageControlNetPipeline,
29
- QwenImageControlNetModel,
30
- AutoPipelineForImage2Image
31
- )
32
- from huggingface_hub import hf_hub_download
33
-
34
- # Настройка логирования
35
- logging.basicConfig(
36
- level=logging.INFO,
37
- format='%(asctime)s | %(levelname)s | %(message)s',
38
- datefmt='%Y-%m-%d %H:%M:%S'
39
- )
40
- logger = logging.getLogger(__name__)
41
-
42
- logger.info("=" * 60)
43
- logger.info("LOADING QWEN-SOLOBAND ADVANCED")
44
- logger.info("=" * 60)
45
-
46
- hf_token = os.environ.get("HF_TOKEN")
47
- device = "cuda" if torch.cuda.is_available() else "cpu"
48
- dtype = torch.bfloat16
49
-
50
- # Логируем GPU
51
- logger.info(f"CUDA available: {torch.cuda.is_available()}")
52
- if torch.cuda.is_available():
53
- gpu_count = torch.cuda.device_count()
54
- logger.info(f"Number of GPUs: {gpu_count}")
55
- for i in range(gpu_count):
56
- logger.info(f" GPU {i}: {torch.cuda.get_device_name(i)}")
57
- logger.info(f" Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.1f} GB")
58
-
59
- # =================================================================
60
- # ЗАГРУЗКА МОДЕЛЕЙ
61
- # =================================================================
62
-
63
- # 1. Базовая модель для Text-to-Image
64
- logger.info("\n[1/3] Loading base Text2Image model...")
65
- model_id = "Gerchegg/Qwen-Soloband-Diffusers"
66
-
67
- try:
68
- start_time = time.time()
69
-
70
- # Определяем device_map
71
- if gpu_count > 1:
72
- device_map = "balanced"
73
- logger.info(f" Device map: balanced ({gpu_count} GPUs)")
74
- else:
75
- device_map = None
76
- logger.info(" Device map: single GPU")
77
-
78
- # Загружаем базовую модель
79
- pipe_txt2img = DiffusionPipeline.from_pretrained(
80
- model_id,
81
- torch_dtype=dtype,
82
- device_map=device_map,
83
- token=hf_token
84
- )
85
-
86
- if device_map is None:
87
- pipe_txt2img.to(device)
88
-
89
- load_time = time.time() - start_time
90
- logger.info(f" ✓ Text2Image loaded in {load_time:.1f}s")
91
-
92
- except Exception as e:
93
- logger.error(f" ❌ Error loading Text2Image: {e}")
94
- raise
95
-
96
- # 2. Image-to-Image модель (используем ту же базу)
97
- logger.info("\n[2/3] Creating Image2Image pipeline...")
98
- try:
99
- pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img)
100
- logger.info(" ✓ Image2Image pipeline created")
101
- except Exception as e:
102
- logger.error(f" ❌ Error creating Image2Image: {e}")
103
- pipe_img2img = None
104
-
105
- # 3. ControlNet модель
106
- logger.info("\n[3/3] Loading ControlNet model...")
107
- try:
108
- controlnet_model_id = "InstantX/Qwen-Image-ControlNet-Union"
109
-
110
- controlnet = QwenImageControlNetModel.from_pretrained(
111
- controlnet_model_id,
112
- torch_dtype=dtype,
113
- token=hf_token
114
- )
115
-
116
- # Создаем ControlNet pipeline на базе базовой модели
117
- pipe_controlnet = QwenImageControlNetPipeline.from_pretrained(
118
- model_id,
119
- controlnet=controlnet,
120
- torch_dtype=dtype,
121
- token=hf_token
122
- )
123
-
124
- if device_map is None:
125
- pipe_controlnet.to(device)
126
-
127
- logger.info(" ✓ ControlNet loaded")
128
-
129
- except Exception as e:
130
- logger.error(f" ❌ Error loading ControlNet: {e}")
131
- logger.warning(" ControlNet will be disabled")
132
- pipe_controlnet = None
133
-
134
- # Оптимизации памяти
135
- logger.info("\nApplying memory optimizations...")
136
- for pipe in [pipe_txt2img, pipe_img2img, pipe_controlnet]:
137
- if pipe and hasattr(pipe, 'vae'):
138
- if hasattr(pipe.vae, 'enable_tiling'):
139
- pipe.vae.enable_tiling()
140
- if hasattr(pipe.vae, 'enable_slicing'):
141
- pipe.vae.enable_slicing()
142
-
143
- logger.info(" ✓ VAE tiling and slicing enabled")
144
-
145
- logger.info("\n" + "=" * 60)
146
- logger.info("✓ ALL MODELS LOADED")
147
- logger.info("=" * 60)
148
-
149
- # =================================================================
150
- # PREPROCESSOR FUNCTIONS
151
- # =================================================================
152
-
153
- def resize_image(input_image, max_size=1024):
154
- """Изменяет размер изображения с сохранением пропорций (кратно 8)"""
155
- w, h = input_image.size
156
- aspect_ratio = w / h
157
-
158
- if w > h:
159
- new_w = max_size
160
- new_h = int(new_w / aspect_ratio)
161
- else:
162
- new_h = max_size
163
- new_w = int(new_h * aspect_ratio)
164
-
165
- # Кратно 8
166
- new_w = new_w - (new_w % 8)
167
- new_h = new_h - (new_h % 8)
168
-
169
- if new_w == 0: new_w = 8
170
- if new_h == 0: new_h = 8
171
-
172
- return input_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
173
-
174
- def extract_canny(input_image, low_threshold=100, high_threshold=200):
175
- """Canny edge detection"""
176
- image = np.array(input_image)
177
- edges = cv2.Canny(image, low_threshold, high_threshold)
178
- edges = edges[:, :, None]
179
- edges = np.concatenate([edges, edges, edges], axis=2)
180
- return Image.fromarray(edges)
181
-
182
- def extract_depth(input_image):
183
- """Depth map extraction (простая версия через grayscale)"""
184
- # Для полноценного depth нужна модель Depth-Anything
185
- # Упрощенная версия для демонстрации
186
- gray = input_image.convert('L')
187
- return gray.convert('RGB')
188
-
189
- def extract_pose(input_image):
190
- """Pose detection (заглушка - нужна модель OpenPose)"""
191
- # Для полноценного pose нужна модель OpenPose
192
- # Возвращаем Canny как fallback
193
- return extract_canny(input_image)
194
-
195
- def get_control_image(input_image, control_type):
196
- """Применяет препроцессор к изображению"""
197
- if control_type == "Canny":
198
- return extract_canny(input_image)
199
- elif control_type == "Depth":
200
- return extract_depth(input_image)
201
- elif control_type == "Pose":
202
- return extract_pose(input_image)
203
- else:
204
- return extract_canny(input_image) # Fallback
205
-
206
- # =================================================================
207
- # LORA FUNCTIONS
208
- # =================================================================
209
-
210
- # Список доступных LoRA
211
- AVAILABLE_LORAS = {
212
- "Realism": {
213
- "repo": "flymy-ai/qwen-image-realism-lora",
214
- "trigger": "Super Realism portrait of",
215
- "weights": "pytorch_lora_weights.safetensors"
216
- },
217
- "Anime": {
218
- "repo": "alfredplpl/qwen-image-modern-anime-lora",
219
- "trigger": "Japanese modern anime style, ",
220
- "weights": "pytorch_lora_weights.safetensors"
221
- },
222
- "Analog Film": {
223
- "repo": "janekm/analog_film",
224
- "trigger": "fifthel",
225
- "weights": "converted_complete.safetensors"
226
- }
227
- }
228
-
229
- # =================================================================
230
- # GENERATION FUNCTIONS
231
- # =================================================================
232
-
233
- MAX_SEED = np.iinfo(np.int32).max
234
-
235
- @spaces.GPU(duration=180)
236
- def generate_text2img(
237
- prompt,
238
- negative_prompt=" ",
239
- width=1664,
240
- height=928,
241
- seed=42,
242
- randomize_seed=False,
243
- guidance_scale=2.5,
244
- num_inference_steps=40,
245
- lora_name="None",
246
- lora_scale=1.0,
247
- progress=gr.Progress(track_tqdm=True)
248
- ):
249
- """Text-to-Image генерация"""
250
-
251
- logger.info("\n" + "=" * 60)
252
- logger.info("TEXT-TO-IMAGE GENERATION")
253
- logger.info("=" * 60)
254
-
255
- if randomize_seed:
256
- seed = random.randint(0, MAX_SEED)
257
-
258
- logger.info(f" Prompt: {prompt[:100]}...")
259
- logger.info(f" Size: {width}x{height}")
260
- logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
261
- logger.info(f" Seed: {seed}")
262
- logger.info(f" LoRA: {lora_name} (scale: {lora_scale})")
263
-
264
- try:
265
- # Загружаем LoRA если выбрана
266
- if lora_name != "None" and lora_name in AVAILABLE_LORAS:
267
- lora_info = AVAILABLE_LORAS[lora_name]
268
- logger.info(f" Loading LoRA: {lora_info['repo']}")
269
-
270
- pipe_txt2img.load_lora_weights(
271
- lora_info['repo'],
272
- weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
273
- token=hf_token
274
- )
275
-
276
- # Добавляем trigger word
277
- if lora_info['trigger']:
278
- prompt = lora_info['trigger'] + prompt
279
- logger.info(f" Added trigger: {lora_info['trigger']}")
280
-
281
- generator = torch.Generator(device=device).manual_seed(seed)
282
-
283
- image = pipe_txt2img(
284
- prompt=prompt,
285
- negative_prompt=negative_prompt,
286
- width=width,
287
- height=height,
288
- num_inference_steps=num_inference_steps,
289
- true_cfg_scale=guidance_scale,
290
- generator=generator
291
- ).images[0]
292
-
293
- # Выгружаем LoRA после генерации
294
- if lora_name != "None":
295
- pipe_txt2img.unload_lora_weights()
296
-
297
- logger.info(" ✓ Generation completed")
298
-
299
- return image, seed
300
-
301
- except Exception as e:
302
- logger.error(f" Error: {e}")
303
- raise
304
-
305
- @spaces.GPU(duration=180)
306
- def generate_img2img(
307
- input_image,
308
- prompt,
309
- negative_prompt=" ",
310
- strength=0.75,
311
- seed=42,
312
- randomize_seed=False,
313
- guidance_scale=2.5,
314
- num_inference_steps=40,
315
- lora_name="None",
316
- lora_scale=1.0,
317
- progress=gr.Progress(track_tqdm=True)
318
- ):
319
- """Image-to-Image генерация"""
320
-
321
- logger.info("\n" + "=" * 60)
322
- logger.info("IMAGE-TO-IMAGE GENERATION")
323
- logger.info("=" * 60)
324
-
325
- if input_image is None:
326
- raise gr.Error("Please upload an input image")
327
-
328
- if randomize_seed:
329
- seed = random.randint(0, MAX_SEED)
330
-
331
- # Изменяем размер изображения
332
- resized = resize_image(input_image, max_size=1024)
333
-
334
- logger.info(f" Prompt: {prompt[:100]}...")
335
- logger.info(f" Input size: {input_image.size} → {resized.size}")
336
- logger.info(f" Strength: {strength}")
337
- logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
338
- logger.info(f" LoRA: {lora_name}")
339
-
340
- try:
341
- if pipe_img2img is None:
342
- raise gr.Error("Image2Image pipeline not available")
343
-
344
- # Загружаем LoRA если выбрана
345
- if lora_name != "None" and lora_name in AVAILABLE_LORAS:
346
- lora_info = AVAILABLE_LORAS[lora_name]
347
- pipe_img2img.load_lora_weights(
348
- lora_info['repo'],
349
- weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
350
- token=hf_token
351
- )
352
- if lora_info['trigger']:
353
- prompt = lora_info['trigger'] + prompt
354
-
355
- generator = torch.Generator(device=device).manual_seed(seed)
356
-
357
- image = pipe_img2img(
358
- prompt=prompt,
359
- negative_prompt=negative_prompt,
360
- image=resized,
361
- strength=strength,
362
- num_inference_steps=num_inference_steps,
363
- true_cfg_scale=guidance_scale,
364
- generator=generator
365
- ).images[0]
366
-
367
- # Выгружаем LoRA
368
- if lora_name != "None":
369
- pipe_img2img.unload_lora_weights()
370
-
371
- logger.info(" Generation completed")
372
-
373
- return image, seed
374
-
375
- except Exception as e:
376
- logger.error(f" Error: {e}")
377
- raise
378
-
379
- @spaces.GPU(duration=180)
380
- def generate_controlnet(
381
- input_image,
382
- prompt,
383
- control_type="Canny",
384
- negative_prompt=" ",
385
- controlnet_scale=1.0,
386
- seed=42,
387
- randomize_seed=False,
388
- guidance_scale=5.0,
389
- num_inference_steps=30,
390
- lora_name="None",
391
- lora_scale=1.0,
392
- progress=gr.Progress(track_tqdm=True)
393
- ):
394
- """ControlNet генерация"""
395
-
396
- logger.info("\n" + "=" * 60)
397
- logger.info("CONTROLNET GENERATION")
398
- logger.info("=" * 60)
399
-
400
- if input_image is None:
401
- raise gr.Error("Please upload an input image")
402
-
403
- if pipe_controlnet is None:
404
- raise gr.Error("ControlNet pipeline not available")
405
-
406
- if randomize_seed:
407
- seed = random.randint(0, MAX_SEED)
408
-
409
- # Изменяем размер и применяем препроцессор
410
- resized = resize_image(input_image, max_size=1024)
411
- control_image = get_control_image(resized, control_type)
412
-
413
- logger.info(f" Prompt: {prompt[:100]}...")
414
- logger.info(f" Control type: {control_type}")
415
- logger.info(f" Control scale: {controlnet_scale}")
416
- logger.info(f" Image size: {resized.size}")
417
- logger.info(f" LoRA: {lora_name}")
418
-
419
- try:
420
- # Загружаем LoRA если выбрана
421
- if lora_name != "None" and lora_name in AVAILABLE_LORAS:
422
- lora_info = AVAILABLE_LORAS[lora_name]
423
- pipe_controlnet.load_lora_weights(
424
- lora_info['repo'],
425
- weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
426
- token=hf_token
427
- )
428
- if lora_info['trigger']:
429
- prompt = lora_info['trigger'] + prompt
430
-
431
- generator = torch.Generator(device=device).manual_seed(seed)
432
-
433
- image = pipe_controlnet(
434
- prompt=prompt,
435
- negative_prompt=negative_prompt,
436
- control_image=control_image,
437
- controlnet_conditioning_scale=controlnet_scale,
438
- width=resized.width,
439
- height=resized.height,
440
- num_inference_steps=num_inference_steps,
441
- guidance_scale=guidance_scale,
442
- generator=generator
443
- ).images[0]
444
-
445
- # Выгружаем LoRA
446
- if lora_name != "None":
447
- pipe_controlnet.unload_lora_weights()
448
-
449
- logger.info(" Generation completed")
450
-
451
- return image, control_image, seed
452
-
453
- except Exception as e:
454
- logger.error(f" ❌ Error: {e}")
455
- raise
456
-
457
- # =================================================================
458
- # GRADIO INTERFACE
459
- # =================================================================
460
-
461
- MAX_SEED = np.iinfo(np.int32).max
462
-
463
- css = """
464
- #col-container {
465
- margin: 0 auto;
466
- max-width: 1400px;
467
- }
468
- """
469
-
470
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
471
- gr.Markdown("""
472
- # 🎨 Qwen Soloband - Image2Image + ControlNet + LoRA
473
-
474
- **Продвинутая модель генерации** с поддержкой Image-to-Image, ControlNet и LoRA.
475
-
476
- ### ✨ Возможности:
477
- - 🖼️ **Text-to-Image** - Генерация из текста
478
- - 🔄 **Image-to-Image** - Модификация изображений (denoising strength)
479
- - 🎮 **ControlNet** - Управление структурой (Canny, Depth, Pose)
480
- - 🎭 **LoRA** - Стилизация (Realism, Anime, Film)
481
- - 🔌 **Full API** - Все функции доступны через API
482
-
483
- **Модель**: [Gerchegg/Qwen-Soloband-Diffusers](https://huggingface.co/Gerchegg/Qwen-Soloband-Diffusers)
484
- """)
485
-
486
- with gr.Tabs() as tabs:
487
-
488
- # TAB 1: Text-to-Image
489
- with gr.Tab("📝 Text-to-Image"):
490
- with gr.Row():
491
- with gr.Column(scale=1):
492
- t2i_prompt = gr.Text(
493
- label="Prompt",
494
- placeholder="SB_AI, a beautiful landscape...",
495
- lines=3
496
- )
497
-
498
- t2i_run = gr.Button("Generate", variant="primary")
499
-
500
- with gr.Accordion("Advanced Settings", open=False):
501
- t2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
502
-
503
- with gr.Row():
504
- t2i_width = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=1664)
505
- t2i_height = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=928)
506
-
507
- with gr.Row():
508
- t2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
509
- t2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)
510
-
511
- with gr.Row():
512
- t2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
513
- t2i_random_seed = gr.Checkbox(label="Random", value=True)
514
-
515
- t2i_lora = gr.Radio(
516
- label="LoRA Style",
517
- choices=["None"] + list(AVAILABLE_LORAS.keys()),
518
- value="None"
519
- )
520
- t2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
521
-
522
- with gr.Column(scale=1):
523
- t2i_output = gr.Image(label="Generated Image")
524
- t2i_seed_output = gr.Number(label="Used Seed")
525
-
526
- # TAB 2: Image-to-Image
527
- with gr.Tab("🔄 Image-to-Image"):
528
- with gr.Row():
529
- with gr.Column(scale=1):
530
- i2i_input = gr.Image(type="pil", label="Input Image")
531
- i2i_prompt = gr.Text(
532
- label="Prompt",
533
- placeholder="Transform this image into...",
534
- lines=3
535
- )
536
-
537
- i2i_strength = gr.Slider(
538
- label="Denoising Strength",
539
- info="0.0 = original image, 1.0 = complete redraw",
540
- minimum=0.0,
541
- maximum=1.0,
542
- step=0.05,
543
- value=0.75
544
- )
545
-
546
- i2i_run = gr.Button("Generate", variant="primary")
547
-
548
- with gr.Accordion("Advanced Settings", open=False):
549
- i2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
550
-
551
- with gr.Row():
552
- i2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
553
- i2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)
554
-
555
- with gr.Row():
556
- i2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
557
- i2i_random_seed = gr.Checkbox(label="Random", value=True)
558
-
559
- i2i_lora = gr.Radio(
560
- label="LoRA Style",
561
- choices=["None"] + list(AVAILABLE_LORAS.keys()),
562
- value="None"
563
- )
564
- i2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
565
-
566
- with gr.Column(scale=1):
567
- i2i_output = gr.Image(label="Generated Image")
568
- i2i_seed_output = gr.Number(label="Used Seed")
569
-
570
- # TAB 3: ControlNet
571
- with gr.Tab("🎮 ControlNet"):
572
- with gr.Row():
573
- with gr.Column(scale=1):
574
- cn_input = gr.Image(type="pil", label="Input Image")
575
- cn_prompt = gr.Text(
576
- label="Prompt",
577
- placeholder="A detailed description...",
578
- lines=3
579
- )
580
-
581
- cn_control_type = gr.Radio(
582
- label="Control Type (Preprocessor)",
583
- choices=["Canny", "Depth", "Pose"],
584
- value="Canny"
585
- )
586
-
587
- cn_control_scale = gr.Slider(
588
- label="Control Strength",
589
- minimum=0.0,
590
- maximum=2.0,
591
- step=0.05,
592
- value=1.0
593
- )
594
-
595
- cn_run = gr.Button("Generate", variant="primary")
596
-
597
- with gr.Accordion("Advanced Settings", open=False):
598
- cn_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
599
-
600
- with gr.Row():
601
- cn_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=30)
602
- cn_cfg = gr.Slider(label="CFG", minimum=1.0, maximum=10.0, step=0.1, value=5.0)
603
-
604
- with gr.Row():
605
- cn_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
606
- cn_random_seed = gr.Checkbox(label="Random", value=True)
607
-
608
- cn_lora = gr.Radio(
609
- label="LoRA Style",
610
- choices=["None"] + list(AVAILABLE_LORAS.keys()),
611
- value="None"
612
- )
613
- cn_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
614
-
615
- with gr.Column(scale=1):
616
- cn_control_preview = gr.Image(label="Control Image (Preprocessed)")
617
- cn_output = gr.Image(label="Generated Image")
618
- cn_seed_output = gr.Number(label="Used Seed")
619
-
620
- # Event handlers
621
- t2i_run.click(
622
- fn=generate_text2img,
623
- inputs=[
624
- t2i_prompt, t2i_negative, t2i_width, t2i_height,
625
- t2i_seed, t2i_random_seed, t2i_cfg, t2i_steps,
626
- t2i_lora, t2i_lora_scale
627
- ],
628
- outputs=[t2i_output, t2i_seed_output],
629
- api_name="text2img"
630
- )
631
-
632
- i2i_run.click(
633
- fn=generate_img2img,
634
- inputs=[
635
- i2i_input, i2i_prompt, i2i_negative, i2i_strength,
636
- i2i_seed, i2i_random_seed, i2i_cfg, i2i_steps,
637
- i2i_lora, i2i_lora_scale
638
- ],
639
- outputs=[i2i_output, i2i_seed_output],
640
- api_name="img2img"
641
- )
642
-
643
- cn_run.click(
644
- fn=generate_controlnet,
645
- inputs=[
646
- cn_input, cn_prompt, cn_control_type, cn_negative, cn_control_scale,
647
- cn_seed, cn_random_seed, cn_cfg, cn_steps,
648
- cn_lora, cn_lora_scale
649
- ],
650
- outputs=[cn_output, cn_control_preview, cn_seed_output],
651
- api_name="controlnet"
652
- )
653
-
654
- if __name__ == "__main__":
655
- demo.launch(
656
- show_api=True,
657
- share=False
658
- )
659
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+ import json
5
+ import torch
6
+ import cv2
7
+ from PIL import Image
8
+
9
+ # Опциональный импорт spaces - нужен только для HF Spaces
10
+ try:
11
+ import spaces
12
+ HF_SPACES = True
13
+ except ImportError:
14
+ HF_SPACES = False
15
+ class spaces:
16
+ @staticmethod
17
+ def GPU(duration=None):
18
+ def decorator(func):
19
+ return func
20
+ return decorator
21
+
22
+ import os
23
+ import time
24
+ import logging
25
+
26
+ from diffusers import (
27
+ DiffusionPipeline,
28
+ QwenImageControlNetPipeline,
29
+ QwenImageControlNetModel,
30
+ AutoPipelineForImage2Image
31
+ )
32
+ from huggingface_hub import hf_hub_download
33
+
34
+ # Настройка логирования
35
+ logging.basicConfig(
36
+ level=logging.INFO,
37
+ format='%(asctime)s | %(levelname)s | %(message)s',
38
+ datefmt='%Y-%m-%d %H:%M:%S'
39
+ )
40
+ logger = logging.getLogger(__name__)
41
+
42
+ logger.info("=" * 60)
43
+ logger.info("LOADING QWEN-SOLOBAND ADVANCED")
44
+ logger.info("=" * 60)
45
+
46
+ hf_token = os.environ.get("HF_TOKEN")
47
+ device = "cuda" if torch.cuda.is_available() else "cpu"
48
+ dtype = torch.bfloat16
49
+
50
+ # Логируем GPU
51
+ logger.info(f"CUDA available: {torch.cuda.is_available()}")
52
+ if torch.cuda.is_available():
53
+ gpu_count = torch.cuda.device_count()
54
+ logger.info(f"Number of GPUs: {gpu_count}")
55
+ for i in range(gpu_count):
56
+ logger.info(f" GPU {i}: {torch.cuda.get_device_name(i)}")
57
+ logger.info(f" Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.1f} GB")
58
+
59
+ # =================================================================
60
+ # ЗАГРУЗКА МОДЕЛЕЙ
61
+ # =================================================================
62
+
63
+ # 1. Базовая модель для Text-to-Image
64
+ logger.info("\n[1/3] Loading base Text2Image model...")
65
+ model_id = "Gerchegg/Qwen-Soloband-Diffusers"
66
+
67
+ try:
68
+ start_time = time.time()
69
+
70
+ # Определяем device_map
71
+ if gpu_count > 1:
72
+ device_map = "balanced"
73
+ logger.info(f" Device map: balanced ({gpu_count} GPUs)")
74
+ else:
75
+ device_map = None
76
+ logger.info(" Device map: single GPU")
77
+
78
+ # Загружаем базовую модель
79
+ pipe_txt2img = DiffusionPipeline.from_pretrained(
80
+ model_id,
81
+ torch_dtype=dtype,
82
+ device_map=device_map,
83
+ token=hf_token
84
+ )
85
+
86
+ if device_map is None:
87
+ pipe_txt2img.to(device)
88
+
89
+ load_time = time.time() - start_time
90
+ logger.info(f" ✓ Text2Image loaded in {load_time:.1f}s")
91
+
92
+ except Exception as e:
93
+ logger.error(f" ❌ Error loading Text2Image: {e}")
94
+ raise
95
+
96
+ # 2. Image-to-Image модель (используем ту же базу)
97
+ logger.info("\n[2/3] Creating Image2Image pipeline...")
98
+ try:
99
+ pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img)
100
+ logger.info(" ✓ Image2Image pipeline created")
101
+ except Exception as e:
102
+ logger.error(f" ❌ Error creating Image2Image: {e}")
103
+ pipe_img2img = None
104
+
105
+ # 3. ControlNet модель
106
+ logger.info("\n[3/3] Loading ControlNet model...")
107
+ try:
108
+ controlnet_model_id = "InstantX/Qwen-Image-ControlNet-Union"
109
+
110
+ # Проверяем наличие модели в кэше и скачиваем если нет
111
+ import os
112
+ from pathlib import Path
113
+
114
+ # Используем /workspace/.cache на RunPod или ~/.cache локально
115
+ if os.path.exists("/workspace"):
116
+ cache_base = Path("/workspace/.cache")
117
+ else:
118
+ cache_base = Path.home() / ".cache"
119
+
120
+ cache_dir = cache_base / "huggingface" / "hub" / "models--InstantX--Qwen-Image-ControlNet-Union"
121
+
122
+ if not cache_dir.exists():
123
+ logger.info(" 📥 ControlNet не найден в кэше, скачиваю...")
124
+ logger.info(f" Это займет 1-2 минуты...")
125
+
126
+ try:
127
+ from huggingface_hub import snapshot_download
128
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
129
+
130
+ snapshot_download(
131
+ repo_id=controlnet_model_id,
132
+ local_dir=cache_dir,
133
+ token=hf_token,
134
+ ignore_patterns=["*.md"]
135
+ )
136
+ logger.info(" ✓ ControlNet успешно загружен в кэш")
137
+ except Exception as download_error:
138
+ logger.warning(f" ⚠️ Не удалось загрузить ControlNet: {download_error}")
139
+ logger.warning(" Продолжаем без ControlNet...")
140
+ raise download_error
141
+ else:
142
+ logger.info(" ✓ ControlNet найден в кэше")
143
+
144
+ controlnet = QwenImageControlNetModel.from_pretrained(
145
+ controlnet_model_id,
146
+ torch_dtype=dtype,
147
+ token=hf_token
148
+ )
149
+
150
+ # Создаем ControlNet pipeline на базе базовой модели
151
+ pipe_controlnet = QwenImageControlNetPipeline.from_pretrained(
152
+ model_id,
153
+ controlnet=controlnet,
154
+ torch_dtype=dtype,
155
+ token=hf_token
156
+ )
157
+
158
+ if device_map is None:
159
+ pipe_controlnet.to(device)
160
+
161
+ logger.info(" ✓ ControlNet loaded")
162
+
163
+ except Exception as e:
164
+ logger.error(f" ❌ Error loading ControlNet: {e}")
165
+ logger.warning(" ControlNet will be disabled")
166
+ pipe_controlnet = None
167
+
168
+ # Оптимизации памяти
169
+ logger.info("\nApplying memory optimizations...")
170
+ for pipe in [pipe_txt2img, pipe_img2img, pipe_controlnet]:
171
+ if pipe and hasattr(pipe, 'vae'):
172
+ if hasattr(pipe.vae, 'enable_tiling'):
173
+ pipe.vae.enable_tiling()
174
+ if hasattr(pipe.vae, 'enable_slicing'):
175
+ pipe.vae.enable_slicing()
176
+
177
+ logger.info(" ✓ VAE tiling and slicing enabled")
178
+
179
+ logger.info("\n" + "=" * 60)
180
+ logger.info("✓ ALL MODELS LOADED")
181
+ logger.info("=" * 60)
182
+
183
+ # =================================================================
184
+ # PREPROCESSOR FUNCTIONS
185
+ # =================================================================
186
+
187
+ def resize_image(input_image, max_size=1024):
188
+ """Изменяет размер изображения с сохранением пропорций (кратно 8)"""
189
+ w, h = input_image.size
190
+ aspect_ratio = w / h
191
+
192
+ if w > h:
193
+ new_w = max_size
194
+ new_h = int(new_w / aspect_ratio)
195
+ else:
196
+ new_h = max_size
197
+ new_w = int(new_h * aspect_ratio)
198
+
199
+ # Кратно 8
200
+ new_w = new_w - (new_w % 8)
201
+ new_h = new_h - (new_h % 8)
202
+
203
+ if new_w == 0: new_w = 8
204
+ if new_h == 0: new_h = 8
205
+
206
+ return input_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
207
+
208
+ def extract_canny(input_image, low_threshold=100, high_threshold=200):
209
+ """Canny edge detection"""
210
+ image = np.array(input_image)
211
+ edges = cv2.Canny(image, low_threshold, high_threshold)
212
+ edges = edges[:, :, None]
213
+ edges = np.concatenate([edges, edges, edges], axis=2)
214
+ return Image.fromarray(edges)
215
+
216
+ def extract_depth(input_image):
217
+ """Depth map extraction (простая версия через grayscale)"""
218
+ # Для полноценного depth нужна модель Depth-Anything
219
+ # Упрощенная версия для демонстрации
220
+ gray = input_image.convert('L')
221
+ return gray.convert('RGB')
222
+
223
+ def extract_pose(input_image):
224
+ """Pose detection (заглушка - нужна модель OpenPose)"""
225
+ # Для полноценного pose нужна модель OpenPose
226
+ # Возвращаем Canny как fallback
227
+ return extract_canny(input_image)
228
+
229
+ def get_control_image(input_image, control_type):
230
+ """Применяет препроцессор к изображению"""
231
+ if control_type == "Canny":
232
+ return extract_canny(input_image)
233
+ elif control_type == "Depth":
234
+ return extract_depth(input_image)
235
+ elif control_type == "Pose":
236
+ return extract_pose(input_image)
237
+ else:
238
+ return extract_canny(input_image) # Fallback
239
+
240
+ # =================================================================
241
+ # LORA FUNCTIONS
242
+ # =================================================================
243
+
244
+ # Список доступных LoRA
245
+ AVAILABLE_LORAS = {
246
+ "Realism": {
247
+ "repo": "flymy-ai/qwen-image-realism-lora",
248
+ "trigger": "Super Realism portrait of",
249
+ "weights": "pytorch_lora_weights.safetensors"
250
+ },
251
+ "Anime": {
252
+ "repo": "alfredplpl/qwen-image-modern-anime-lora",
253
+ "trigger": "Japanese modern anime style, ",
254
+ "weights": "pytorch_lora_weights.safetensors"
255
+ },
256
+ "Analog Film": {
257
+ "repo": "janekm/analog_film",
258
+ "trigger": "fifthel",
259
+ "weights": "converted_complete.safetensors"
260
+ }
261
+ }
262
+
263
+ # =================================================================
264
+ # GENERATION FUNCTIONS
265
+ # =================================================================
266
+
267
+ MAX_SEED = np.iinfo(np.int32).max
268
+
269
+ @spaces.GPU(duration=180)
270
+ def generate_text2img(
271
+ prompt,
272
+ negative_prompt=" ",
273
+ width=1664,
274
+ height=928,
275
+ seed=42,
276
+ randomize_seed=False,
277
+ guidance_scale=2.5,
278
+ num_inference_steps=40,
279
+ lora_name="None",
280
+ lora_scale=1.0,
281
+ progress=gr.Progress(track_tqdm=True)
282
+ ):
283
+ """Text-to-Image генерация"""
284
+
285
+ logger.info("\n" + "=" * 60)
286
+ logger.info("TEXT-TO-IMAGE GENERATION")
287
+ logger.info("=" * 60)
288
+
289
+ if randomize_seed:
290
+ seed = random.randint(0, MAX_SEED)
291
+
292
+ logger.info(f" Prompt: {prompt[:100]}...")
293
+ logger.info(f" Size: {width}x{height}")
294
+ logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
295
+ logger.info(f" Seed: {seed}")
296
+ logger.info(f" LoRA: {lora_name} (scale: {lora_scale})")
297
+
298
+ try:
299
+ # Загружаем LoRA если выбрана
300
+ if lora_name != "None" and lora_name in AVAILABLE_LORAS:
301
+ lora_info = AVAILABLE_LORAS[lora_name]
302
+ logger.info(f" Loading LoRA: {lora_info['repo']}")
303
+
304
+ pipe_txt2img.load_lora_weights(
305
+ lora_info['repo'],
306
+ weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
307
+ token=hf_token
308
+ )
309
+
310
+ # Добавляем trigger word
311
+ if lora_info['trigger']:
312
+ prompt = lora_info['trigger'] + prompt
313
+ logger.info(f" Added trigger: {lora_info['trigger']}")
314
+
315
+ generator = torch.Generator(device=device).manual_seed(seed)
316
+
317
+ image = pipe_txt2img(
318
+ prompt=prompt,
319
+ negative_prompt=negative_prompt,
320
+ width=width,
321
+ height=height,
322
+ num_inference_steps=num_inference_steps,
323
+ true_cfg_scale=guidance_scale,
324
+ generator=generator
325
+ ).images[0]
326
+
327
+ # Выгружаем LoRA после генерации
328
+ if lora_name != "None":
329
+ pipe_txt2img.unload_lora_weights()
330
+
331
+ logger.info(" ✓ Generation completed")
332
+
333
+ return image, seed
334
+
335
+ except Exception as e:
336
+ logger.error(f" ❌ Error: {e}")
337
+ raise
338
+
339
@spaces.GPU(duration=180)
def generate_img2img(
    input_image,
    prompt,
    negative_prompt=" ",
    strength=0.75,
    seed=42,
    randomize_seed=False,
    guidance_scale=2.5,
    num_inference_steps=40,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """Image-to-Image generation via the global ``pipe_img2img`` pipeline.

    Optionally loads a LoRA adapter from ``AVAILABLE_LORAS`` and applies it
    at ``lora_scale`` strength, prepending its trigger word to the prompt.

    Args:
        input_image: PIL image to transform (required; ``gr.Image(type="pil")``).
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        strength: Denoising strength (0.0 = keep original, 1.0 = full redraw).
        seed: RNG seed; replaced by a random one when ``randomize_seed`` is True.
        randomize_seed: Pick a fresh random seed instead of ``seed``.
        guidance_scale: Passed to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of diffusion steps.
        lora_name: Key into ``AVAILABLE_LORAS``, or "None" for no LoRA.
        lora_scale: LoRA adapter weight (0.0 effectively disables it).
        progress: Gradio progress tracker linked to tqdm.

    Returns:
        tuple: (generated PIL image, seed actually used).

    Raises:
        gr.Error: If no input image was provided or the pipeline is unavailable.
    """

    logger.info("\n" + "=" * 60)
    logger.info("IMAGE-TO-IMAGE GENERATION")
    logger.info("=" * 60)

    if input_image is None:
        raise gr.Error("Please upload an input image")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Downscale so the longest side fits the model's working resolution.
    resized = resize_image(input_image, max_size=1024)

    logger.info(f" Prompt: {prompt[:100]}...")
    logger.info(f" Input size: {input_image.size} → {resized.size}")
    logger.info(f" Strength: {strength}")
    logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
    logger.info(f" LoRA: {lora_name}")

    # Tracks whether an adapter was actually loaded, so cleanup in ``finally``
    # never calls unload on a pipeline that has no adapter.
    lora_loaded = False
    try:
        if pipe_img2img is None:
            raise gr.Error("Image2Image pipeline not available")

        # Load the requested LoRA (if any).
        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            pipe_img2img.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                adapter_name="style",
                token=hf_token
            )
            # BUGFIX: ``lora_scale`` was previously accepted but never applied,
            # so the "LoRA Strength" slider had no effect. Apply it here.
            pipe_img2img.set_adapters(["style"], adapter_weights=[lora_scale])
            lora_loaded = True
            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt

        generator = torch.Generator(device=device).manual_seed(seed)

        image = pipe_img2img(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=resized,
            strength=strength,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=guidance_scale,
            generator=generator
        ).images[0]

        logger.info(" ✓ Generation completed")

        return image, seed

    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # BUGFIX: unload in ``finally`` so a failed generation cannot leak the
        # adapter into subsequent requests (previously only the success path
        # unloaded it).
        if lora_loaded:
            pipe_img2img.unload_lora_weights()
412
+
413
@spaces.GPU(duration=180)
def generate_controlnet(
    input_image,
    prompt,
    control_type="Canny",
    negative_prompt=" ",
    controlnet_scale=1.0,
    seed=42,
    randomize_seed=False,
    guidance_scale=5.0,
    num_inference_steps=30,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """ControlNet-conditioned generation via the global ``pipe_controlnet``.

    Preprocesses the input image with the chosen control extractor
    (``get_control_image``) and generates a new image conditioned on it.
    Optionally loads a LoRA adapter applied at ``lora_scale`` strength.

    Args:
        input_image: PIL image used to derive the control map (required).
        prompt: Positive text prompt.
        control_type: Preprocessor name ("Canny", "Depth" or "Pose").
        negative_prompt: Negative text prompt.
        controlnet_scale: ControlNet conditioning strength.
        seed: RNG seed; replaced by a random one when ``randomize_seed`` is True.
        randomize_seed: Pick a fresh random seed instead of ``seed``.
        guidance_scale: Guidance strength for the pipeline.
        num_inference_steps: Number of diffusion steps.
        lora_name: Key into ``AVAILABLE_LORAS``, or "None" for no LoRA.
        lora_scale: LoRA adapter weight (0.0 effectively disables it).
        progress: Gradio progress tracker linked to tqdm.

    Returns:
        tuple: (generated image, preprocessed control image, seed used).

    Raises:
        gr.Error: If no input image was provided or the pipeline is unavailable.
    """

    logger.info("\n" + "=" * 60)
    logger.info("CONTROLNET GENERATION")
    logger.info("=" * 60)

    if input_image is None:
        raise gr.Error("Please upload an input image")

    if pipe_controlnet is None:
        raise gr.Error("ControlNet pipeline not available")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Resize, then derive the control map (edges / depth / pose).
    resized = resize_image(input_image, max_size=1024)
    control_image = get_control_image(resized, control_type)

    logger.info(f" Prompt: {prompt[:100]}...")
    logger.info(f" Control type: {control_type}")
    logger.info(f" Control scale: {controlnet_scale}")
    logger.info(f" Image size: {resized.size}")
    logger.info(f" LoRA: {lora_name}")

    # Tracks whether an adapter was actually loaded, so cleanup in ``finally``
    # never calls unload on a pipeline that has no adapter.
    lora_loaded = False
    try:
        # Load the requested LoRA (if any).
        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            pipe_controlnet.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                adapter_name="style",
                token=hf_token
            )
            # BUGFIX: ``lora_scale`` was previously accepted but never applied,
            # so the "LoRA Strength" slider had no effect. Apply it here.
            pipe_controlnet.set_adapters(["style"], adapter_weights=[lora_scale])
            lora_loaded = True
            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt

        generator = torch.Generator(device=device).manual_seed(seed)

        # NOTE(review): the other tabs pass ``true_cfg_scale``; this pipeline
        # call uses ``guidance_scale`` — confirm which kwarg
        # QwenImageControlNetPipeline actually honors.
        image = pipe_controlnet(
            prompt=prompt,
            negative_prompt=negative_prompt,
            control_image=control_image,
            controlnet_conditioning_scale=controlnet_scale,
            width=resized.width,
            height=resized.height,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator
        ).images[0]

        logger.info(" ✓ Generation completed")

        return image, control_image, seed

    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # BUGFIX: unload in ``finally`` so a failed generation cannot leak the
        # adapter into subsequent requests (previously only the success path
        # unloaded it).
        if lora_loaded:
            pipe_controlnet.unload_lora_weights()
490
+
491
# =================================================================
# GRADIO INTERFACE
# =================================================================

# Upper bound for seed sliders and random-seed generation (2**31 - 1).
# NOTE: the generation functions above reference this lazily at call time,
# so defining it after them is safe.
MAX_SEED = np.iinfo(np.int32).max

# Page-level CSS: centers the app container and caps its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1400px;
}
"""
503
+
504
# Build the three-tab UI (Text2Img / Img2Img / ControlNet) and wire each
# "Generate" button to its backend function. Each .click also registers a
# named API endpoint (api_name), so all features are callable via the API.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎨 Qwen Soloband - Image2Image + ControlNet + LoRA
    
    **Продвинутая модель генерации** с поддержкой Image-to-Image, ControlNet и LoRA.
    
    ### ✨ Возможности:
    - 🖼️ **Text-to-Image** - Генерация из текста
    - 🔄 **Image-to-Image** - Модификация изображений (denoising strength)
    - 🎮 **ControlNet** - Управление структурой (Canny, Depth, Pose)
    - 🎭 **LoRA** - Стилизация (Realism, Anime, Film)
    - 🔌 **Full API** - Все функции доступны через API
    
    **Модель**: [Gerchegg/Qwen-Soloband-Diffusers](https://huggingface.co/Gerchegg/Qwen-Soloband-Diffusers)
    """)

    with gr.Tabs() as tabs:

        # TAB 1: Text-to-Image
        with gr.Tab("📝 Text-to-Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    t2i_prompt = gr.Text(
                        label="Prompt",
                        placeholder="SB_AI, a beautiful landscape...",
                        lines=3
                    )

                    t2i_run = gr.Button("Generate", variant="primary")

                    with gr.Accordion("Advanced Settings", open=False):
                        t2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")

                        with gr.Row():
                            t2i_width = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=1664)
                            t2i_height = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=928)

                        with gr.Row():
                            t2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
                            t2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)

                        with gr.Row():
                            t2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            t2i_random_seed = gr.Checkbox(label="Random", value=True)

                        # LoRA choices come from the module-level AVAILABLE_LORAS dict.
                        t2i_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        t2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)

                with gr.Column(scale=1):
                    t2i_output = gr.Image(label="Generated Image")
                    t2i_seed_output = gr.Number(label="Used Seed")

        # TAB 2: Image-to-Image
        with gr.Tab("🔄 Image-to-Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    i2i_input = gr.Image(type="pil", label="Input Image")
                    i2i_prompt = gr.Text(
                        label="Prompt",
                        placeholder="Transform this image into...",
                        lines=3
                    )

                    i2i_strength = gr.Slider(
                        label="Denoising Strength",
                        info="0.0 = original image, 1.0 = complete redraw",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.75
                    )

                    i2i_run = gr.Button("Generate", variant="primary")

                    with gr.Accordion("Advanced Settings", open=False):
                        i2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")

                        with gr.Row():
                            i2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
                            i2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)

                        with gr.Row():
                            i2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            i2i_random_seed = gr.Checkbox(label="Random", value=True)

                        i2i_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        i2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)

                with gr.Column(scale=1):
                    i2i_output = gr.Image(label="Generated Image")
                    i2i_seed_output = gr.Number(label="Used Seed")

        # TAB 3: ControlNet
        with gr.Tab("🎮 ControlNet"):
            with gr.Row():
                with gr.Column(scale=1):
                    cn_input = gr.Image(type="pil", label="Input Image")
                    cn_prompt = gr.Text(
                        label="Prompt",
                        placeholder="A detailed description...",
                        lines=3
                    )

                    cn_control_type = gr.Radio(
                        label="Control Type (Preprocessor)",
                        choices=["Canny", "Depth", "Pose"],
                        value="Canny"
                    )

                    cn_control_scale = gr.Slider(
                        label="Control Strength",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.05,
                        value=1.0
                    )

                    cn_run = gr.Button("Generate", variant="primary")

                    with gr.Accordion("Advanced Settings", open=False):
                        cn_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")

                        with gr.Row():
                            cn_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=30)
                            cn_cfg = gr.Slider(label="CFG", minimum=1.0, maximum=10.0, step=0.1, value=5.0)

                        with gr.Row():
                            cn_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            cn_random_seed = gr.Checkbox(label="Random", value=True)

                        cn_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        cn_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)

                with gr.Column(scale=1):
                    cn_control_preview = gr.Image(label="Control Image (Preprocessed)")
                    cn_output = gr.Image(label="Generated Image")
                    cn_seed_output = gr.Number(label="Used Seed")

    # Event handlers
    # NOTE: `inputs` lists are positional — their order must exactly match
    # each backend function's parameter order.
    t2i_run.click(
        fn=generate_text2img,
        inputs=[
            t2i_prompt, t2i_negative, t2i_width, t2i_height,
            t2i_seed, t2i_random_seed, t2i_cfg, t2i_steps,
            t2i_lora, t2i_lora_scale
        ],
        outputs=[t2i_output, t2i_seed_output],
        api_name="text2img"
    )

    i2i_run.click(
        fn=generate_img2img,
        inputs=[
            i2i_input, i2i_prompt, i2i_negative, i2i_strength,
            i2i_seed, i2i_random_seed, i2i_cfg, i2i_steps,
            i2i_lora, i2i_lora_scale
        ],
        outputs=[i2i_output, i2i_seed_output],
        api_name="img2img"
    )

    cn_run.click(
        fn=generate_controlnet,
        inputs=[
            cn_input, cn_prompt, cn_control_type, cn_negative, cn_control_scale,
            cn_seed, cn_random_seed, cn_cfg, cn_steps,
            cn_lora, cn_lora_scale
        ],
        # generate_controlnet returns (image, control_image, seed).
        outputs=[cn_output, cn_control_preview, cn_seed_output],
        api_name="controlnet"
    )
687
+
688
if __name__ == "__main__":
    # Launch the Gradio app. show_api=True exposes the named endpoints
    # (text2img, img2img, controlnet) in the auto-generated API docs;
    # share=False keeps it local to the Space (no public gradio.live tunnel).
    demo.launch(
        show_api=True,
        share=False
    )