Gerchegg committed on
Commit
de94ab7
·
verified ·
1 Parent(s): aaf0014

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +182 -470
app.py CHANGED
@@ -3,7 +3,6 @@ import numpy as np
3
  import random
4
  import json
5
  import torch
6
- import cv2
7
  from PIL import Image
8
 
9
  # Опциональный импорт spaces - нужен только для HF Spaces
@@ -23,13 +22,7 @@ import os
23
  import time
24
  import logging
25
 
26
- from diffusers import (
27
- DiffusionPipeline,
28
- QwenImageControlNetPipeline,
29
- QwenImageControlNetModel,
30
- AutoPipelineForImage2Image
31
- )
32
- from huggingface_hub import hf_hub_download
33
 
34
  # Настройка логирования
35
  logging.basicConfig(
@@ -39,28 +32,10 @@ logging.basicConfig(
39
  )
40
  logger = logging.getLogger(__name__)
41
 
42
- # Preprocessor imports
43
- try:
44
- from controlnet_aux import OpenposeDetector, AnylineDetector
45
- CONTROLNET_AUX_AVAILABLE = True
46
- except ImportError:
47
- CONTROLNET_AUX_AVAILABLE = False
48
-
49
- try:
50
- from depth_anything_v2.dpt import DepthAnythingV2
51
- DEPTH_ANYTHING_AVAILABLE = True
52
- except ImportError:
53
- DEPTH_ANYTHING_AVAILABLE = False
54
-
55
  logger.info("=" * 60)
56
- logger.info("LOADING QWEN-SOLOBAND ADVANCED")
57
  logger.info("=" * 60)
58
-
59
- # Логируем доступность препроцессоров
60
- if not CONTROLNET_AUX_AVAILABLE:
61
- logger.warning("⚠️ controlnet_aux not available - Pose/Soft Edge будут упрощенными")
62
- if not DEPTH_ANYTHING_AVAILABLE:
63
- logger.warning("⚠️ depth_anything_v2 not available - Depth будет упрощенным")
64
 
65
  hf_token = os.environ.get("HF_TOKEN")
66
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -79,19 +54,16 @@ if torch.cuda.is_available():
79
  # ЗАГРУЗКА МОДЕЛЕЙ
80
  # =================================================================
81
 
82
- # 1. Базовая модель для Text-to-Image
83
- logger.info("\n[1/3] Loading base Text2Image model...")
84
  model_id = "Gerchegg/Qwen-Soloband-Diffusers"
85
 
 
 
86
  try:
87
  start_time = time.time()
88
 
89
- # Определяем device_map
90
  if gpu_count > 1:
91
  device_map = "balanced"
92
  logger.info(f" Device map: balanced ({gpu_count} GPUs)")
93
-
94
- # Загружаем базовую модель с распределением
95
  pipe_txt2img = DiffusionPipeline.from_pretrained(
96
  model_id,
97
  torch_dtype=dtype,
@@ -100,8 +72,6 @@ try:
100
  )
101
  else:
102
  logger.info(" Device map: single GPU")
103
-
104
- # Для одной GPU загружаем сразу на устройство (экономит память)
105
  pipe_txt2img = DiffusionPipeline.from_pretrained(
106
  model_id,
107
  torch_dtype=dtype,
@@ -115,8 +85,8 @@ except Exception as e:
115
  logger.error(f" ❌ Error loading Text2Image: {e}")
116
  raise
117
 
118
- # 2. Image-to-Image модель (используем ту же базу)
119
- logger.info("\n[2/3] Creating Image2Image pipeline...")
120
  try:
121
  pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img)
122
  logger.info(" ✓ Image2Image pipeline created")
@@ -124,69 +94,9 @@ except Exception as e:
124
  logger.error(f" ❌ Error creating Image2Image: {e}")
125
  pipe_img2img = None
126
 
127
- # 3. ControlNet модель
128
- logger.info("\n[3/3] Loading ControlNet model...")
129
- try:
130
- controlnet_model_id = "InstantX/Qwen-Image-ControlNet-Union"
131
-
132
- # Проверяем наличие модели в кэше и скачиваем если нет
133
- import os
134
- from pathlib import Path
135
-
136
- # Используем /workspace/.cache на RunPod или ~/.cache локально
137
- if os.path.exists("/workspace"):
138
- cache_base = Path("/workspace/.cache")
139
- else:
140
- cache_base = Path.home() / ".cache"
141
-
142
- cache_dir = cache_base / "huggingface" / "hub" / "models--InstantX--Qwen-Image-ControlNet-Union"
143
-
144
- if not cache_dir.exists():
145
- logger.info(" 📥 ControlNet не найден в кэше, скачиваю...")
146
- logger.info(f" Это займет 1-2 минуты...")
147
-
148
- try:
149
- from huggingface_hub import snapshot_download
150
- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
151
-
152
- snapshot_download(
153
- repo_id=controlnet_model_id,
154
- local_dir=cache_dir,
155
- token=hf_token,
156
- ignore_patterns=["*.md"]
157
- )
158
- logger.info(" ✓ ControlNet успешно загружен в кэш")
159
- except Exception as download_error:
160
- logger.warning(f" ⚠️ Не удалось загрузить ControlNet: {download_error}")
161
- logger.warning(" Продолжаем без ControlNet...")
162
- raise download_error
163
- else:
164
- logger.info(" ✓ ControlNet найден в кэше")
165
-
166
- controlnet = QwenImageControlNetModel.from_pretrained(
167
- controlnet_model_id,
168
- torch_dtype=dtype,
169
- token=hf_token
170
- )
171
-
172
- # Создаем ControlNet pipeline на базе базовой модели
173
- pipe_controlnet = QwenImageControlNetPipeline.from_pretrained(
174
- model_id,
175
- controlnet=controlnet,
176
- torch_dtype=dtype,
177
- token=hf_token
178
- ).to(device)
179
-
180
- logger.info(" ✓ ControlNet loaded")
181
-
182
- except Exception as e:
183
- logger.error(f" ❌ Error loading ControlNet: {e}")
184
- logger.warning(" ControlNet will be disabled")
185
- pipe_controlnet = None
186
-
187
  # Оптимизации памяти
188
  logger.info("\nApplying memory optimizations...")
189
- for pipe in [pipe_txt2img, pipe_img2img, pipe_controlnet]:
190
  if pipe and hasattr(pipe, 'vae'):
191
  if hasattr(pipe.vae, 'enable_tiling'):
192
  pipe.vae.enable_tiling()
@@ -200,54 +110,7 @@ logger.info("✓ ALL MODELS LOADED")
200
  logger.info("=" * 60)
201
 
202
  # =================================================================
203
- # ЗАГРУЗКА ПРЕПРОЦЕССОРОВ
204
- # =================================================================
205
-
206
- openpose_detector = None
207
- anyline_detector = None
208
- depth_anything = None
209
-
210
- if CONTROLNET_AUX_AVAILABLE:
211
- try:
212
- logger.info("\nLoading advanced preprocessors...")
213
- openpose_detector = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
214
- logger.info(" ✓ OpenPose detector loaded")
215
- except Exception as e:
216
- logger.warning(f" ⚠️ OpenPose failed: {e}")
217
-
218
- try:
219
- anyline_detector = AnylineDetector.from_pretrained(
220
- "TheMistoAI/MistoLine",
221
- filename="MTEED.pth",
222
- subfolder="Anyline"
223
- ).to(device)
224
- logger.info(" ✓ Anyline (Soft Edge) detector loaded")
225
- except Exception as e:
226
- logger.warning(f" ⚠️ Anyline failed: {e}")
227
-
228
- if DEPTH_ANYTHING_AVAILABLE:
229
- try:
230
- logger.info("\nLoading Depth Anything V2...")
231
- depth_model_config = {
232
- 'encoder': 'vitl',
233
- 'features': 256,
234
- 'out_channels': [256, 512, 1024, 1024]
235
- }
236
- depth_anything = DepthAnythingV2(**depth_model_config)
237
- depth_anything_ckpt_path = hf_hub_download(
238
- repo_id="depth-anything/Depth-Anything-V2-Large",
239
- filename="depth_anything_v2_vitl.pth",
240
- repo_type="model"
241
- )
242
- depth_anything.load_state_dict(torch.load(depth_anything_ckpt_path, map_location="cpu"))
243
- depth_anything = depth_anything.to(device).eval()
244
- logger.info(" ✓ Depth Anything V2 loaded")
245
- except Exception as e:
246
- logger.warning(f" ⚠️ Depth Anything V2 failed: {e}")
247
- depth_anything = None
248
-
249
- # =================================================================
250
- # PREPROCESSOR FUNCTIONS
251
  # =================================================================
252
 
253
  def resize_image(input_image, max_size=1024):
@@ -271,65 +134,10 @@ def resize_image(input_image, max_size=1024):
271
 
272
  return input_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
273
 
274
- def extract_canny(input_image, low_threshold=100, high_threshold=200):
275
- """Canny edge detection"""
276
- image = np.array(input_image)
277
- edges = cv2.Canny(image, low_threshold, high_threshold)
278
- edges = edges[:, :, None]
279
- edges = np.concatenate([edges, edges, edges], axis=2)
280
- return Image.fromarray(edges)
281
-
282
- def extract_depth(input_image):
283
- """Depth map extraction using Depth Anything V2 or simple grayscale"""
284
- if depth_anything is not None:
285
- # Используем Depth Anything V2
286
- image_np = np.array(input_image)
287
- with torch.no_grad():
288
- depth = depth_anything.infer_image(image_np[:, :, ::-1])
289
- depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
290
- depth = depth.astype(np.uint8)
291
- return Image.fromarray(depth).convert('RGB')
292
- else:
293
- # Fallback - простая grayscale карта
294
- gray = input_image.convert('L')
295
- return gray.convert('RGB')
296
-
297
- def extract_pose(input_image):
298
- """Pose detection using OpenPose or Canny fallback"""
299
- if openpose_detector is not None:
300
- # Используем OpenPose
301
- return openpose_detector(input_image, hand_and_face=True)
302
- else:
303
- # Fallback - Canny edges
304
- return extract_canny(input_image)
305
-
306
- def extract_soft_edge(input_image):
307
- """Soft Edge detection using Anyline or Canny fallback"""
308
- if anyline_detector is not None:
309
- # Используем Anyline для мягких краев
310
- return anyline_detector(input_image)
311
- else:
312
- # Fallback - Canny edges
313
- return extract_canny(input_image)
314
-
315
- def get_control_image(input_image, control_type):
316
- """Применяет препроцессор к изображению"""
317
- if control_type == "Canny":
318
- return extract_canny(input_image)
319
- elif control_type == "Soft Edge":
320
- return extract_soft_edge(input_image)
321
- elif control_type == "Depth":
322
- return extract_depth(input_image)
323
- elif control_type == "Pose":
324
- return extract_pose(input_image)
325
- else:
326
- return extract_canny(input_image) # Fallback
327
-
328
  # =================================================================
329
  # LORA FUNCTIONS
330
  # =================================================================
331
 
332
- # Список доступных LoRA
333
  AVAILABLE_LORAS = {
334
  "Realism": {
335
  "repo": "flymy-ai/qwen-image-realism-lora",
@@ -348,6 +156,38 @@ AVAILABLE_LORAS = {
348
  }
349
  }
350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  # =================================================================
352
  # GENERATION FUNCTIONS
353
  # =================================================================
@@ -378,29 +218,17 @@ def generate_text2img(
378
  seed = random.randint(0, MAX_SEED)
379
 
380
  logger.info(f" Prompt: {prompt[:100]}...")
381
- logger.info(f" Size: {width}x{height}")
382
  logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
383
- logger.info(f" Seed: {seed}")
384
- logger.info(f" LoRA: {lora_name} (scale: {lora_scale})")
385
 
386
  try:
387
  # Загружаем LoRA если выбрана
388
- if lora_name != "None" and lora_name in AVAILABLE_LORAS:
389
- lora_info = AVAILABLE_LORAS[lora_name]
390
- logger.info(f" Loading LoRA: {lora_info['repo']}")
391
-
392
- pipe_txt2img.load_lora_weights(
393
- lora_info['repo'],
394
- weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
395
- token=hf_token
396
- )
397
-
398
- # Добавляем trigger word
399
- if lora_info['trigger']:
400
- prompt = lora_info['trigger'] + prompt
401
- logger.info(f" Added trigger: {lora_info['trigger']}")
402
 
403
- generator = torch.Generator(device=device).manual_seed(seed)
404
 
405
  image = pipe_txt2img(
406
  prompt=prompt,
@@ -412,11 +240,7 @@ def generate_text2img(
412
  generator=generator
413
  ).images[0]
414
 
415
- # Выгружаем LoRA после генерации
416
- if lora_name != "None":
417
- pipe_txt2img.unload_lora_weights()
418
-
419
- logger.info(" ✓ Generation completed")
420
 
421
  return image, seed
422
 
@@ -444,37 +268,28 @@ def generate_img2img(
444
  logger.info("IMAGE-TO-IMAGE GENERATION")
445
  logger.info("=" * 60)
446
 
447
- if input_image is None:
448
- raise gr.Error("Please upload an input image")
449
 
450
  if randomize_seed:
451
  seed = random.randint(0, MAX_SEED)
452
 
453
- # Изменяем размер изображения
454
  resized = resize_image(input_image, max_size=1024)
455
 
456
  logger.info(f" Prompt: {prompt[:100]}...")
457
- logger.info(f" Input size: {input_image.size} → {resized.size}")
458
  logger.info(f" Strength: {strength}")
459
  logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
460
- logger.info(f" LoRA: {lora_name}")
461
 
462
  try:
463
- if pipe_img2img is None:
464
- raise gr.Error("Image2Image pipeline not available")
465
-
466
  # Загружаем LoRA если выбрана
467
- if lora_name != "None" and lora_name in AVAILABLE_LORAS:
468
- lora_info = AVAILABLE_LORAS[lora_name]
469
- pipe_img2img.load_lora_weights(
470
- lora_info['repo'],
471
- weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
472
- token=hf_token
473
- )
474
- if lora_info['trigger']:
475
- prompt = lora_info['trigger'] + prompt
476
 
477
- generator = torch.Generator(device=device).manual_seed(seed)
478
 
479
  image = pipe_img2img(
480
  prompt=prompt,
@@ -482,296 +297,193 @@ def generate_img2img(
482
  image=resized,
483
  strength=strength,
484
  num_inference_steps=num_inference_steps,
485
- true_cfg_scale=guidance_scale,
486
- generator=generator
487
- ).images[0]
488
-
489
- # Выгружаем LoRA
490
- if lora_name != "None":
491
- pipe_img2img.unload_lora_weights()
492
-
493
- logger.info(" ✓ Generation completed")
494
-
495
- return image, seed
496
-
497
- except Exception as e:
498
- logger.error(f" ❌ Error: {e}")
499
- raise
500
-
501
- @spaces.GPU(duration=180)
502
- def generate_controlnet(
503
- input_image,
504
- prompt,
505
- control_type="Canny",
506
- negative_prompt=" ",
507
- controlnet_conditioning_scale=1.0,
508
- seed=42,
509
- randomize_seed=False,
510
- guidance_scale=5.0,
511
- num_inference_steps=30,
512
- lora_name="None",
513
- lora_scale=1.0,
514
- progress=gr.Progress(track_tqdm=True)
515
- ):
516
- """ControlNet генерация"""
517
-
518
- logger.info("\n" + "=" * 60)
519
- logger.info("CONTROLNET GENERATION")
520
- logger.info("=" * 60)
521
-
522
- if input_image is None:
523
- raise gr.Error("Please upload an input image")
524
-
525
- if pipe_controlnet is None:
526
- raise gr.Error("ControlNet pipeline not available")
527
-
528
- if randomize_seed:
529
- seed = random.randint(0, MAX_SEED)
530
-
531
- # Изменяем размер и применяем препроцессор
532
- resized = resize_image(input_image, max_size=1024)
533
- control_image = get_control_image(resized, control_type)
534
-
535
- logger.info(f" Prompt: {prompt[:100]}...")
536
- logger.info(f" Control type: {control_type}")
537
- logger.info(f" Control scale: {controlnet_conditioning_scale}")
538
- logger.info(f" Image size: {resized.size}")
539
- logger.info(f" LoRA: {lora_name}")
540
-
541
- try:
542
- # Загружаем LoRA если выбрана
543
- if lora_name != "None" and lora_name in AVAILABLE_LORAS:
544
- lora_info = AVAILABLE_LORAS[lora_name]
545
- pipe_controlnet.load_lora_weights(
546
- lora_info['repo'],
547
- weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
548
- token=hf_token
549
- )
550
- if lora_info['trigger']:
551
- prompt = lora_info['trigger'] + prompt
552
-
553
- generator = torch.Generator(device=device).manual_seed(seed)
554
-
555
- image = pipe_controlnet(
556
- prompt=prompt,
557
- negative_prompt=negative_prompt,
558
- control_image=control_image,
559
- controlnet_conditioning_scale=controlnet_conditioning_scale,
560
- width=resized.width,
561
- height=resized.height,
562
- num_inference_steps=num_inference_steps,
563
  guidance_scale=guidance_scale,
564
  generator=generator
565
  ).images[0]
566
 
567
- # Выгружаем LoRA
568
- if lora_name != "None":
569
- pipe_controlnet.unload_lora_weights()
570
-
571
- logger.info(" ✓ Generation completed")
572
 
573
- return image, control_image, seed
574
 
575
  except Exception as e:
576
  logger.error(f" ❌ Error: {e}")
577
  raise
578
 
579
  # =================================================================
580
- # GRADIO INTERFACE
581
  # =================================================================
582
 
583
- MAX_SEED = np.iinfo(np.int32).max
584
-
585
  css = """
586
  #col-container {
587
  margin: 0 auto;
588
- max-width: 1400px;
589
  }
590
  """
591
 
592
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
593
  gr.Markdown("""
594
- # 🎨 Qwen Soloband - Image2Image + ControlNet + LoRA
595
 
596
- **Продвинутая модель генерации** с поддержкой Image-to-Image, ControlNet и LoRA.
597
 
598
- ### ✨ Возможности:
599
- - 🖼️ **Text-to-Image** - Генерация из текста
600
- - 🔄 **Image-to-Image** - Модификация изображений (denoising strength)
601
- - 🎮 **ControlNet** - Управление структурой (Canny, Depth, Pose)
602
- - 🎭 **LoRA** - Стилизация (Realism, Anime, Film)
603
- - 🔌 **Full API** - Все функции доступны через API
604
 
605
- **Модель**: [Gerchegg/Qwen-Soloband-Diffusers](https://huggingface.co/Gerchegg/Qwen-Soloband-Diffusers)
606
  """)
607
 
608
- with gr.Tabs() as tabs:
609
-
610
- # TAB 1: Text-to-Image
611
- with gr.Tab("📝 Text-to-Image"):
612
  with gr.Row():
613
  with gr.Column(scale=1):
614
- t2i_prompt = gr.Text(
615
  label="Prompt",
616
- placeholder="SB_AI, a beautiful landscape...",
617
  lines=3
618
  )
 
619
 
620
- t2i_run = gr.Button("Generate", variant="primary")
621
-
622
- with gr.Accordion("Advanced Settings", open=False):
623
- t2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
 
 
624
 
625
  with gr.Row():
626
- t2i_width = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=1664)
627
- t2i_height = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=928)
 
 
 
 
628
 
629
  with gr.Row():
630
- t2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
631
- t2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)
 
 
 
 
632
 
633
  with gr.Row():
634
- t2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
635
- t2i_random_seed = gr.Checkbox(label="Random", value=True)
 
 
636
 
637
- t2i_lora = gr.Radio(
638
- label="LoRA Style",
639
- choices=["None"] + list(AVAILABLE_LORAS.keys()),
640
- value="None"
641
- )
642
- t2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
 
 
 
643
 
644
  with gr.Column(scale=1):
645
  t2i_output = gr.Image(label="Generated Image")
646
  t2i_seed_output = gr.Number(label="Used Seed")
 
 
 
 
 
 
 
 
 
 
 
647
 
648
- # TAB 2: Image-to-Image
649
- with gr.Tab("🔄 Image-to-Image"):
650
  with gr.Row():
651
  with gr.Column(scale=1):
652
  i2i_input = gr.Image(type="pil", label="Input Image")
653
- i2i_prompt = gr.Text(
654
  label="Prompt",
655
- placeholder="Transform this image into...",
656
  lines=3
657
  )
 
658
 
659
- i2i_strength = gr.Slider(
660
- label="Denoising Strength",
661
- info="0.0 = original image, 1.0 = complete redraw",
662
- minimum=0.0,
663
- maximum=1.0,
664
- step=0.05,
665
- value=0.75
666
- )
667
-
668
- i2i_run = gr.Button("Generate", variant="primary")
669
-
670
- with gr.Accordion("Advanced Settings", open=False):
671
- i2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
672
-
673
- with gr.Row():
674
- i2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
675
- i2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)
676
-
677
- with gr.Row():
678
- i2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
679
- i2i_random_seed = gr.Checkbox(label="Random", value=True)
680
 
681
- i2i_lora = gr.Radio(
682
- label="LoRA Style",
683
- choices=["None"] + list(AVAILABLE_LORAS.keys()),
684
- value="None"
685
  )
686
- i2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
687
-
688
- with gr.Column(scale=1):
689
- i2i_output = gr.Image(label="Generated Image")
690
- i2i_seed_output = gr.Number(label="Used Seed")
691
-
692
- # TAB 3: ControlNet
693
- with gr.Tab("🎮 ControlNet"):
694
- with gr.Row():
695
- with gr.Column(scale=1):
696
- cn_input = gr.Image(type="pil", label="Input Image")
697
- cn_prompt = gr.Text(
698
- label="Prompt",
699
- placeholder="A detailed description...",
700
- lines=3
701
- )
702
-
703
- cn_control_type = gr.Radio(
704
- label="Control Type (Preprocessor)",
705
- choices=["Canny", "Soft Edge", "Depth", "Pose"],
706
- value="Canny"
707
- )
708
-
709
- cn_control_scale = gr.Slider(
710
- label="Control Strength",
711
- minimum=0.0,
712
- maximum=2.0,
713
- step=0.05,
714
- value=1.0
715
- )
716
-
717
- cn_run = gr.Button("Generate", variant="primary")
718
-
719
- with gr.Accordion("Advanced Settings", open=False):
720
- cn_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
721
 
722
  with gr.Row():
723
- cn_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=30)
724
- cn_cfg = gr.Slider(label="CFG", minimum=1.0, maximum=10.0, step=0.1, value=5.0)
 
 
 
 
725
 
726
  with gr.Row():
727
- cn_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
728
- cn_random_seed = gr.Checkbox(label="Random", value=True)
 
 
729
 
730
- cn_lora = gr.Radio(
731
- label="LoRA Style",
732
- choices=["None"] + list(AVAILABLE_LORAS.keys()),
733
- value="None"
734
- )
735
- cn_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
 
 
 
736
 
737
  with gr.Column(scale=1):
738
- cn_control_preview = gr.Image(label="Control Image (Preprocessed)")
739
- cn_output = gr.Image(label="Generated Image")
740
- cn_seed_output = gr.Number(label="Used Seed")
 
 
 
 
 
 
 
 
 
 
741
 
742
- # Event handlers
743
- t2i_run.click(
744
- fn=generate_text2img,
745
- inputs=[
746
- t2i_prompt, t2i_negative, t2i_width, t2i_height,
747
- t2i_seed, t2i_random_seed, t2i_cfg, t2i_steps,
748
- t2i_lora, t2i_lora_scale
749
- ],
750
- outputs=[t2i_output, t2i_seed_output],
751
- api_name="text2img"
752
- )
753
 
754
- i2i_run.click(
755
- fn=generate_img2img,
756
- inputs=[
757
- i2i_input, i2i_prompt, i2i_negative, i2i_strength,
758
- i2i_seed, i2i_random_seed, i2i_cfg, i2i_steps,
759
- i2i_lora, i2i_lora_scale
760
- ],
761
- outputs=[i2i_output, i2i_seed_output],
762
- api_name="img2img"
763
- )
764
 
765
- cn_run.click(
766
- fn=generate_controlnet,
767
- inputs=[
768
- cn_input, cn_prompt, cn_control_type, cn_negative, cn_control_scale,
769
- cn_seed, cn_random_seed, cn_cfg, cn_steps,
770
- cn_lora, cn_lora_scale
771
- ],
772
- outputs=[cn_output, cn_control_preview, cn_seed_output],
773
- api_name="controlnet"
774
- )
775
 
776
  if __name__ == "__main__":
777
  demo.launch(
 
3
  import random
4
  import json
5
  import torch
 
6
  from PIL import Image
7
 
8
  # Опциональный импорт spaces - нужен только для HF Spaces
 
22
  import time
23
  import logging
24
 
25
+ from diffusers import DiffusionPipeline, AutoPipelineForImage2Image
 
 
 
 
 
 
26
 
27
  # Настройка логирования
28
  logging.basicConfig(
 
32
  )
33
  logger = logging.getLogger(__name__)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  logger.info("=" * 60)
36
+ logger.info("QWEN-SOLOBAND: Text2Image + Image2Image + LoRA")
37
  logger.info("=" * 60)
38
+ logger.info(f"Environment: {'HF Spaces' if HF_SPACES else 'RunPod/Local'}")
 
 
 
 
 
39
 
40
  hf_token = os.environ.get("HF_TOKEN")
41
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
54
  # ЗАГРУЗКА МОДЕЛЕЙ
55
  # =================================================================
56
 
 
 
57
  model_id = "Gerchegg/Qwen-Soloband-Diffusers"
58
 
59
+ # 1. Text2Image модель
60
+ logger.info("\n[1/2] Loading Text2Image model...")
61
  try:
62
  start_time = time.time()
63
 
 
64
  if gpu_count > 1:
65
  device_map = "balanced"
66
  logger.info(f" Device map: balanced ({gpu_count} GPUs)")
 
 
67
  pipe_txt2img = DiffusionPipeline.from_pretrained(
68
  model_id,
69
  torch_dtype=dtype,
 
72
  )
73
  else:
74
  logger.info(" Device map: single GPU")
 
 
75
  pipe_txt2img = DiffusionPipeline.from_pretrained(
76
  model_id,
77
  torch_dtype=dtype,
 
85
  logger.error(f" ❌ Error loading Text2Image: {e}")
86
  raise
87
 
88
+ # 2. Image2Image модель
89
+ logger.info("\n[2/2] Creating Image2Image pipeline...")
90
  try:
91
  pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img)
92
  logger.info(" ✓ Image2Image pipeline created")
 
94
  logger.error(f" ❌ Error creating Image2Image: {e}")
95
  pipe_img2img = None
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  # Оптимизации памяти
98
  logger.info("\nApplying memory optimizations...")
99
+ for pipe in [pipe_txt2img, pipe_img2img]:
100
  if pipe and hasattr(pipe, 'vae'):
101
  if hasattr(pipe.vae, 'enable_tiling'):
102
  pipe.vae.enable_tiling()
 
110
  logger.info("=" * 60)
111
 
112
  # =================================================================
113
+ # HELPER FUNCTIONS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  # =================================================================
115
 
116
  def resize_image(input_image, max_size=1024):
 
134
 
135
  return input_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  # =================================================================
138
  # LORA FUNCTIONS
139
  # =================================================================
140
 
 
141
  AVAILABLE_LORAS = {
142
  "Realism": {
143
  "repo": "flymy-ai/qwen-image-realism-lora",
 
156
  }
157
  }
158
 
159
+ loaded_loras = {}
160
+
161
+ def load_lora(pipe, lora_name):
162
+ """Загружает LoRA в pipeline"""
163
+ if lora_name == "None" or lora_name not in AVAILABLE_LORAS:
164
+ return pipe
165
+
166
+ if lora_name in loaded_loras:
167
+ logger.info(f" Using cached LoRA: {lora_name}")
168
+ return pipe
169
+
170
+ lora_info = AVAILABLE_LORAS[lora_name]
171
+ logger.info(f" Loading LoRA: {lora_name} from {lora_info['repo']}")
172
+
173
+ try:
174
+ pipe.load_lora_weights(
175
+ lora_info["repo"],
176
+ weight_name=lora_info["weights"]
177
+ )
178
+ loaded_loras[lora_name] = True
179
+ logger.info(f" ✓ LoRA loaded: {lora_name}")
180
+ except Exception as e:
181
+ logger.warning(f" ⚠️ Failed to load LoRA {lora_name}: {e}")
182
+
183
+ return pipe
184
+
185
+ def unload_loras(pipe):
186
+ """Выгружает все LoRA"""
187
+ if hasattr(pipe, 'unload_lora_weights'):
188
+ pipe.unload_lora_weights()
189
+ loaded_loras.clear()
190
+
191
  # =================================================================
192
  # GENERATION FUNCTIONS
193
  # =================================================================
 
218
  seed = random.randint(0, MAX_SEED)
219
 
220
  logger.info(f" Prompt: {prompt[:100]}...")
221
+ logger.info(f" Resolution: {width}x{height}")
222
  logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
223
+ logger.info(f" Seed: {seed}, LoRA: {lora_name}")
 
224
 
225
  try:
226
  # Загружаем LoRA если выбрана
227
+ if lora_name != "None":
228
+ load_lora(pipe_txt2img, lora_name)
229
+ pipe_txt2img.set_adapters([lora_name], adapter_weights=[lora_scale])
 
 
 
 
 
 
 
 
 
 
 
230
 
231
+ generator = torch.Generator(device="cuda:0" if torch.cuda.is_available() else "cpu").manual_seed(seed)
232
 
233
  image = pipe_txt2img(
234
  prompt=prompt,
 
240
  generator=generator
241
  ).images[0]
242
 
243
+ logger.info(" ✓ Generation complete")
 
 
 
 
244
 
245
  return image, seed
246
 
 
268
  logger.info("IMAGE-TO-IMAGE GENERATION")
269
  logger.info("=" * 60)
270
 
271
+ if pipe_img2img is None:
272
+ raise gr.Error("Image2Image pipeline not available")
273
 
274
  if randomize_seed:
275
  seed = random.randint(0, MAX_SEED)
276
 
277
+ # Resize изображение
278
  resized = resize_image(input_image, max_size=1024)
279
 
280
  logger.info(f" Prompt: {prompt[:100]}...")
281
+ logger.info(f" Image size: {resized.size}")
282
  logger.info(f" Strength: {strength}")
283
  logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
284
+ logger.info(f" Seed: {seed}, LoRA: {lora_name}")
285
 
286
  try:
 
 
 
287
  # Загружаем LoRA если выбрана
288
+ if lora_name != "None":
289
+ load_lora(pipe_img2img, lora_name)
290
+ pipe_img2img.set_adapters([lora_name], adapter_weights=[lora_scale])
 
 
 
 
 
 
291
 
292
+ generator = torch.Generator(device="cuda:0" if torch.cuda.is_available() else "cpu").manual_seed(seed)
293
 
294
  image = pipe_img2img(
295
  prompt=prompt,
 
297
  image=resized,
298
  strength=strength,
299
  num_inference_steps=num_inference_steps,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  guidance_scale=guidance_scale,
301
  generator=generator
302
  ).images[0]
303
 
304
+ logger.info(" ✓ Generation complete")
 
 
 
 
305
 
306
+ return image, seed
307
 
308
  except Exception as e:
309
  logger.error(f" ❌ Error: {e}")
310
  raise
311
 
312
  # =================================================================
313
+ # UI
314
  # =================================================================
315
 
 
 
316
  css = """
317
  #col-container {
318
  margin: 0 auto;
319
+ max-width: 1200px;
320
  }
321
  """
322
 
323
+ # Загрузка examples
324
+ try:
325
+ examples = json.loads(open("examples.json").read())
326
+ except:
327
+ examples = []
328
+
329
+ with gr.Blocks(css=css) as demo:
330
  gr.Markdown("""
331
+ # 🎨 Qwen Soloband: Text2Image + Image2Image + LoRA
332
 
333
+ **Кастомная модель генерации изображений** на базе Qwen-Image DiT архитектуры.
334
 
335
+ ### ✨ Возможности
336
+ - 🔥 **Text-to-Image** - генерация из текста
337
+ - 🖼️ **Image-to-Image** - преобразование изображений
338
+ - 🎯 **LoRA поддержка** - Realism, Anime, Analog Film
339
+ - 🚀 **Multi-GPU** - автоматическое распределение
340
+ - **Оптимизированная память** - VAE tiling/slicing
341
 
342
+ **Модель**: [Qwen-Soloband-Diffusers](https://huggingface.co/Gerchegg/Qwen-Soloband-Diffusers)
343
  """)
344
 
345
+ with gr.Tabs():
346
+ # ============= TAB 1: TEXT2IMAGE =============
347
+ with gr.Tab("🎨 Text-to-Image"):
 
348
  with gr.Row():
349
  with gr.Column(scale=1):
350
+ t2i_prompt = gr.Textbox(
351
  label="Prompt",
352
+ placeholder="SB_AI, a beautiful landscape with mountains...",
353
  lines=3
354
  )
355
+ t2i_run = gr.Button("Generate", variant="primary", size="lg")
356
 
357
+ with gr.Accordion("Settings", open=False):
358
+ t2i_negative = gr.Textbox(
359
+ label="Negative Prompt",
360
+ value="blurry, low quality, ugly, bad anatomy",
361
+ lines=2
362
+ )
363
 
364
  with gr.Row():
365
+ t2i_width = gr.Slider(
366
+ label="Width", minimum=512, maximum=2048, step=64, value=1664
367
+ )
368
+ t2i_height = gr.Slider(
369
+ label="Height", minimum=512, maximum=2048, step=64, value=928
370
+ )
371
 
372
  with gr.Row():
373
+ t2i_cfg = gr.Slider(
374
+ label="CFG Scale", minimum=1.0, maximum=7.5, step=0.1, value=2.5
375
+ )
376
+ t2i_steps = gr.Slider(
377
+ label="Steps", minimum=1, maximum=50, step=1, value=40
378
+ )
379
 
380
  with gr.Row():
381
+ t2i_seed = gr.Slider(
382
+ label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
383
+ )
384
+ t2i_random_seed = gr.Checkbox(label="Random seed", value=True)
385
 
386
+ with gr.Row():
387
+ t2i_lora = gr.Dropdown(
388
+ label="LoRA",
389
+ choices=["None"] + list(AVAILABLE_LORAS.keys()),
390
+ value="None"
391
+ )
392
+ t2i_lora_scale = gr.Slider(
393
+ label="LoRA Scale", minimum=0.0, maximum=2.0, step=0.1, value=1.0
394
+ )
395
 
396
  with gr.Column(scale=1):
397
  t2i_output = gr.Image(label="Generated Image")
398
  t2i_seed_output = gr.Number(label="Used Seed")
399
+
400
+ t2i_run.click(
401
+ fn=generate_text2img,
402
+ inputs=[
403
+ t2i_prompt, t2i_negative, t2i_width, t2i_height,
404
+ t2i_seed, t2i_random_seed, t2i_cfg, t2i_steps,
405
+ t2i_lora, t2i_lora_scale
406
+ ],
407
+ outputs=[t2i_output, t2i_seed_output],
408
+ api_name="text2img"
409
+ )
410
 
411
+ # ============= TAB 2: IMAGE2IMAGE =============
412
+ with gr.Tab("🖼️ Image-to-Image"):
413
  with gr.Row():
414
  with gr.Column(scale=1):
415
  i2i_input = gr.Image(type="pil", label="Input Image")
416
+ i2i_prompt = gr.Textbox(
417
  label="Prompt",
418
+ placeholder="Enhanced version...",
419
  lines=3
420
  )
421
+ i2i_run = gr.Button("Generate", variant="primary", size="lg")
422
 
423
+ with gr.Accordion("Settings", open=False):
424
+ i2i_negative = gr.Textbox(
425
+ label="Negative Prompt",
426
+ value="blurry, low quality, ugly",
427
+ lines=2
428
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
 
430
+ i2i_strength = gr.Slider(
431
+ label="Strength (transformation amount)",
432
+ minimum=0.1, maximum=1.0, step=0.05, value=0.75
 
433
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
 
435
  with gr.Row():
436
+ i2i_cfg = gr.Slider(
437
+ label="CFG Scale", minimum=1.0, maximum=7.5, step=0.1, value=2.5
438
+ )
439
+ i2i_steps = gr.Slider(
440
+ label="Steps", minimum=1, maximum=50, step=1, value=40
441
+ )
442
 
443
  with gr.Row():
444
+ i2i_seed = gr.Slider(
445
+ label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
446
+ )
447
+ i2i_random_seed = gr.Checkbox(label="Random seed", value=True)
448
 
449
+ with gr.Row():
450
+ i2i_lora = gr.Dropdown(
451
+ label="LoRA",
452
+ choices=["None"] + list(AVAILABLE_LORAS.keys()),
453
+ value="None"
454
+ )
455
+ i2i_lora_scale = gr.Slider(
456
+ label="LoRA Scale", minimum=0.0, maximum=2.0, step=0.1, value=1.0
457
+ )
458
 
459
  with gr.Column(scale=1):
460
+ i2i_output = gr.Image(label="Generated Image")
461
+ i2i_seed_output = gr.Number(label="Used Seed")
462
+
463
+ i2i_run.click(
464
+ fn=generate_img2img,
465
+ inputs=[
466
+ i2i_input, i2i_prompt, i2i_negative, i2i_strength,
467
+ i2i_seed, i2i_random_seed, i2i_cfg, i2i_steps,
468
+ i2i_lora, i2i_lora_scale
469
+ ],
470
+ outputs=[i2i_output, i2i_seed_output],
471
+ api_name="img2img"
472
+ )
473
 
474
+ gr.Markdown("""
475
+ ### 💡 Советы
 
 
 
 
 
 
 
 
 
476
 
477
+ **Промпты**: Используйте префикс `SB_AI,` для лучших результатов
 
 
 
 
 
 
 
 
 
478
 
479
+ **Разрешения**:
480
+ - 1664×928 (16:9) - широкоформатное
481
+ - 1328×1328 (1:1) - квадрат
482
+ - 928×1664 (9:16) - портрет
483
+ - 1472×1140 (4:3) - стандарт
484
+
485
+ **LoRA**: Можно комбинировать с промптом для стилизации
486
+ """)
 
 
487
 
488
  if __name__ == "__main__":
489
  demo.launch(