| import gradio as gr
|
| import numpy as np
|
| import random
|
| import json
|
| import torch
|
| import cv2
|
| from PIL import Image
|
|
|
|
|
try:
    import spaces
except ImportError:
    # Not running on Hugging Face Spaces: install a no-op stand-in so the
    # @spaces.GPU(...) decorators used below keep working unchanged locally.
    HF_SPACES = False

    class spaces:
        """Local stand-in for the HF ``spaces`` package."""

        @staticmethod
        def GPU(duration=None):
            # Identity decorator factory: returns the function untouched.
            return lambda func: func
else:
    HF_SPACES = True
|
|
|
| import os
|
| import time
|
| import logging
|
|
|
| from diffusers import (
|
| DiffusionPipeline,
|
| QwenImageControlNetPipeline,
|
| QwenImageControlNetModel,
|
| AutoPipelineForImage2Image
|
| )
|
| from huggingface_hub import hf_hub_download
|
|
|
|
|
# ---------------------------------------------------------------------------
# Logging and environment setup
# ---------------------------------------------------------------------------
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

logger.info("=" * 60)
logger.info("LOADING QWEN-SOLOBAND ADVANCED")
logger.info("=" * 60)

# HF_TOKEN is optional; gated/private repos need it.
hf_token = os.environ.get("HF_TOKEN")
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16

# BUGFIX: gpu_count was previously assigned only inside the CUDA branch
# below, causing a NameError later (device_map selection) on CPU-only
# hosts. Default it to 0 unconditionally.
gpu_count = 0

logger.info(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    logger.info(f"Number of GPUs: {gpu_count}")
    for i in range(gpu_count):
        logger.info(f" GPU {i}: {torch.cuda.get_device_name(i)}")
        logger.info(f" Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.1f} GB")
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Model loading: one base model shared by three pipelines
# (Text2Image, Image2Image, ControlNet) plus VAE memory optimizations.
# ---------------------------------------------------------------------------
logger.info("\n[1/3] Loading base Text2Image model...")
model_id = "Gerchegg/Qwen-Soloband-Diffusers"

# BUGFIX: compute the GPU count defensively here so this section never hits
# a NameError on CPU-only machines (gpu_count used to be defined only inside
# a CUDA-guarded branch earlier in the file).
gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 0

try:
    start_time = time.time()

    # Shard the model across GPUs only when more than one is present.
    if gpu_count > 1:
        device_map = "balanced"
        logger.info(f" Device map: balanced ({gpu_count} GPUs)")
    else:
        device_map = None
        logger.info(" Device map: single GPU")

    pipe_txt2img = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map=device_map,
        token=hf_token
    )

    # Without a device_map, diffusers leaves the model on CPU; move it.
    if device_map is None:
        pipe_txt2img.to(device)

    load_time = time.time() - start_time
    logger.info(f" ✓ Text2Image loaded in {load_time:.1f}s")

except Exception as e:
    # The base pipeline is mandatory — abort startup if it cannot load.
    logger.error(f" ❌ Error loading Text2Image: {e}")
    raise

logger.info("\n[2/3] Creating Image2Image pipeline...")
try:
    # Reuses the already-loaded components: no extra VRAM cost.
    pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img)
    logger.info(" ✓ Image2Image pipeline created")
except Exception as e:
    # Img2img is optional; the app degrades gracefully without it.
    logger.error(f" ❌ Error creating Image2Image: {e}")
    pipe_img2img = None

logger.info("\n[3/3] Loading ControlNet model...")
try:
    controlnet_model_id = "InstantX/Qwen-Image-ControlNet-Union"

    controlnet = QwenImageControlNetModel.from_pretrained(
        controlnet_model_id,
        torch_dtype=dtype,
        token=hf_token
    )

    # BUGFIX: previously this pipeline got no device_map, so on multi-GPU
    # setups (device_map == "balanced") it was never moved off the CPU.
    pipe_controlnet = QwenImageControlNetPipeline.from_pretrained(
        model_id,
        controlnet=controlnet,
        torch_dtype=dtype,
        device_map=device_map,
        token=hf_token
    )

    if device_map is None:
        pipe_controlnet.to(device)

    logger.info(" ✓ ControlNet loaded")

except Exception as e:
    # ControlNet is optional too; disable the tab's backend on failure.
    logger.error(f" ❌ Error loading ControlNet: {e}")
    logger.warning(" ControlNet will be disabled")
    pipe_controlnet = None

logger.info("\nApplying memory optimizations...")
# Tiling/slicing trade a little speed for a large VRAM reduction in the VAE.
for pipe in [pipe_txt2img, pipe_img2img, pipe_controlnet]:
    if pipe and hasattr(pipe, 'vae'):
        if hasattr(pipe.vae, 'enable_tiling'):
            pipe.vae.enable_tiling()
        if hasattr(pipe.vae, 'enable_slicing'):
            pipe.vae.enable_slicing()

logger.info(" ✓ VAE tiling and slicing enabled")

logger.info("\n" + "=" * 60)
logger.info("✓ ALL MODELS LOADED")
logger.info("=" * 60)
|
|
|
|
|
|
|
|
|
|
|
def resize_image(input_image, max_size=1024):
    """Scale an image so its longest side equals ``max_size``.

    Keeps the aspect ratio and snaps both dimensions down to the nearest
    multiple of 8 (diffusion models require /8-divisible sizes), with a
    floor of 8 px to guard against extremely thin inputs.
    """
    width, height = input_image.size
    ratio = width / height

    if width > height:
        # Landscape: pin the width, derive the height.
        target_w = max_size
        target_h = int(target_w / ratio)
    else:
        # Portrait or square: pin the height, derive the width.
        target_h = max_size
        target_w = int(target_h * ratio)

    # Snap down to multiples of 8, never below 8.
    target_w = max(target_w - (target_w % 8), 8)
    target_h = max(target_h - (target_h % 8), 8)

    return input_image.resize((target_w, target_h), Image.Resampling.LANCZOS)
|
|
|
def extract_canny(input_image, low_threshold=100, high_threshold=200):
    """Run Canny edge detection and return the edges as an RGB PIL image."""
    arr = np.array(input_image)
    edge_map = cv2.Canny(arr, low_threshold, high_threshold)
    # Replicate the single-channel edge map into three identical channels.
    rgb = np.dstack((edge_map, edge_map, edge_map))
    return Image.fromarray(rgb)
|
|
|
def extract_depth(input_image):
    """Crude depth-map stand-in: image luminance replicated to RGB.

    Not a real monocular depth estimator — grayscale is used as a cheap
    proxy for a depth control signal.
    """
    return input_image.convert('L').convert('RGB')
|
|
|
def extract_pose(input_image):
    """Pose preprocessor placeholder.

    A real implementation would run an OpenPose-style model; until one is
    wired in, Canny edges are used as the control signal instead.
    """
    return extract_canny(input_image)
|
|
|
def get_control_image(input_image, control_type):
    """Apply the preprocessor matching ``control_type``.

    Unknown control types fall back to Canny, matching the previous
    if/elif behavior.
    """
    preprocessors = {
        "Canny": extract_canny,
        "Depth": extract_depth,
        "Pose": extract_pose,
    }
    return preprocessors.get(control_type, extract_canny)(input_image)
|
|
|
|
|
|
|
|
|
|
|
|
|
# Registry of user-selectable LoRA styles.
#   repo    — Hugging Face repo id holding the LoRA weights
#   trigger — text prepended to the user prompt to activate the style
#   weights — safetensors filename inside that repo
AVAILABLE_LORAS = {
    "Realism": {
        "repo": "flymy-ai/qwen-image-realism-lora",
        "trigger": "Super Realism portrait of",
        "weights": "pytorch_lora_weights.safetensors"
    },
    "Anime": {
        "repo": "alfredplpl/qwen-image-modern-anime-lora",
        "trigger": "Japanese modern anime style, ",
        "weights": "pytorch_lora_weights.safetensors"
    },
    "Analog Film": {
        "repo": "janekm/analog_film",
        "trigger": "fifthel",
        "weights": "converted_complete.safetensors"
    }
}

# Upper bound for user-provided seeds (largest signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max
|
|
|
@spaces.GPU(duration=180)
def generate_text2img(
    prompt,
    negative_prompt=" ",
    width=1664,
    height=928,
    seed=42,
    randomize_seed=False,
    guidance_scale=2.5,
    num_inference_steps=40,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """Text-to-Image generation.

    Args mirror the Gradio controls. Returns ``(image, seed)`` where
    ``seed`` is the value actually used (possibly randomized).

    Fixes vs previous version:
      * ``lora_scale`` is now actually applied (it was accepted but ignored);
      * LoRA weights are unloaded in a ``finally`` block, so a failed
        generation no longer leaves the LoRA attached for later requests.
    """
    logger.info("\n" + "=" * 60)
    logger.info("TEXT-TO-IMAGE GENERATION")
    logger.info("=" * 60)

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    logger.info(f" Prompt: {prompt[:100]}...")
    logger.info(f" Size: {width}x{height}")
    logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
    logger.info(f" Seed: {seed}")
    logger.info(f" LoRA: {lora_name} (scale: {lora_scale})")

    lora_loaded = False
    try:
        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            logger.info(f" Loading LoRA: {lora_info['repo']}")

            pipe_txt2img.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                token=hf_token
            )
            lora_loaded = True

            # Apply the requested LoRA strength (previously ignored).
            # Best-effort: older diffusers/peft combos may lack set_adapters.
            try:
                adapters = pipe_txt2img.get_active_adapters()
                pipe_txt2img.set_adapters(adapters, [lora_scale] * len(adapters))
            except Exception as scale_err:
                logger.warning(f" Could not apply LoRA scale: {scale_err}")

            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt
                logger.info(f" Added trigger: {lora_info['trigger']}")

        generator = torch.Generator(device=device).manual_seed(seed)

        image = pipe_txt2img(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=guidance_scale,
            generator=generator
        ).images[0]

        logger.info(" ✓ Generation completed")
        return image, seed

    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # Always detach the LoRA so it cannot leak into the next request.
        if lora_loaded:
            pipe_txt2img.unload_lora_weights()
|
|
|
@spaces.GPU(duration=180)
def generate_img2img(
    input_image,
    prompt,
    negative_prompt=" ",
    strength=0.75,
    seed=42,
    randomize_seed=False,
    guidance_scale=2.5,
    num_inference_steps=40,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """Image-to-Image generation.

    ``strength`` is the denoising strength (0 = keep input, 1 = redraw).
    Returns ``(image, seed)``.

    Fixes vs previous version:
      * ``lora_scale`` is now actually applied (it was accepted but ignored);
      * LoRA weights are unloaded in a ``finally`` block, so a failed
        generation no longer leaves the LoRA attached for later requests.
    """
    logger.info("\n" + "=" * 60)
    logger.info("IMAGE-TO-IMAGE GENERATION")
    logger.info("=" * 60)

    if input_image is None:
        raise gr.Error("Please upload an input image")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Bound the working resolution (and snap to multiples of 8).
    resized = resize_image(input_image, max_size=1024)

    logger.info(f" Prompt: {prompt[:100]}...")
    logger.info(f" Input size: {input_image.size} → {resized.size}")
    logger.info(f" Strength: {strength}")
    logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
    logger.info(f" LoRA: {lora_name}")

    lora_loaded = False
    try:
        if pipe_img2img is None:
            raise gr.Error("Image2Image pipeline not available")

        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            pipe_img2img.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                token=hf_token
            )
            lora_loaded = True

            # Apply the requested LoRA strength (previously ignored).
            try:
                adapters = pipe_img2img.get_active_adapters()
                pipe_img2img.set_adapters(adapters, [lora_scale] * len(adapters))
            except Exception as scale_err:
                logger.warning(f" Could not apply LoRA scale: {scale_err}")

            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt

        generator = torch.Generator(device=device).manual_seed(seed)

        image = pipe_img2img(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=resized,
            strength=strength,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=guidance_scale,
            generator=generator
        ).images[0]

        logger.info(" ✓ Generation completed")
        return image, seed

    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # Always detach the LoRA so it cannot leak into the next request.
        if lora_loaded:
            pipe_img2img.unload_lora_weights()
|
|
|
@spaces.GPU(duration=180)
def generate_controlnet(
    input_image,
    prompt,
    control_type="Canny",
    negative_prompt=" ",
    controlnet_scale=1.0,
    seed=42,
    randomize_seed=False,
    guidance_scale=5.0,
    num_inference_steps=30,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """ControlNet-guided generation.

    Returns ``(image, control_image, seed)`` — the result, the preprocessed
    control signal shown in the UI, and the seed actually used.

    Fixes vs previous version:
      * ``lora_scale`` is now actually applied (it was accepted but ignored);
      * LoRA weights are unloaded in a ``finally`` block, so a failed
        generation no longer leaves the LoRA attached for later requests.
    """
    logger.info("\n" + "=" * 60)
    logger.info("CONTROLNET GENERATION")
    logger.info("=" * 60)

    if input_image is None:
        raise gr.Error("Please upload an input image")

    if pipe_controlnet is None:
        raise gr.Error("ControlNet pipeline not available")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Bound the working resolution, then derive the control signal from it.
    resized = resize_image(input_image, max_size=1024)
    control_image = get_control_image(resized, control_type)

    logger.info(f" Prompt: {prompt[:100]}...")
    logger.info(f" Control type: {control_type}")
    logger.info(f" Control scale: {controlnet_scale}")
    logger.info(f" Image size: {resized.size}")
    logger.info(f" LoRA: {lora_name}")

    lora_loaded = False
    try:
        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            pipe_controlnet.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                token=hf_token
            )
            lora_loaded = True

            # Apply the requested LoRA strength (previously ignored).
            try:
                adapters = pipe_controlnet.get_active_adapters()
                pipe_controlnet.set_adapters(adapters, [lora_scale] * len(adapters))
            except Exception as scale_err:
                logger.warning(f" Could not apply LoRA scale: {scale_err}")

            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt

        generator = torch.Generator(device=device).manual_seed(seed)

        # NOTE(review): this call uses guidance_scale, unlike the txt2img/
        # img2img paths which use true_cfg_scale — presumably matching the
        # ControlNet pipeline's signature; verify against the installed
        # diffusers version.
        image = pipe_controlnet(
            prompt=prompt,
            negative_prompt=negative_prompt,
            control_image=control_image,
            controlnet_conditioning_scale=controlnet_scale,
            width=resized.width,
            height=resized.height,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator
        ).images[0]

        logger.info(" ✓ Generation completed")
        return image, control_image, seed

    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # Always detach the LoRA so it cannot leak into the next request.
        if lora_loaded:
            pipe_controlnet.unload_lora_weights()
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): MAX_SEED is re-defined here with the same value as earlier
# in the file — redundant but harmless; consider keeping only one copy.
MAX_SEED = np.iinfo(np.int32).max

# Page CSS: center the main column and cap its width.
css = """
#col-container {
margin: 0 auto;
max-width: 1400px;
}
"""

# ---------------------------------------------------------------------------
# Gradio UI: three tabs (Text2Img / Img2Img / ControlNet), each wired to the
# matching generate_* backend and exposed over HTTP via api_name=...
# ---------------------------------------------------------------------------
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🎨 Qwen Soloband - Image2Image + ControlNet + LoRA

**Продвинутая модель генерации** с поддержкой Image-to-Image, ControlNet и LoRA.

### ✨ Возможности:
- 🖼️ **Text-to-Image** - Генерация из текста
- 🔄 **Image-to-Image** - Модификация изображений (denoising strength)
- 🎮 **ControlNet** - Управление структурой (Canny, Depth, Pose)
- 🎭 **LoRA** - Стилизация (Realism, Anime, Film)
- 🔌 **Full API** - Все функции доступны через API

**Модель**: [Gerchegg/Qwen-Soloband-Diffusers](https://huggingface.co/Gerchegg/Qwen-Soloband-Diffusers)
""")

    with gr.Tabs() as tabs:

        # --- Tab 1: plain text-to-image ---------------------------------
        with gr.Tab("📝 Text-to-Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    t2i_prompt = gr.Text(
                        label="Prompt",
                        placeholder="SB_AI, a beautiful landscape...",
                        lines=3
                    )

                    t2i_run = gr.Button("Generate", variant="primary")

                    with gr.Accordion("Advanced Settings", open=False):
                        t2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")

                        with gr.Row():
                            t2i_width = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=1664)
                            t2i_height = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=928)

                        with gr.Row():
                            t2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
                            t2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)

                        with gr.Row():
                            t2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            t2i_random_seed = gr.Checkbox(label="Random", value=True)

                        t2i_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        t2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)

                with gr.Column(scale=1):
                    t2i_output = gr.Image(label="Generated Image")
                    t2i_seed_output = gr.Number(label="Used Seed")

        # --- Tab 2: image-to-image (denoising strength) -----------------
        with gr.Tab("🔄 Image-to-Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    i2i_input = gr.Image(type="pil", label="Input Image")
                    i2i_prompt = gr.Text(
                        label="Prompt",
                        placeholder="Transform this image into...",
                        lines=3
                    )

                    i2i_strength = gr.Slider(
                        label="Denoising Strength",
                        info="0.0 = original image, 1.0 = complete redraw",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.75
                    )

                    i2i_run = gr.Button("Generate", variant="primary")

                    with gr.Accordion("Advanced Settings", open=False):
                        i2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")

                        with gr.Row():
                            i2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
                            i2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)

                        with gr.Row():
                            i2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            i2i_random_seed = gr.Checkbox(label="Random", value=True)

                        i2i_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        i2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)

                with gr.Column(scale=1):
                    i2i_output = gr.Image(label="Generated Image")
                    i2i_seed_output = gr.Number(label="Used Seed")

        # --- Tab 3: ControlNet (structure-guided) -----------------------
        with gr.Tab("🎮 ControlNet"):
            with gr.Row():
                with gr.Column(scale=1):
                    cn_input = gr.Image(type="pil", label="Input Image")
                    cn_prompt = gr.Text(
                        label="Prompt",
                        placeholder="A detailed description...",
                        lines=3
                    )

                    cn_control_type = gr.Radio(
                        label="Control Type (Preprocessor)",
                        choices=["Canny", "Depth", "Pose"],
                        value="Canny"
                    )

                    cn_control_scale = gr.Slider(
                        label="Control Strength",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.05,
                        value=1.0
                    )

                    cn_run = gr.Button("Generate", variant="primary")

                    with gr.Accordion("Advanced Settings", open=False):
                        cn_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")

                        with gr.Row():
                            cn_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=30)
                            cn_cfg = gr.Slider(label="CFG", minimum=1.0, maximum=10.0, step=0.1, value=5.0)

                        with gr.Row():
                            cn_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            cn_random_seed = gr.Checkbox(label="Random", value=True)

                        cn_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        cn_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)

                with gr.Column(scale=1):
                    cn_control_preview = gr.Image(label="Control Image (Preprocessed)")
                    cn_output = gr.Image(label="Generated Image")
                    cn_seed_output = gr.Number(label="Used Seed")

    # Wire buttons to backends. The inputs list order must match each
    # function's positional parameters; api_name exposes an API endpoint.
    t2i_run.click(
        fn=generate_text2img,
        inputs=[
            t2i_prompt, t2i_negative, t2i_width, t2i_height,
            t2i_seed, t2i_random_seed, t2i_cfg, t2i_steps,
            t2i_lora, t2i_lora_scale
        ],
        outputs=[t2i_output, t2i_seed_output],
        api_name="text2img"
    )

    i2i_run.click(
        fn=generate_img2img,
        inputs=[
            i2i_input, i2i_prompt, i2i_negative, i2i_strength,
            i2i_seed, i2i_random_seed, i2i_cfg, i2i_steps,
            i2i_lora, i2i_lora_scale
        ],
        outputs=[i2i_output, i2i_seed_output],
        api_name="img2img"
    )

    cn_run.click(
        fn=generate_controlnet,
        inputs=[
            cn_input, cn_prompt, cn_control_type, cn_negative, cn_control_scale,
            cn_seed, cn_random_seed, cn_cfg, cn_steps,
            cn_lora, cn_lora_scale
        ],
        # Function returns (image, control_image, seed) in this order.
        outputs=[cn_output, cn_control_preview, cn_seed_output],
        api_name="controlnet"
    )

if __name__ == "__main__":
    demo.launch(
        show_api=True,
        share=False
    )
|
|
|
|
|