# app.py — Qwen Soloband Gradio Space (Gerchegg), uploaded via huggingface_hub, rev b445dea
import gradio as gr
import numpy as np
import random
import json
import torch
import cv2
from PIL import Image
# Optional `spaces` import — the real package exists only on HF Spaces.
# Everywhere else we install a no-op stand-in so the @spaces.GPU
# decorators below keep working unchanged.
try:
    import spaces
    HF_SPACES = True
except ImportError:
    HF_SPACES = False

    class spaces:
        """Stub mirroring the HF `spaces` decorator API outside of Spaces."""

        @staticmethod
        def GPU(duration=None):
            # Locally there is no GPU scheduling: hand the function back untouched.
            def decorator(func):
                return func
            return decorator
import os
import time
import logging
from diffusers import (
DiffusionPipeline,
QwenImageControlNetPipeline,
QwenImageControlNetModel,
AutoPipelineForImage2Image
)
from huggingface_hub import hf_hub_download
# ---------------------------------------------------------------
# Logging / environment setup
# ---------------------------------------------------------------
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
logger.info("=" * 60)
logger.info("LOADING QWEN-SOLOBAND ADVANCED")
logger.info("=" * 60)

# HF token for gated/private repos; None is fine for public ones.
hf_token = os.environ.get("HF_TOKEN")
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16

# FIX: bind gpu_count unconditionally. Previously it was assigned only
# inside the CUDA branch, so the later `if gpu_count > 1:` device-map
# selection raised NameError on CPU-only hosts. device_count() is 0
# when CUDA is unavailable, which preserves the original GPU behavior.
gpu_count = torch.cuda.device_count()

# Log the GPU inventory.
logger.info(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    logger.info(f"Number of GPUs: {gpu_count}")
    for i in range(gpu_count):
        logger.info(f" GPU {i}: {torch.cuda.get_device_name(i)}")
        logger.info(f" Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.1f} GB")
# =================================================================
# MODEL LOADING
# =================================================================
# 1. Base Text-to-Image model
logger.info("\n[1/3] Loading base Text2Image model...")
model_id = "Gerchegg/Qwen-Soloband-Diffusers"
try:
    start_time = time.time()
    # FIX: query device_count() directly instead of relying on a
    # `gpu_count` variable that was only bound inside the CUDA-only
    # branch above (NameError on CPU-only hosts). 0 or 1 GPUs -> no map.
    if torch.cuda.device_count() > 1:
        device_map = "balanced"
        logger.info(f" Device map: balanced ({torch.cuda.device_count()} GPUs)")
    else:
        device_map = None
        logger.info(" Device map: single GPU")
    # Load the base pipeline; `token` allows gated/private checkpoints.
    pipe_txt2img = DiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map=device_map,
        token=hf_token
    )
    # With no device_map the pipeline lands on CPU; move it explicitly.
    if device_map is None:
        pipe_txt2img.to(device)
    load_time = time.time() - start_time
    logger.info(f" ✓ Text2Image loaded in {load_time:.1f}s")
except Exception as e:
    # The base model is mandatory: re-raise so the Space fails loudly.
    logger.error(f" ❌ Error loading Text2Image: {e}")
    raise
# 2. Image-to-Image pipeline (reuses the weights of the base pipeline,
# so this costs no extra memory or download time).
logger.info("\n[2/3] Creating Image2Image pipeline...")
try:
    pipe_img2img = AutoPipelineForImage2Image.from_pipe(pipe_txt2img)
    logger.info(" ✓ Image2Image pipeline created")
except Exception as e:
    # Non-fatal: the img2img tab is disabled but the app keeps running.
    logger.error(f" ❌ Error creating Image2Image: {e}")
    pipe_img2img = None
# 3. ControlNet model (optional — the app degrades gracefully without it)
logger.info("\n[3/3] Loading ControlNet model...")
try:
    controlnet_model_id = "InstantX/Qwen-Image-ControlNet-Union"
    controlnet = QwenImageControlNetModel.from_pretrained(
        controlnet_model_id,
        torch_dtype=dtype,
        token=hf_token
    )
    # Build a ControlNet pipeline on top of the same base checkpoint.
    pipe_controlnet = QwenImageControlNetPipeline.from_pretrained(
        model_id,
        controlnet=controlnet,
        torch_dtype=dtype,
        token=hf_token
    )
    # NOTE(review): `device_map` here is the value chosen for the base
    # pipeline above. When it is "balanced" (multi-GPU), this pipeline is
    # never moved off the CPU — confirm whether that is intentional.
    if device_map is None:
        pipe_controlnet.to(device)
    logger.info(" ✓ ControlNet loaded")
except Exception as e:
    # Any failure just disables the ControlNet tab instead of killing the app.
    logger.error(f" ❌ Error loading ControlNet: {e}")
    logger.warning(" ControlNet will be disabled")
    pipe_controlnet = None
# Memory optimizations: enable VAE tiling/slicing on every pipeline that
# exposes them (cuts peak VRAM during decode at a small speed cost).
logger.info("\nApplying memory optimizations...")
for candidate in (pipe_txt2img, pipe_img2img, pipe_controlnet):
    if not candidate or not hasattr(candidate, 'vae'):
        continue
    vae = candidate.vae
    if hasattr(vae, 'enable_tiling'):
        vae.enable_tiling()
    if hasattr(vae, 'enable_slicing'):
        vae.enable_slicing()
    logger.info(" ✓ VAE tiling and slicing enabled")
logger.info("\n" + "=" * 60)
logger.info("✓ ALL MODELS LOADED")
logger.info("=" * 60)
# =================================================================
# PREPROCESSOR FUNCTIONS
# =================================================================
def resize_image(input_image, max_size=1024, multiple=8):
    """Resize an image so its longer side equals ``max_size``, preserving
    the aspect ratio and snapping both sides down to a multiple of
    ``multiple`` (diffusion pipelines require dimensions divisible by 8).

    Args:
        input_image: Source PIL image.
        max_size: Target length of the longer side, in pixels.
        multiple: Dimension granularity (generalized from the previous
            hard-coded 8; default keeps the old behavior).

    Returns:
        A new PIL image resized with LANCZOS resampling; neither side is
        ever smaller than ``multiple`` pixels.
    """
    w, h = input_image.size
    aspect_ratio = w / h
    if w > h:
        new_w = max_size
        new_h = int(new_w / aspect_ratio)
    else:
        new_h = max_size
        new_w = int(new_h * aspect_ratio)
    # Snap down to the required granularity, but never collapse to zero
    # (extremely elongated inputs would otherwise yield a 0-px side).
    new_w = max(new_w - (new_w % multiple), multiple)
    new_h = max(new_h - (new_h % multiple), multiple)
    return input_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
def extract_canny(input_image, low_threshold=100, high_threshold=200):
    """Compute a Canny edge map as a 3-channel PIL image (ControlNet input).

    Args:
        input_image: Source PIL image.
        low_threshold: Lower hysteresis threshold for cv2.Canny.
        high_threshold: Upper hysteresis threshold for cv2.Canny.

    Returns:
        PIL image with the single-channel edge map replicated to RGB.
    """
    # FIX: normalize the mode first — np.array() of an 'L' image is 2-D
    # and of an 'RGBA' upload is 4-channel; the channel-replication below
    # assumes an H x W x 3 (or at least consistent) layout.
    image = np.array(input_image.convert("RGB"))
    edges = cv2.Canny(image, low_threshold, high_threshold)
    # Replicate the 1-channel edge map into three identical channels.
    edges = edges[:, :, None]
    edges = np.concatenate([edges, edges, edges], axis=2)
    return Image.fromarray(edges)
def extract_depth(input_image):
    """Pseudo depth map: grayscale luminance re-expanded to RGB.

    A real depth estimator (e.g. Depth-Anything) would go here; this
    lightweight stand-in keeps the "Depth" control type functional.
    """
    return input_image.convert('L').convert('RGB')
def extract_pose(input_image):
    """Pose preprocessor placeholder.

    A proper implementation needs an OpenPose model; until one is wired
    in, the Canny edge map serves as a structural fallback.
    """
    return extract_canny(input_image)
def get_control_image(input_image, control_type):
    """Run the preprocessor selected in the UI over ``input_image``.

    Args:
        input_image: Source PIL image.
        control_type: One of "Canny", "Depth", "Pose"; anything else
            falls back to Canny edges.

    Returns:
        The preprocessed control image (PIL).
    """
    preprocessors = {
        "Canny": extract_canny,
        "Depth": extract_depth,
        "Pose": extract_pose,
    }
    handler = preprocessors.get(control_type, extract_canny)
    return handler(input_image)
# =================================================================
# LORA FUNCTIONS
# =================================================================
# Registry of user-selectable LoRA styles.
#   repo    — Hugging Face repo id holding the LoRA weights
#   trigger — text prepended to the user prompt to activate the style
#   weights — safetensors filename inside the repo
AVAILABLE_LORAS = {
    "Realism": {
        "repo": "flymy-ai/qwen-image-realism-lora",
        "trigger": "Super Realism portrait of",
        "weights": "pytorch_lora_weights.safetensors"
    },
    "Anime": {
        "repo": "alfredplpl/qwen-image-modern-anime-lora",
        "trigger": "Japanese modern anime style, ",
        "weights": "pytorch_lora_weights.safetensors"
    },
    "Analog Film": {
        "repo": "janekm/analog_film",
        "trigger": "fifthel",
        "weights": "converted_complete.safetensors"
    }
}
# =================================================================
# GENERATION FUNCTIONS
# =================================================================
# Largest value the seed sliders / random seed draw can take.
MAX_SEED = np.iinfo(np.int32).max
@spaces.GPU(duration=180)
def generate_text2img(
    prompt,
    negative_prompt=" ",
    width=1664,
    height=928,
    seed=42,
    randomize_seed=False,
    guidance_scale=2.5,
    num_inference_steps=40,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """Text-to-Image generation.

    Args:
        prompt / negative_prompt: Text conditioning.
        width / height: Output resolution in pixels.
        seed / randomize_seed: RNG control; a fresh seed is drawn when requested.
        guidance_scale: True-CFG scale passed to the pipeline.
        num_inference_steps: Diffusion step count.
        lora_name: Key into AVAILABLE_LORAS, or "None" to skip LoRA.
        lora_scale: Strength of the selected LoRA adapter.
        progress: Gradio progress tracker (tqdm-linked).

    Returns:
        (PIL.Image, int) — generated image and the seed actually used.
    """
    logger.info("\n" + "=" * 60)
    logger.info("TEXT-TO-IMAGE GENERATION")
    logger.info("=" * 60)
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    logger.info(f" Prompt: {prompt[:100]}...")
    logger.info(f" Size: {width}x{height}")
    logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
    logger.info(f" Seed: {seed}")
    logger.info(f" LoRA: {lora_name} (scale: {lora_scale})")
    lora_loaded = False
    try:
        # Load the LoRA if one was selected.
        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            logger.info(f" Loading LoRA: {lora_info['repo']}")
            pipe_txt2img.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                adapter_name="style",
                token=hf_token
            )
            lora_loaded = True
            # FIX: apply the user-chosen LoRA strength — `lora_scale` was
            # accepted by the UI/API but never used.
            pipe_txt2img.set_adapters(["style"], adapter_weights=[lora_scale])
            # Prepend the trigger word that activates the style.
            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt
                logger.info(f" Added trigger: {lora_info['trigger']}")
        generator = torch.Generator(device=device).manual_seed(seed)
        image = pipe_txt2img(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=guidance_scale,
            generator=generator
        ).images[0]
        logger.info(" ✓ Generation completed")
        return image, seed
    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # FIX: unload in `finally` — previously an exception mid-generation
        # left the LoRA attached, silently styling every later request. The
        # flag also avoids a spurious unload for unknown lora_name values.
        if lora_loaded:
            pipe_txt2img.unload_lora_weights()
@spaces.GPU(duration=180)
def generate_img2img(
    input_image,
    prompt,
    negative_prompt=" ",
    strength=0.75,
    seed=42,
    randomize_seed=False,
    guidance_scale=2.5,
    num_inference_steps=40,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """Image-to-Image generation.

    Args:
        input_image: Source PIL image (required).
        prompt / negative_prompt: Text conditioning.
        strength: Denoising strength; 0.0 keeps the input, 1.0 redraws fully.
        seed / randomize_seed: RNG control.
        guidance_scale: True-CFG scale passed to the pipeline.
        num_inference_steps: Diffusion step count.
        lora_name / lora_scale: Optional LoRA style and its strength.
        progress: Gradio progress tracker (tqdm-linked).

    Returns:
        (PIL.Image, int) — generated image and the seed actually used.

    Raises:
        gr.Error: when no input image is given or the pipeline is unavailable.
    """
    logger.info("\n" + "=" * 60)
    logger.info("IMAGE-TO-IMAGE GENERATION")
    logger.info("=" * 60)
    if input_image is None:
        raise gr.Error("Please upload an input image")
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Fit to the working resolution (longest side 1024, multiples of 8).
    resized = resize_image(input_image, max_size=1024)
    logger.info(f" Prompt: {prompt[:100]}...")
    # FIX: the separator between the two sizes was missing in this log line.
    logger.info(f" Input size: {input_image.size} → {resized.size}")
    logger.info(f" Strength: {strength}")
    logger.info(f" Steps: {num_inference_steps}, CFG: {guidance_scale}")
    logger.info(f" LoRA: {lora_name}")
    lora_loaded = False
    try:
        if pipe_img2img is None:
            raise gr.Error("Image2Image pipeline not available")
        # Load the LoRA if one was selected.
        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            pipe_img2img.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                adapter_name="style",
                token=hf_token
            )
            lora_loaded = True
            # FIX: apply the LoRA strength — `lora_scale` was never used before.
            pipe_img2img.set_adapters(["style"], adapter_weights=[lora_scale])
            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt
        generator = torch.Generator(device=device).manual_seed(seed)
        image = pipe_img2img(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=resized,
            strength=strength,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=guidance_scale,
            generator=generator
        ).images[0]
        logger.info(" ✓ Generation completed")
        return image, seed
    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # FIX: unload in `finally` so a failed run cannot leave the LoRA attached.
        if lora_loaded:
            pipe_img2img.unload_lora_weights()
@spaces.GPU(duration=180)
def generate_controlnet(
    input_image,
    prompt,
    control_type="Canny",
    negative_prompt=" ",
    controlnet_scale=1.0,
    seed=42,
    randomize_seed=False,
    guidance_scale=5.0,
    num_inference_steps=30,
    lora_name="None",
    lora_scale=1.0,
    progress=gr.Progress(track_tqdm=True)
):
    """ControlNet-guided generation.

    Args:
        input_image: Source PIL image fed to the preprocessor (required).
        prompt / negative_prompt: Text conditioning.
        control_type: Preprocessor name — "Canny", "Depth" or "Pose".
        controlnet_scale: Conditioning strength for the ControlNet.
        seed / randomize_seed: RNG control.
        guidance_scale: CFG scale passed to the pipeline.
        num_inference_steps: Diffusion step count.
        lora_name / lora_scale: Optional LoRA style and its strength.
        progress: Gradio progress tracker (tqdm-linked).

    Returns:
        (PIL.Image, PIL.Image, int) — generated image, the preprocessed
        control image, and the seed actually used.

    Raises:
        gr.Error: when no input image is given or ControlNet is unavailable.
    """
    logger.info("\n" + "=" * 60)
    logger.info("CONTROLNET GENERATION")
    logger.info("=" * 60)
    if input_image is None:
        raise gr.Error("Please upload an input image")
    if pipe_controlnet is None:
        raise gr.Error("ControlNet pipeline not available")
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Resize to the working resolution, then run the selected preprocessor.
    resized = resize_image(input_image, max_size=1024)
    control_image = get_control_image(resized, control_type)
    logger.info(f" Prompt: {prompt[:100]}...")
    logger.info(f" Control type: {control_type}")
    logger.info(f" Control scale: {controlnet_scale}")
    logger.info(f" Image size: {resized.size}")
    logger.info(f" LoRA: {lora_name}")
    lora_loaded = False
    try:
        # Load the LoRA if one was selected.
        if lora_name != "None" and lora_name in AVAILABLE_LORAS:
            lora_info = AVAILABLE_LORAS[lora_name]
            pipe_controlnet.load_lora_weights(
                lora_info['repo'],
                weight_name=lora_info.get('weights', 'pytorch_lora_weights.safetensors'),
                adapter_name="style",
                token=hf_token
            )
            lora_loaded = True
            # FIX: apply the LoRA strength — `lora_scale` was never used before.
            pipe_controlnet.set_adapters(["style"], adapter_weights=[lora_scale])
            if lora_info['trigger']:
                prompt = lora_info['trigger'] + prompt
        generator = torch.Generator(device=device).manual_seed(seed)
        # NOTE(review): the other pipelines pass true_cfg_scale; this one
        # passes guidance_scale — confirm against the ControlNet pipeline's
        # signature before unifying.
        image = pipe_controlnet(
            prompt=prompt,
            negative_prompt=negative_prompt,
            control_image=control_image,
            controlnet_conditioning_scale=controlnet_scale,
            width=resized.width,
            height=resized.height,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator
        ).images[0]
        logger.info(" ✓ Generation completed")
        return image, control_image, seed
    except Exception as e:
        logger.error(f" ❌ Error: {e}")
        raise
    finally:
        # FIX: unload in `finally` so a failed run cannot leave the LoRA attached.
        if lora_loaded:
            pipe_controlnet.unload_lora_weights()
# =================================================================
# GRADIO INTERFACE
# =================================================================
# NOTE(review): MAX_SEED was already defined above with the same value;
# this re-definition is redundant but harmless.
MAX_SEED = np.iinfo(np.int32).max
# Page-level CSS: center the layout and cap its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1400px;
}
"""
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    # Intro / feature overview shown at the top of the page.
    gr.Markdown("""
# 🎨 Qwen Soloband - Image2Image + ControlNet + LoRA
**Продвинутая модель генерации** с поддержкой Image-to-Image, ControlNet и LoRA.
### ✨ Возможности:
- 🖼️ **Text-to-Image** - Генерация из текста
- 🔄 **Image-to-Image** - Модификация изображений (denoising strength)
- 🎮 **ControlNet** - Управление структурой (Canny, Depth, Pose)
- 🎭 **LoRA** - Стилизация (Realism, Anime, Film)
- 🔌 **Full API** - Все функции доступны через API
**Модель**: [Gerchegg/Qwen-Soloband-Diffusers](https://huggingface.co/Gerchegg/Qwen-Soloband-Diffusers)
""")
    with gr.Tabs() as tabs:
        # TAB 1: Text-to-Image
        with gr.Tab("📝 Text-to-Image"):
            with gr.Row():
                # Left column: inputs and settings.
                with gr.Column(scale=1):
                    t2i_prompt = gr.Text(
                        label="Prompt",
                        placeholder="SB_AI, a beautiful landscape...",
                        lines=3
                    )
                    t2i_run = gr.Button("Generate", variant="primary")
                    with gr.Accordion("Advanced Settings", open=False):
                        t2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
                        with gr.Row():
                            t2i_width = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=1664)
                            t2i_height = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=928)
                        with gr.Row():
                            t2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
                            t2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)
                        with gr.Row():
                            t2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            t2i_random_seed = gr.Checkbox(label="Random", value=True)
                        t2i_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        t2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
                # Right column: outputs.
                with gr.Column(scale=1):
                    t2i_output = gr.Image(label="Generated Image")
                    t2i_seed_output = gr.Number(label="Used Seed")
        # TAB 2: Image-to-Image
        with gr.Tab("🔄 Image-to-Image"):
            with gr.Row():
                with gr.Column(scale=1):
                    i2i_input = gr.Image(type="pil", label="Input Image")
                    i2i_prompt = gr.Text(
                        label="Prompt",
                        placeholder="Transform this image into...",
                        lines=3
                    )
                    i2i_strength = gr.Slider(
                        label="Denoising Strength",
                        info="0.0 = original image, 1.0 = complete redraw",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=0.75
                    )
                    i2i_run = gr.Button("Generate", variant="primary")
                    with gr.Accordion("Advanced Settings", open=False):
                        i2i_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
                        with gr.Row():
                            i2i_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=40)
                            i2i_cfg = gr.Slider(label="CFG", minimum=0.0, maximum=7.5, step=0.1, value=2.5)
                        with gr.Row():
                            i2i_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            i2i_random_seed = gr.Checkbox(label="Random", value=True)
                        i2i_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        i2i_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
                with gr.Column(scale=1):
                    i2i_output = gr.Image(label="Generated Image")
                    i2i_seed_output = gr.Number(label="Used Seed")
        # TAB 3: ControlNet
        with gr.Tab("🎮 ControlNet"):
            with gr.Row():
                with gr.Column(scale=1):
                    cn_input = gr.Image(type="pil", label="Input Image")
                    cn_prompt = gr.Text(
                        label="Prompt",
                        placeholder="A detailed description...",
                        lines=3
                    )
                    cn_control_type = gr.Radio(
                        label="Control Type (Preprocessor)",
                        choices=["Canny", "Depth", "Pose"],
                        value="Canny"
                    )
                    cn_control_scale = gr.Slider(
                        label="Control Strength",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.05,
                        value=1.0
                    )
                    cn_run = gr.Button("Generate", variant="primary")
                    with gr.Accordion("Advanced Settings", open=False):
                        cn_negative = gr.Text(label="Negative Prompt", value="blurry, low quality")
                        with gr.Row():
                            cn_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=30)
                            cn_cfg = gr.Slider(label="CFG", minimum=1.0, maximum=10.0, step=0.1, value=5.0)
                        with gr.Row():
                            cn_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                            cn_random_seed = gr.Checkbox(label="Random", value=True)
                        cn_lora = gr.Radio(
                            label="LoRA Style",
                            choices=["None"] + list(AVAILABLE_LORAS.keys()),
                            value="None"
                        )
                        cn_lora_scale = gr.Slider(label="LoRA Strength", minimum=0.0, maximum=2.0, step=0.1, value=1.0)
                with gr.Column(scale=1):
                    cn_control_preview = gr.Image(label="Control Image (Preprocessed)")
                    cn_output = gr.Image(label="Generated Image")
                    cn_seed_output = gr.Number(label="Used Seed")
    # Event handlers. Input ordering mirrors each generator's positional
    # signature; `api_name` exposes the endpoint in the REST API.
    t2i_run.click(
        fn=generate_text2img,
        inputs=[
            t2i_prompt, t2i_negative, t2i_width, t2i_height,
            t2i_seed, t2i_random_seed, t2i_cfg, t2i_steps,
            t2i_lora, t2i_lora_scale
        ],
        outputs=[t2i_output, t2i_seed_output],
        api_name="text2img"
    )
    i2i_run.click(
        fn=generate_img2img,
        inputs=[
            i2i_input, i2i_prompt, i2i_negative, i2i_strength,
            i2i_seed, i2i_random_seed, i2i_cfg, i2i_steps,
            i2i_lora, i2i_lora_scale
        ],
        outputs=[i2i_output, i2i_seed_output],
        api_name="img2img"
    )
    cn_run.click(
        fn=generate_controlnet,
        inputs=[
            cn_input, cn_prompt, cn_control_type, cn_negative, cn_control_scale,
            cn_seed, cn_random_seed, cn_cfg, cn_steps,
            cn_lora, cn_lora_scale
        ],
        outputs=[cn_output, cn_control_preview, cn_seed_output],
        api_name="controlnet"
    )
if __name__ == "__main__":
    # show_api=True exposes the /text2img, /img2img and /controlnet
    # endpoints registered via api_name above; share=False keeps the app
    # local / Space-hosted only.
    demo.launch(
        show_api=True,
        share=False
    )