Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -1,555 +1,65 @@
  import gradio as gr
  import numpy as np
- import torch
- from diffusers import (
-     StableDiffusionPipeline,
-     ControlNetModel,
-     StableDiffusionControlNetPipeline,
-     StableDiffusionControlNetImg2ImgPipeline,
-     AutoPipelineForImage2Image,
-     DDIMScheduler,
-     UniPCMultistepScheduler)
- from transformers import pipeline
- from diffusers.utils import load_image, make_image_grid
- from peft import PeftModel, LoraConfig
- import os
- from PIL import Image
-
- MAX_SEED = np.iinfo(np.int32).max
- MAX_IMAGE_SIZE = 1024
- IP_ADAPTER = 'h94/IP-Adapter'
- WEIGHT_NAME = "ip-adapter_sd15.bin"
- WEIGHT_NAME_plus = "ip-adapter-plus_sd15.bin"
- WEIGHT_NAME_face = "ip-adapter-full-face_sd15.bin"
-
- model_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
- def get_lora_sd_pipeline(
-     lora_dir='lora_man_animestyle',
-     base_model_name_or_path=None,
-     dtype=torch.float16,
-     adapter_name="default"
- ):
-     unet_sub_dir = os.path.join(lora_dir, "unet")
-     text_encoder_sub_dir = os.path.join(lora_dir, "text_encoder")
-
-     if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
-         config = LoraConfig.from_pretrained(text_encoder_sub_dir)
-         base_model_name_or_path = config.base_model_name_or_path
-
-     if base_model_name_or_path is None:
-         raise ValueError("Specify the base model name or a path to it")
-
-     # (pipeline creation elided in the diff view; reconstructed from the usage below)
-     pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
-     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
-     pipe.unet.set_adapter(adapter_name)
-     after_params = pipe.unet.parameters()
-
-     if os.path.exists(text_encoder_sub_dir):
-         pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
-
-     if dtype in (torch.float16, torch.bfloat16):
-         pipe.unet.half()
-         pipe.text_encoder.half()
-
-     return pipe
-
- def long_prompt_encoder(prompt, tokenizer, text_encoder, max_length=77):
-     tokens = tokenizer(prompt, truncation=False, return_tensors="pt")["input_ids"]
-     part_s = [tokens[:, i:i + max_length] for i in range(0, tokens.shape[1], max_length)]
-     with torch.no_grad():
-         embeds = [text_encoder(part.to(text_encoder.device))[0] for part in part_s]
-     return torch.cat(embeds, dim=1)
-
- # (signature elided in the diff view; reconstructed from the call sites below)
- def align_embeddings(prompt_embeds, negative_prompt_embeds, max_length=77):
-     return torch.nn.functional.pad(prompt_embeds, (0, 0, 0, max_length - prompt_embeds.shape[1])), \
-            torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))
-
- # (signature and opening lines elided in the diff view; reconstructed from the call sites below)
- def preprocess_image(image, target_width, target_height, resize_to_224=False):
-     if isinstance(image, np.ndarray):
-         image = Image.fromarray(image)
-     if resize_to_224:
-         image = image.resize((224, 224), Image.LANCZOS)
-     else:
-         image = image.resize((target_width, target_height), Image.LANCZOS)
-     image = np.array(image).astype(np.float32) / 255.0
-     image = torch.from_numpy(image).to(device)
-     return image

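For context on the two helpers above: long_prompt_encoder encodes a prompt of arbitrary length in 77-token chunks and concatenates the chunk embeddings, and align_embeddings then pads (or trims) the positive and negative embeddings to a common 77-token length so they can be passed to the pipeline together. A minimal usage sketch, assuming the app's own model_default and the helpers above (the prompt strings are illustrative):

    pipe = StableDiffusionPipeline.from_pretrained(model_default, torch_dtype=torch_dtype).to(device)

    long_prompt = "A very detailed anime scene, " * 40  # well past the 77-token CLIP window
    prompt_embeds = long_prompt_encoder(long_prompt, pipe.tokenizer, pipe.text_encoder)
    negative_prompt_embeds = long_prompt_encoder("low quality", pipe.tokenizer, pipe.text_encoder)
    prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
    # Both tensors now have shape (1, 77, 768) and can be fed to the pipeline
    # as prompt_embeds / negative_prompt_embeds.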
-
- def get_depth_map(image, depth_estimator):
-     # Convert the image to PIL if necessary
-     if isinstance(image, np.ndarray):
-         image = Image.fromarray(image)
-     elif isinstance(image, torch.Tensor):
-         image = Image.fromarray(image.cpu().numpy())
-     # Get the depth map
-     depth_map = depth_estimator(image)["depth"]
-     depth_map = np.array(depth_map)
-     depth_map = depth_map[:, :, None]  # Add a third dimension
-     depth_map = np.concatenate([depth_map, depth_map, depth_map], axis=2)  # Expand to 3 channels
-     depth_map = torch.from_numpy(depth_map).float() / 255.0  # Normalize to [0, 1]
-     depth_map = depth_map.permute(2, 0, 1)  # Reorder the axes to (C, H, W)
-     return depth_map
-
- pipe_default = get_lora_sd_pipeline(lora_dir='lora_man_animestyle', base_model_name_or_path=model_default, dtype=torch_dtype).to(device)

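A quick usage sketch for get_depth_map (illustrative; the input file name is hypothetical): it accepts any transformers depth-estimation pipeline and returns a 3-channel float tensor in [0, 1], which the depth ControlNet branch below batches with unsqueeze(0):

    depth_estimator = pipeline("depth-estimation")  # same call the app makes below
    img = Image.open("portrait.png")                # hypothetical input image
    depth = get_depth_map(img, depth_estimator)     # tensor of shape (3, H, W)
    depth_batch = depth.unsqueeze(0)                # (1, 3, H, W) for the ControlNet call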
- #
  def infer(
-     # (the first nine parameters were elided in the diff view; reconstructed from the
-     #  gr.on(..., inputs=[...]) wiring at the bottom of this file)
-     prompt,
-     negative_prompt,
-     model,
-     width,
-     height,
-     num_inference_steps,
-     seed,
-     guidance_scale,
-     lora_scale,
-     use_control_net=False,              # Enables ControlNet
-     control_mode=None,                  # ControlNet mode selector
-     strength_cn=0.5,                    # ControlNet denoising strength
-     control_strength=0.5,               # ControlNet influence strength
-     cn_source_image=None,               # ControlNet source image
-     control_image=None,                 # ControlNet control image
-     use_ip_adapter=False,               # Enables the IP-Adapter
-     ip_adapter_mode=None,               # IP-Adapter mode selector
-     strength_ip=0.5,                    # IP-Adapter denoising strength
-     ip_adapter_strength=0.5,            # IP-Adapter influence strength
-     controlnet_conditioning_scale=0.5,  # ControlNet conditioning scale
-     ip_source_image=None,               # IP-Adapter source image
-     ip_adapter_image=None,              # IP-Adapter reference image
-     progress=gr.Progress(track_tqdm=True)
  ):

-
-     # (condition elided in the diff view; restored by symmetry with the ControlNet branch below)
-     if use_ip_adapter and ip_source_image is not None and ip_adapter_image is not None:
-
-         if ip_adapter_mode == "pose_estimation":
-
-             print('ip_adapter_mode = ', ip_adapter_mode)
-
-             # Initialize ControlNet
-             controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
-             controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-             generator = torch.Generator(device).manual_seed(seed)
-
-             pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
-                 model_default,
-                 controlnet=controlnet,
-                 torch_dtype=torch_dtype
-             ).to(device)
-
-             # Load the IP-Adapter
-             pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
-             pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-
-             # Resize the images for the IP-Adapter (224x224)
-             ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-             ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-             # Attach the LoRA to the IP-Adapter pipeline if it is not attached yet
-             if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
-                 # Load the LoRA for the UNet
-                 pipe_ip_adapter.unet = PeftModel.from_pretrained(
-                     pipe_ip_adapter.unet,
-                     'lora_man_animestyle/unet',
-                     adapter_name="default"
-                 )
-                 pipe_ip_adapter.unet.set_adapter("default")
-
-                 # Load the LoRA for the text encoder, if it exists
-                 text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
-                 if os.path.exists(text_encoder_lora_path):
-                     pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
-                         pipe_ip_adapter.text_encoder,
-                         text_encoder_lora_path,
-                         adapter_name="default"
-                     )
-                     pipe_ip_adapter.text_encoder.set_adapter("default")
-
-                 # Fuse the LoRA into the base model
-                 pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
-                 pipe_ip_adapter.lora_loaded = True  # Mark the LoRA as loaded
-
-             # Make sure the parameters are floats
-             ip_adapter_strength = float(ip_adapter_strength)
-             controlnet_conditioning_scale = float(controlnet_conditioning_scale)
-
-             # Run the IP-Adapter together with the LoRA
-             prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-             negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-             prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-             image = pipe_ip_adapter(
-                 prompt_embeds=prompt_embeds,
-                 negative_prompt_embeds=negative_prompt_embeds,
-                 image=ip_adapter_image,
-                 ip_adapter_image=ip_source_image,
-                 strength=strength_ip,
-                 width=width,
-                 height=height,
-                 num_inference_steps=num_inference_steps,
-                 guidance_scale=guidance_scale,
-                 controlnet_conditioning_scale=controlnet_conditioning_scale,
-                 generator=generator,
-             ).images[0]
-         else:
-
-             if ip_adapter_mode == "edge_detection":
-
-                 print('ip_adapter_mode = ', ip_adapter_mode)
-
-                 # Initialize ControlNet
-                 controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
-                 controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-                 generator = torch.Generator(device).manual_seed(seed)
-
-                 pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
-                     model_default,
-                     controlnet=controlnet,
-                     torch_dtype=torch_dtype
-                 ).to(device)
-
-                 # Load the IP-Adapter
-                 #pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_face)
-                 pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
-                 pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-
-                 # Resize the images for the IP-Adapter (224x224)
-                 ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-                 ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-                 # Attach the LoRA to the IP-Adapter pipeline if it is not attached yet
-                 if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
-                     # Load the LoRA for the UNet
-                     pipe_ip_adapter.unet = PeftModel.from_pretrained(
-                         pipe_ip_adapter.unet,
-                         'lora_man_animestyle/unet',
-                         adapter_name="default"
-                     )
-                     pipe_ip_adapter.unet.set_adapter("default")
-
-                     # Load the LoRA for the text encoder, if it exists
-                     text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
-                     if os.path.exists(text_encoder_lora_path):
-                         pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
-                             pipe_ip_adapter.text_encoder,
-                             text_encoder_lora_path,
-                             adapter_name="default"
-                         )
-                         pipe_ip_adapter.text_encoder.set_adapter("default")
-
-                     # Fuse the LoRA into the base model
-                     pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
-                     pipe_ip_adapter.lora_loaded = True  # Mark the LoRA as loaded
-
-                 # Make sure the parameters are floats
-                 ip_adapter_strength = float(ip_adapter_strength)
-                 controlnet_conditioning_scale = float(controlnet_conditioning_scale)
-
-                 # Run the IP-Adapter together with the LoRA
-                 prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-                 negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-                 prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-
-                 # scale = {  # defaults to 0 everywhere in the other blocks.
-                 #     "down": {
-                 #         "block_0": [0.0, 1.0],
-                 #         "block_1": [0.0, 1.0],
-                 #     },
-                 #     "up": {
-                 #         "block_0": [0.0, 1.0, 0.0],
-                 #         "block_1": [0.0, 1.0, 0.0],
-                 #     },
-                 # }
-                 # scale = {
-                 #     "down": {"block_2": [0.0, 1.0]},
-                 #     "up": {"block_0": [0.0, 1.0, 0.0]},
-                 # }
-                 # pipe_ip_adapter.set_ip_adapter_scale(scale)
-
-                 image = pipe_ip_adapter(
-                     prompt_embeds=prompt_embeds,
-                     negative_prompt_embeds=negative_prompt_embeds,
-                     image=ip_adapter_image,
-                     ip_adapter_image=ip_source_image,
-                     strength=strength_ip,
-                     width=width,
-                     height=height,
-                     num_inference_steps=num_inference_steps,
-                     guidance_scale=guidance_scale,
-                     controlnet_conditioning_scale=controlnet_conditioning_scale,
-                     generator=generator,
-                 ).images[0]
-             else:
-
-                 if ip_adapter_mode == "depth_map":
-
-                     print('ip_adapter_mode = ', ip_adapter_mode)
-
-                     # Make sure the parameter is a float
-                     controlnet_conditioning_scale = float(controlnet_conditioning_scale)
-
-                     # Initialize ControlNet
-                     controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
-                     controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-                     generator = torch.Generator(device).manual_seed(seed)
-
-                     # Resize the images for the IP-Adapter (224x224)
-                     ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-                     ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-                     pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
-                         model_default,
-                         controlnet=controlnet,
-                         torch_dtype=torch_dtype
-                     ).to(device)
-                     pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME)
-
-                     pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-                     image = pipe_ip_adapter(
-                         prompt=prompt,
-                         negative_prompt=negative_prompt,
-                         image=ip_source_image,
-                         width=width,
-                         height=height,
-                         ip_adapter_image=ip_adapter_image,
-                         num_inference_steps=num_inference_steps,
-                         strength=strength_ip,
-                         guidance_scale=guidance_scale,
-                         controlnet_conditioning_scale=controlnet_conditioning_scale,
-                         generator=generator,
-                     ).images[0]
-                 else:
-
-                     if ip_adapter_mode == "face_model":
-
-                         print('ip_adapter_mode = ', ip_adapter_mode)
-
-                         # Resize the images for the IP-Adapter (224x224)
-                         ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-                         ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-                         pipe_ip_adapter = StableDiffusionPipeline.from_pretrained(
-                             model_default,
-                             torch_dtype=torch_dtype,
-                         ).to(device)
-
-                         pipe_ip_adapter.scheduler = DDIMScheduler.from_config(pipe_ip_adapter.scheduler.config)
-                         pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_face)
-
-                         generator = torch.Generator(device).manual_seed(seed)
-
-                         pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-                         image = pipe_ip_adapter(
-                             prompt=prompt,
-                             negative_prompt=negative_prompt,
-                             ip_adapter_image=ip_adapter_image,
-                             width=width,
-                             height=height,
-                             guidance_scale=guidance_scale,
-                             num_inference_steps=num_inference_steps,
-                             generator=generator,
-                         ).images[0]
-     else:
-         # Image generation with ControlNet ----------------------------------------------------------------------------------------------------------------
-
-         if use_control_net and control_image is not None and cn_source_image is not None:
-
-             if control_mode == "pose_estimation":
-
-                 print('control_mode = ', control_mode)
-
-                 # Initialize ControlNet
-                 controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
-                 controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-                 generator = torch.Generator(device).manual_seed(seed)
-
-                 pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-                     model_default,
-                     controlnet=controlnet,
-                     torch_dtype=torch_dtype
-                 ).to(device)
-
-                 # Preprocess the images
-                 cn_source_image = preprocess_image(cn_source_image, width, height)
-                 control_image = preprocess_image(control_image, width, height)
-
-                 # Attach the LoRA to the ControlNet pipeline if it is not attached yet
-                 if not hasattr(pipe_controlnet, 'lora_loaded') or not pipe_controlnet.lora_loaded:
-                     # Load the LoRA for the UNet
-                     pipe_controlnet.unet = PeftModel.from_pretrained(
-                         pipe_controlnet.unet,
-                         'lora_man_animestyle/unet',
-                         adapter_name="default"
-                     )
-                     pipe_controlnet.unet.set_adapter("default")
-
-                     # Load the LoRA for the text encoder, if it exists
-                     text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
-                     if os.path.exists(text_encoder_lora_path):
-                         pipe_controlnet.text_encoder = PeftModel.from_pretrained(
-                             pipe_controlnet.text_encoder,
-                             text_encoder_lora_path,
-                             adapter_name="default"
-                         )
-                         pipe_controlnet.text_encoder.set_adapter("default")
-
-                     # Fuse the LoRA into the base model
-                     pipe_controlnet.fuse_lora(lora_scale=lora_scale)
-                     pipe_controlnet.lora_loaded = True  # Mark the LoRA as loaded
-
-                 # Make sure control_strength is a float
-                 control_strength = float(control_strength)
-                 #strength_sn = float(strength_sn)
-
-                 # Run ControlNet together with the LoRA
-                 prompt_embeds = long_prompt_encoder(prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
-                 negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
-                 prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-                 image = pipe_controlnet(
-                     prompt_embeds=prompt_embeds,
-                     negative_prompt_embeds=negative_prompt_embeds,
-                     image=cn_source_image,
-                     control_image=control_image,
-                     strength=strength_cn,
-                     width=width,
-                     height=height,
-                     num_inference_steps=num_inference_steps,
-                     guidance_scale=guidance_scale,
-                     controlnet_conditioning_scale=control_strength,
-                     generator=generator
-                 ).images[0]
-             else:
-
-                 # (branch header and model path elided in the diff view; reconstructed by symmetry
-                 #  with the other branches — the canny model id is an assumption)
-                 if control_mode == "edge_detection":
-
-                     print('control_mode = ', control_mode)
-
-                     controlnet_model_path = "lllyasviel/sd-controlnet-canny"
-                     controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype, use_safetensors=True)
-
-                     generator = torch.Generator(device).manual_seed(seed)
-
-                     pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
-                         model_default,
-                         controlnet=controlnet,
-                         torch_dtype=torch_dtype,
-                         use_safetensors=True
-                     ).to(device)
-
-                     pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
-
-                     # Preprocess the images
-                     cn_source_image = preprocess_image(cn_source_image, width, height)
-                     control_image = preprocess_image(control_image, width, height)
-
-                     image = pipe_controlnet(
-                         prompt=prompt,
-                         negative_prompt=negative_prompt,
-                         image=cn_source_image,
-                         control_image=control_image,
-                         strength=strength_cn,
-                         width=width,
-                         height=height,
-                         num_inference_steps=num_inference_steps,
-                         guidance_scale=guidance_scale,
-                         controlnet_conditioning_scale=control_strength,
-                         generator=generator
-                     ).images[0]
-                 else:
-
-                     if control_mode == "depth_map":
-
-                         print('control_mode = ', control_mode)
-
-                         depth_estimator = pipeline("depth-estimation")
-                         depth_map = get_depth_map(control_image, depth_estimator).unsqueeze(0).half().to(device)
-
-                         controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
-                         controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype, use_safetensors=True)
-
-                         generator = torch.Generator(device).manual_seed(seed)
-
-                         pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-                             model_default,
-                             controlnet=controlnet,
-                             torch_dtype=torch_dtype,
-                             use_safetensors=True
-                         ).to(device)
-
-                         pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
-
-                         image = pipe_controlnet(
-                             prompt=prompt,
-                             negative_prompt=negative_prompt,
-                             image=control_image,
-                             control_image=depth_map,
-                             width=width,
-                             height=height,
-                             num_inference_steps=num_inference_steps,
-                             guidance_scale=guidance_scale,
-                             generator=generator
-                         ).images[0]
-                     else:
-                         # Image generation with the LoRA alone, without ControlNet or IP-Adapter ---------------------------------------------------------------------------------------------
-
-                         # Initialize ControlNet
-                         controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
-                         controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-                         generator = torch.Generator(device).manual_seed(seed)
-
-                         if model != model_default:
-                             pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype).to(device)
-                             prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
-                             negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
-                             prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-                         else:
-                             pipe = pipe_default
-                             prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
-                             negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
-                             prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-                             pipe.fuse_lora(lora_scale=lora_scale)
-
-                         params = {
-                             'prompt_embeds': prompt_embeds,
-                             'negative_prompt_embeds': negative_prompt_embeds,
-                             'guidance_scale': guidance_scale,
-                             'num_inference_steps': num_inference_steps,
-                             'width': width,
-                             'height': height,
-                             'generator': generator,
-                         }
-
-                         image = pipe(**params).images[0]
-
-     return image
- # ---------------------------------------------------------------------------------------------------------------------------------------------
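Taken together, this fallback path reduces to the following standalone sketch, assuming the helpers defined earlier in the file and a local lora_man_animestyle folder (the prompt, seed, and output file name are illustrative; the numeric values mirror the app's defaults):

    pipe = get_lora_sd_pipeline(lora_dir='lora_man_animestyle',
                                base_model_name_or_path=model_default,
                                dtype=torch_dtype).to(device)
    pipe.fuse_lora(lora_scale=0.7)

    prompt_embeds = long_prompt_encoder("The smiling man. Anime style.", pipe.tokenizer, pipe.text_encoder)
    negative_prompt_embeds = long_prompt_encoder("low quality", pipe.tokenizer, pipe.text_encoder)
    prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)

    image = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        guidance_scale=7.5,
        num_inference_steps=50,
        width=512,
        height=512,
        generator=torch.Generator(device).manual_seed(42),
    ).images[0]
    image.save("result.png")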

  examples = [
-     # (first entry truncated in the diff view)
-     "The smiling man. His face and hands are visible. Anime style. The best quality.",
-     "The smiling girl. Anime style. Best quality, high quality.",
-     "lego batman and robin. Rich and vibrant colors.",
-     "A photo of Pushkin as a hockey player in uniform with a stick, playing hockey on the ice arena in the NHL and scoring a goal.",
- ]

  examples_negative = [
-     # (first entry truncated in the diff view)
-     "Monochrome, lowres, bad anatomy, worst quality, low quality",
-     "lowres, bad anatomy, worst quality, low quality, black and white image.",
  ]

  css = """
@@ -564,67 +74,54 @@ available_models = [
      "CompVis/stable-diffusion-v1-4",
  ]

- # -------------------------------------------------------------------------------------------------------------------------------------------------
  with gr.Blocks(css=css) as demo:
      with gr.Column(elem_id="col-container"):
          gr.Markdown(" # Text-to-Image Gradio Template from V. Gorsky")

-         # (several widget lines elided in the diff view)
-         prompt = gr.Textbox(
              label="Prompt",
              max_lines=1,
              placeholder="Enter your prompt",
          )

-         negative_prompt = gr.Textbox(  # (widget type truncated in the diff view; Textbox by symmetry)
              label="Negative prompt",
              max_lines=1,
              placeholder="Enter a negative prompt",
          )
-
-         with gr.Row():
-             lora_scale = gr.Slider(
-                 label="LoRA scale",
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.01,
-                 value=0.7,
-             )
-
-         with gr.Row():
-             guidance_scale = gr.Slider(
-                 label="Guidance scale",
-                 minimum=0.0,
-                 maximum=10.0,
-                 step=0.01,
-                 value=7.5,
-             )

-
-         seed = gr.Slider(
              label="Seed",
              minimum=0,
              maximum=MAX_SEED,
              step=1,
-             value=  # (value truncated in the diff view)
-         )

-         with gr.Row():
-             num_inference_steps = gr.Slider(
-                 label="Number of inference steps",
-                 minimum=1,
-                 maximum=100,
-                 step=1,
-                 value=50,
-             )
-
      with gr.Accordion("Advanced Settings", open=False):
          with gr.Row():
              width = gr.Slider(
@@ -632,160 +129,39 @@ with gr.Blocks(css=css) as demo:
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
-                 value=512,
              )
-
-             with gr.Row():
              height = gr.Slider(
                  label="Height",
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
-                 value=512,
              )

-
-         with gr.Row():
-             use_control_net = gr.Checkbox(
-                 label="Use ControlNet",
-                 value=False,
-             )
-
-             with gr.Column(visible=False) as control_net_options:
-                 strength_cn = gr.Slider(
-                     label="Strength",
-                     minimum=0.0,
-                     maximum=1.0,
-                     value=0.5,
-                     step=0.01,
-                     interactive=True,
-                 )
-
-                 control_strength = gr.Slider(
-                     label="Control Strength",
-                     minimum=0.0,
-                     maximum=1.0,
-                     value=0.5,
-                     step=0.01,
-                     interactive=True,
-                 )
-
-                 control_mode = gr.Dropdown(
-                     label="Control Mode",
-                     choices=[
-                         "pose_estimation",
-                         "edge_detection",
-                         "depth_map",
-                     ],
-                     value="pose_estimation",
-                     interactive=True,
-                 )
-
-                 cn_source_image = gr.Image(label="Upload Source Image")
-
-                 control_image = gr.Image(label="Upload Control Net Image")
-
-             use_control_net.change(
-                 fn=lambda x: gr.update(visible=x),
-                 inputs=use_control_net,
-                 outputs=control_net_options
-             )
-
-         # IP_Adapter ------------------------------------------------------------------------------------------------
-         with gr.Blocks():
-             with gr.Row():
-                 use_ip_adapter = gr.Checkbox(
-                     label="Use IP_Adapter",
-                     value=False,
-                 )
-
-                 with gr.Column(visible=False) as ip_adapter_options:
-                     strength_ip = gr.Slider(
-                         label="Strength",
-                         minimum=0.0,
-                         maximum=1.0,
-                         value=0.5,
-                         step=0.01,
-                         interactive=True,
-                     )
-
-                     ip_adapter_strength = gr.Slider(
-                         label="IP_Adapter Strength",
-                         minimum=0.0,
-                         maximum=1.0,
-                         value=0.5,
-                         step=0.01,
-                         interactive=True,
-                     )
-
-                     controlnet_conditioning_scale = gr.Slider(
-                         label="Controlnet conditioning scale",
-                         minimum=0.0,
-                         maximum=1.0,
-                         value=0.5,
-                         step=0.01,
-                         interactive=True,
-                     )
-
-                     ip_adapter_mode = gr.Dropdown(
-                         label="Ip_Adapter Mode",
-                         choices=[
-                             "pose_estimation",
-                             "edge_detection",
-                             "depth_map",
-                             "face_model"
-                         ],
-                         value="pose_estimation",
-                         interactive=True,
-                     )
-
-                     ip_source_image = gr.Image(label="Upload Source Image")
-
-                     ip_adapter_image = gr.Image(label="Upload IP_Adapter Image")
-
-                 use_ip_adapter.change(
-                     fn=lambda x: gr.update(visible=x),
-                     inputs=use_ip_adapter,
-                     outputs=ip_adapter_options
-                 )
-         # ---------------------------------------------------------------------------------------------------------

-         gr.  # (truncated in the diff view)
-         gr.  # (truncated in the diff view)

-         run_button = gr.Button("Run", scale=1, variant="primary")
-         result = gr.Image(label="Result", show_label=False)
-
      gr.on(
          triggers=[run_button.click, prompt.submit],
          fn=infer,
          inputs=[
              prompt,
              negative_prompt,
-             model,       # (elided in the diff view; matches the infer signature)
              width,
              height,
-             num_inference_steps,
-             seed,
              guidance_scale,
-             lora_scale,  # (elided in the diff view; matches the infer signature)
-             use_control_net,                # Enables ControlNet
-             control_mode,                   # ControlNet mode selector
-             strength_cn,                    # ControlNet denoising strength
-             control_strength,               # ControlNet influence strength
-             cn_source_image,                # ControlNet source image
-             control_image,                  # ControlNet control image
-             use_ip_adapter,                 # Enables the IP-Adapter
-             ip_adapter_mode,                # IP-Adapter mode selector
-             strength_ip,                    # IP-Adapter denoising strength
-             ip_adapter_strength,            # IP-Adapter influence strength
-             controlnet_conditioning_scale,  # ControlNet conditioning scale
-             ip_source_image,                # IP-Adapter source image
-             ip_adapter_image,               # IP-Adapter reference image
          ],
-         outputs=[result],
      )

  if __name__ == "__main__":
      demo.launch()
  import gradio as gr
  import numpy as np
+ import random

+ from diffusers import DiffusionPipeline
+ import torch

+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model_repo_id = "stabilityai/sdxl-turbo"  # Replace with the model you would like to use

+ if torch.cuda.is_available():
+     torch_dtype = torch.float16
+ else:
+     torch_dtype = torch.float32

+ pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
+ pipe = pipe.to(device)

+ MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 1024

+ # @spaces.GPU  # [uncomment to use ZeroGPU]
  def infer(
+     model,
+     prompt,
+     negative_prompt,
+     seed,
+     width,
+     height,
+     guidance_scale,
+     num_inference_steps,
+     progress=gr.Progress(track_tqdm=True),
  ):

+     # Fix: `pipe` must also be declared global here. Without it, the assignment below
+     # makes `pipe` function-local, and the generation call further down raises
+     # UnboundLocalError whenever the selected model equals model_repo_id — the likely
+     # cause of this Space's runtime error.
+     global pipe, model_repo_id
+     if model != model_repo_id:
+         print(model, model_repo_id)
+         pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype)
+         pipe = pipe.to(device)
+         model_repo_id = model  # remember the active model so it is not reloaded on every call

+     generator = torch.Generator().manual_seed(seed)

+     image = pipe(
+         prompt=prompt,
+         negative_prompt=negative_prompt,
+         guidance_scale=guidance_scale,
+         num_inference_steps=num_inference_steps,
+         width=width,
+         height=height,
+         generator=generator,
+     ).images[0]

+     return image, seed

  examples = [
+     "Young man in anime style. The image is of high sharpness and resolution. A handsome, thoughtful man. The man is depicted in the foreground, close-up or middle plan. The background is blurry, not sharp. The play of light and shadow is visible on the face and clothes."
+ ]

  examples_negative = [
+     "blurred details, low resolution, poor image of a man's face, poor quality, artifacts, black and white image"
  ]

  css = """
      "CompVis/stable-diffusion-v1-4",
  ]

  with gr.Blocks(css=css) as demo:
+
      with gr.Column(elem_id="col-container"):
          gr.Markdown(" # Text-to-Image Gradio Template from V. Gorsky")

+         model = gr.Dropdown(
+             label="Model Selection",
+             choices=available_models,
+             value="stable-diffusion-v1-5/stable-diffusion-v1-5",
+             interactive=True
+         )
+         prompt = gr.Text(
              label="Prompt",
+             show_label=False,
              max_lines=1,
              placeholder="Enter your prompt",
+             container=False,
          )

+         negative_prompt = gr.Text(
              label="Negative prompt",
              max_lines=1,
              placeholder="Enter a negative prompt",
+             visible=True,
          )

+         seed = gr.Slider(
              label="Seed",
              minimum=0,
              maximum=MAX_SEED,
              step=1,
+             value=0,
+         )
+         guidance_scale = gr.Slider(
+             label="Guidance scale",
+             minimum=0.0,
+             maximum=10.0,
+             step=0.1,
+             value=7.5,  # Replace with defaults that work for your model
+         )
+         num_inference_steps = gr.Slider(
+             label="Number of inference steps",
+             minimum=1,
+             maximum=100,
+             step=1,
+             value=30,  # Replace with defaults that work for your model
+         )

      with gr.Accordion("Advanced Settings", open=False):
          with gr.Row():
              width = gr.Slider(
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
+                 value=512,  # Replace with defaults that work for your model
              )
+
              height = gr.Slider(
                  label="Height",
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
+                 value=512,  # Replace with defaults that work for your model
              )

+     gr.Examples(examples=examples, inputs=[prompt])
+     gr.Examples(examples=examples_negative, inputs=[negative_prompt])

+     run_button = gr.Button("Run", scale=0, variant="primary")
+     result = gr.Image(label="Result", show_label=False)

      gr.on(
          triggers=[run_button.click, prompt.submit],
          fn=infer,
          inputs=[
+             model,
              prompt,
              negative_prompt,
+             seed,
              width,
              height,
              guidance_scale,
+             num_inference_steps,
          ],
+         outputs=[result, seed],
      )

  if __name__ == "__main__":
      demo.launch()
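One loose end in the new file: it imports random and returns seed from infer, yet never actually randomizes the seed. The stock Gradio text-to-image template this appears to be based on pairs the seed slider with a "Randomize seed" checkbox through a small helper; a hypothetical sketch of that missing piece:

    def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
        # Draw a fresh seed when requested; otherwise keep the slider value.
        if randomize_seed:
            seed = random.randint(0, MAX_SEED)
        return seed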