import gradio as gr
import numpy as np
import torch
from diffusers.utils import load_image
from diffusers import (
    StableDiffusionControlNetPipeline,
    ControlNetModel,
)
from peft import PeftModel, LoraConfig
from controlnet_aux import HEDdetector
from PIL import Image
import cv2 as cv
import os
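
# Generation limits and the IP-Adapter checkpoint (loaded on demand in infer()).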
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024
IP_ADAPTER = 'h94/IP-Adapter'
IP_ADAPTER_WEIGHT_NAME = "ip-adapter-plus_sd15.bin"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id_default = "CompVis/stable-diffusion-v1-4"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

hed = None
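
# Maps UI mode names to ControlNet checkpoints; unsupported modes stay commented out.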
dict_controlnet = {
    "edge_detection": "lllyasviel/sd-controlnet-canny",
    # "pose_estimation": "lllyasviel/sd-controlnet-openpose",
    # "depth_map": "lllyasviel/sd-controlnet-depth",
    "scribble": "lllyasviel/sd-controlnet-scribble",
    # "MLSD": "lllyasviel/sd-controlnet-mlsd"
}

controlnet = ControlNetModel.from_pretrained(
    dict_controlnet["edge_detection"],
    cache_dir="./models_cache",
    torch_dtype=torch_dtype,
)


def get_lora_sd_pipeline(
    ckpt_dir='./lora_logos',
    base_model_name_or_path=None,
    dtype=torch.float16,
    adapter_name="default",
    controlnet=None,
):
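    """Build a StableDiffusionControlNetPipeline with LoRA weights attached.

    LoRA adapters are loaded from ckpt_dir into the UNet (and the text encoder,
    if a text_encoder subfolder exists) via PEFT. When no base model is given,
    it is read from the text encoder's LoraConfig.
    """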
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
        base_model_name_or_path = config.base_model_name_or_path
    if base_model_name_or_path is None:
        raise ValueError("Please specify the base model name or path")
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        base_model_name_or_path,
        torch_dtype=dtype,
        controlnet=controlnet,
    )
    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
    pipe.unet.set_adapter(adapter_name)
    print(f"LoRA adapters attached to the UNet: {pipe.unet.active_adapters}")
    if os.path.exists(text_encoder_sub_dir):
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )
    if dtype in (torch.float16, torch.bfloat16):
        # Cast to the requested dtype rather than hard-coding float16.
        pipe.unet = pipe.unet.to(dtype)
        pipe.text_encoder = pipe.text_encoder.to(dtype)
    return pipe


def process_prompt(prompt, tokenizer, text_encoder, max_length=77):
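    """Encode a prompt of arbitrary length by splitting it into chunks of at most
    max_length tokens (the CLIP context limit) and concatenating the embeddings."""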
    tokens = tokenizer(prompt, truncation=False, return_tensors="pt")["input_ids"]
    chunks = [tokens[:, i:i + max_length] for i in range(0, tokens.shape[1], max_length)]
    with torch.no_grad():
        embeds = [text_encoder(chunk.to(text_encoder.device))[0] for chunk in chunks]
    return torch.cat(embeds, dim=1)


def align_embeddings(prompt_embeds, negative_prompt_embeds):
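    """Zero-pad the shorter embedding so both share the same sequence length."""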
    max_length = max(prompt_embeds.shape[1], negative_prompt_embeds.shape[1])
    return torch.nn.functional.pad(prompt_embeds, (0, 0, 0, max_length - prompt_embeds.shape[1])), \
           torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))


def map_edge_detection(image_path: str) -> Image.Image:
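    """Build a ControlNet conditioning image from Canny edges."""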
    source_img = load_image(image_path).convert('RGB')
    edges = cv.Canny(np.array(source_img), 80, 160)
    edges = np.repeat(edges[:, :, None], 3, axis=2)  # single channel -> 3-channel RGB
    return Image.fromarray(edges)


def map_scribble(image_path: str) -> Image.Image:
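    """Build a scribble-style conditioning image using a lazily loaded HED detector."""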
    global hed
    if hed is None:
        hed = HEDdetector.from_pretrained('lllyasviel/Annotators')
    image = load_image(image_path).convert('RGB')
    scribble_image = hed(image)
    image_np = np.array(scribble_image)
    image_np = cv.medianBlur(image_np, 3)  # suppress speckle noise
    image_np = cv.convertScaleAbs(image_np, alpha=1.5, beta=0)  # boost contrast
    return Image.fromarray(image_np)
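

# Build the default pipeline once at startup; infer() rebuilds it only when the
# base model or the ControlNet selection changes.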
pipe = get_lora_sd_pipeline(
    ckpt_dir='./lora_logos',
    base_model_name_or_path=model_id_default,
    dtype=torch_dtype,
    controlnet=controlnet,
).to(device)


def infer(
    prompt,
    negative_prompt,
    width=512,
    height=512,
    num_inference_steps=20,
    model_id=model_id_default,
    seed=42,
    guidance_scale=7.0,
    lora_scale=0.5,
    cn_enable=False,
    cn_strength=0.0,
    cn_mode='edge_detection',
    cn_image=None,
    ip_enable=False,
    ip_scale=0.5,
    ip_image=None,
    progress=gr.Progress(track_tqdm=True),
):
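    """Run one text-to-image generation with optional ControlNet and IP-Adapter conditioning."""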
    global pipe
    global controlnet

    # gr.Number delivers floats, so cast the seed before seeding the generator.
    generator = torch.Generator(device).manual_seed(int(seed))

    controlnet_changed = False
    if cn_enable:
        if dict_controlnet[cn_mode] != pipe.controlnet._name_or_path:
            controlnet = ControlNetModel.from_pretrained(
                dict_controlnet[cn_mode],
                cache_dir="./models_cache",
                torch_dtype=torch_dtype,
            )
            controlnet_changed = True
    else:
        cn_strength = 0.0  # force-disable ControlNet
    # Rebuild the pipeline only when the base model or the ControlNet changed.
    # The conditioning scale is a call-time argument, so it is not passed to
    # from_pretrained here.
    if model_id != pipe._name_or_path or controlnet_changed:
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            controlnet=controlnet,
        ).to(device)
    print(f"LoRA adapter loaded: {pipe.unet.active_adapters}")
    print(f"LoRA scale applied: {lora_scale}")
    pipe.fuse_lora(lora_scale=lora_scale)

    prompt_embeds = process_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
    negative_prompt_embeds = process_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
    prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)

    params = {
        'prompt_embeds': prompt_embeds,
        'negative_prompt_embeds': negative_prompt_embeds,
        'guidance_scale': guidance_scale,
        'num_inference_steps': num_inference_steps,
        'width': width,
        'height': height,
        'generator': generator,
    }
    if cn_enable:
        params['controlnet_conditioning_scale'] = cn_strength
        if cn_mode == 'edge_detection':
            control_image = map_edge_detection(cn_image)
        elif cn_mode == 'scribble':
            control_image = map_scribble(cn_image)
        # The text-to-image ControlNet pipeline takes its conditioning image
        # via the `image` argument.
        params['image'] = control_image
    else:
        # The pipeline still requires a conditioning image; a blank one at
        # zero strength leaves the output unaffected.
        params['controlnet_conditioning_scale'] = 0.0
        params['image'] = Image.new('RGB', (width, height))

    if ip_enable:
        pipe.load_ip_adapter(
            IP_ADAPTER,
            subfolder="models",
            weight_name=IP_ADAPTER_WEIGHT_NAME,
        )
        params['ip_adapter_image'] = load_image(ip_image).convert('RGB')
        pipe.set_ip_adapter_scale(ip_scale)  # use the UI slider value

    return pipe(**params).images[0]
| css = """ | |
| #col-container { | |
| margin: 0 auto; | |
| max-width: 640px; | |
| } | |
| """ | |

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# DEMO Text-to-Image")
        with gr.Row():
            model_id = gr.Textbox(
                label="Model ID",
                max_lines=1,
                placeholder="Enter model id like 'CompVis/stable-diffusion-v1-4'",
                value=model_id_default,
            )
        prompt = gr.Textbox(
            label="Prompt",
            max_lines=1,
            placeholder="Enter your prompt",
        )
        negative_prompt = gr.Textbox(
            label="Negative prompt",
            max_lines=1,
            placeholder="Enter a negative prompt",
        )
        with gr.Row():
            seed = gr.Number(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )
        with gr.Row():
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.0,
            )
        with gr.Row():
            lora_scale = gr.Slider(
                label="LoRA scale",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.5,
            )
        with gr.Row():
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=20,
            )
        # ControlNet section
        cn_enable = gr.Checkbox(label="Enable ControlNet")
        with gr.Column(visible=False) as cn_options:
            with gr.Row():
                cn_strength = gr.Slider(0, 2, value=0.8, step=0.1, label="Control strength", interactive=True)
                cn_mode = gr.Dropdown(
                    choices=["edge_detection", "scribble"],
                    value="edge_detection",
                    label="Conditioning mode",
                    interactive=True,
                )
            cn_image = gr.Image(type="filepath", label="Control image")
        cn_enable.change(
            lambda x: gr.update(visible=x),
            inputs=cn_enable,
            outputs=cn_options,
        )
        # IP-Adapter section
        ip_enable = gr.Checkbox(label="Enable IP-Adapter")
        with gr.Column(visible=False) as ip_options:
            ip_scale = gr.Slider(0, 1, value=0.5, step=0.1, label="IP-Adapter scale", interactive=True)
            ip_image = gr.Image(type="filepath", label="IP-Adapter image", interactive=True)
        ip_enable.change(
            lambda x: gr.update(visible=x),
            inputs=ip_enable,
            outputs=ip_options,
        )
        with gr.Accordion("Optional Settings", open=False):
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,
                )
            with gr.Row():
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,
                )
        run_button = gr.Button("Run", scale=1, variant="primary")
        result = gr.Image(label="Result", show_label=False)

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            width,
            height,
            num_inference_steps,
            model_id,
            seed,
            guidance_scale,
            lora_scale,
            cn_enable,
            cn_strength,
            cn_mode,
            cn_image,
            ip_enable,
            ip_scale,
            ip_image,
        ],
        outputs=[result],
    )

if __name__ == "__main__":
    demo.launch()