# senseicashpls2 / src/pipeline.py
# Uploaded by manbeast3b — "Update src/pipeline.py" (commit e1d7c23, verified)
import torch
from PIL.Image import Image
from diffusers import StableDiffusionXLPipeline
import torch.nn as nn
from pipelines.models import TextToImageRequest
from diffusers import DDIMScheduler
from torch import Generator
from loss import SchedulerWrapper
from onediffx import compile_pipe, save_pipe, load_pipe
class BasicQuantization:
    """Uniform affine fake-quantization: map a tensor onto ``2**bits`` signed
    integer levels, then immediately dequantize back to float.

    Used to simulate low-bit weights while keeping tensors in their original
    dtype (the quantization error is baked into the returned tensor).
    """

    def __init__(self, bits=1):
        # Signed integer range for the requested bit width,
        # e.g. bits=8 -> [-128, 127].
        self.bits = bits
        self.qmin = -(2 ** (bits - 1))
        self.qmax = 2 ** (bits - 1) - 1

    def quantize_tensor(self, tensor):
        """Fake-quantize ``tensor``.

        Returns a ``(dequantized_tensor, scale, zero_point)`` triple. The
        dequantized tensor has the same shape/dtype as the input but only
        takes on the representable quantized values.
        """
        t_min = tensor.min()
        t_max = tensor.max()
        scale = (t_max - t_min) / (self.qmax - self.qmin)
        # Guard: a constant tensor gives scale == 0, and the divisions below
        # would then produce NaN/Inf that get written back into the model.
        # Quantizing a constant tensor is a no-op, so return it unchanged.
        if scale == 0:
            return tensor.clone(), torch.ones_like(scale), torch.zeros_like(scale)
        zero_point = self.qmin - torch.round(t_min / scale)
        # Quantize: scale into integer grid, round, and clamp to range.
        qtensor = torch.round(tensor / scale + zero_point)
        qtensor = torch.clamp(qtensor, self.qmin, self.qmax)
        # Dequantize back to the original value domain.
        tensor_q = (qtensor - zero_point) * scale
        return tensor_q, scale, zero_point
class SDXLQuantization:
    """Apply in-place fake-quantization to every ``nn.Linear`` layer of a model."""

    def __init__(self, model, bit_number=6):
        self.model = model
        self.quant = BasicQuantization(bit_number)

    def quantize_model(self, save_name='quantized_layers.pth'):
        """Fake-quantize the weights and biases of all Linear layers in place.

        ``save_name`` is kept for interface compatibility but is not used:
        nothing is written to disk — the quantization error is baked directly
        into the live parameters. (Conv2d layers are deliberately skipped.)
        """
        for name, module in self.model.named_modules():
            if isinstance(module, nn.Linear):  # nn.Conv2d intentionally excluded
                # Quantize weights in place.
                if hasattr(module, 'weight'):
                    quantized_weight, _, _ = self.quant.quantize_tensor(module.weight)
                    module.weight = torch.nn.Parameter(quantized_weight)
                # Quantize bias (if the layer has one).
                if hasattr(module, 'bias') and module.bias is not None:
                    quantized_bias, _, _ = self.quant.quantize_tensor(module.bias)
                    module.bias = torch.nn.Parameter(quantized_bias)
def callback_dynamic_cfg(pipe, step_index, timestep, callback_kwargs):
    """Disable classifier-free guidance 78% of the way through sampling.

    At the trigger step, keep only the conditional half of each duplicated
    embedding tensor (dropping the unconditional batch) and lower the
    pipeline's guidance scale to 0.1, so the remaining steps run effectively
    unguided at half the batch cost.
    """
    trigger_step = int(pipe.num_timesteps * 0.78)
    if step_index == trigger_step:
        for key in ('prompt_embeds', 'add_text_embeds', 'add_time_ids'):
            # chunk(2)[-1]: second half of the CFG-duplicated batch.
            callback_kwargs[key] = callback_kwargs[key].chunk(2)[-1]
        pipe._guidance_scale = 0.1
    return callback_kwargs
def load_pipeline(pipeline=None) -> StableDiffusionXLPipeline:
    """Build, quantize, compile, and warm up the SDXL text-to-image pipeline.

    If ``pipeline`` is falsy, loads "stablediffusionapi/newdream-sdxl-20" in
    fp16 onto CUDA; otherwise reuses the given pipeline. Then fake-quantizes
    the UNet's Linear layers, swaps in a loss-tracking DDIM scheduler,
    compiles with onediffx, restores a cached compiled pipe from disk, and
    runs three warm-up generations before returning the ready pipeline.
    """
    if not pipeline:
        pipeline = StableDiffusionXLPipeline.from_pretrained(
            "stablediffusionapi/newdream-sdxl-20",
            torch_dtype=torch.float16,
        ).to("cuda")
    # Bake 14-bit fake quantization into the UNet's Linear weights in place.
    quantizer = SDXLQuantization(pipeline.unet, 14)
    quantizer.quantize_model()
    pipeline.scheduler = SchedulerWrapper(DDIMScheduler.from_config(pipeline.scheduler.config))
    pipeline = compile_pipe(pipeline)
    # Restore a previously compiled/cached pipe to skip recompilation.
    load_pipe(pipeline, dir="/home/sandbox/.cache/huggingface/hub/models--RobertML--cached-pipe-02/snapshots/58d70deae87034cce351b780b48841f9746d4ad7")
    # Warm-up prompt (arbitrary rare words; the output images are discarded).
    warmup_prompt = "telestereography, unstrengthen, preadministrator, copatroness, hyperpersonal, paramountness, paranoid, guaniferous"
    # Single warm-up pass (was a pointless `for _ in range(1)` loop whose
    # result was bound to an unused variable).
    pipeline(prompt=warmup_prompt, output_type="pil", num_inference_steps=20)
    pipeline.scheduler.prepare_loss()
    for _ in range(2):
        pipeline(prompt=warmup_prompt, output_type="pil", num_inference_steps=20)
    return pipeline
def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
    """Generate a single image for ``request`` with the prepared pipeline.

    Seeds a CUDA generator from ``request.seed`` when one is provided (a
    missing seed means non-deterministic output), then runs 17 DDIM steps
    with DeepCache settings and the dynamic-CFG callback that drops guidance
    late in sampling.
    """
    generator = (
        None
        if request.seed is None
        else Generator(pipeline.device).manual_seed(request.seed)
    )
    result = pipeline(
        prompt=request.prompt,
        negative_prompt=request.negative_prompt,
        width=request.width,
        height=request.height,
        generator=generator,
        num_inference_steps=17,
        cache_interval=1,
        cache_layer_id=1,
        cache_block_id=0,
        eta=1.0,
        guidance_scale = 5.0,
        guidance_rescale = 0.0,
        callback_on_step_end=callback_dynamic_cfg,
        callback_on_step_end_tensor_inputs=['prompt_embeds', 'add_text_embeds', 'add_time_ids'],
    )
    return result.images[0]