import torch
from PIL.Image import Image
from diffusers import StableDiffusionXLPipeline
from pipelines.models import TextToImageRequest
from diffusers import DDIMScheduler
from torch import Generator
from loss import SchedulerWrapper
from onediffx import compile_pipe, save_pipe, load_pipe

# Checkpoint passed to ``StableDiffusionXLPipeline.from_pretrained``.
_MODEL_ID = "stablediffusionapi/newdream-sdxl-20"

# Directory handed to ``onediffx.load_pipe`` after compilation.
_CACHED_PIPE_DIR = (
    "/home/sandbox/.cache/huggingface/hub/"
    "models--RobertML--cached-pipe-02/snapshots/"
    "58d70deae87034cce351b780b48841f9746d4ad7"
)

# Prompt used for the warm-up generations in ``load_pipeline``.
_WARMUP_PROMPT = (
    "telestereography, unstrengthen, preadministrator, copatroness, "
    "hyperpersonal, paramountness, paranoid, guaniferous"
)

# Entries of ``callback_kwargs`` that ``callback_dynamic_cfg`` rewrites; the
# same names are requested from the pipeline in ``infer``.
_CFG_TENSOR_INPUTS = ("prompt_embeds", "add_text_embeds", "add_time_ids")

# Fraction of ``pipe.num_timesteps`` at which ``callback_dynamic_cfg`` fires.
_CFG_SWITCH_FRACTION = 0.78


class Quantization:
    """Uniform affine fake-quantizer over a signed ``bits``-bit integer grid.

    Values are mapped onto integers in ``[qmin, qmax]`` and immediately mapped
    back, so the result keeps the input's floating dtype.
    """

    def __init__(self, bits: int = 1):
        self.bits = bits
        self.qmin = -(2 ** (bits - 1))
        self.qmax = 2 ** (bits - 1) - 1

    def quantize_tensor(self, tensor: torch.Tensor):
        """Quantize ``tensor`` to the integer grid and dequantize it again.

        Args:
            tensor: Tensor to fake-quantize.

        Returns:
            A ``(tensor_q, scale, zero_point)`` tuple, where ``tensor_q`` is
            the dequantized tensor and ``scale`` / ``zero_point`` are the
            affine parameters that were used.

            An empty tensor, or one whose value range is zero (all elements
            equal), is returned as an unchanged copy with ``scale == 1`` and
            ``zero_point == 0``.
        """
        if tensor.numel() == 0:
            # max()/min() are undefined on an empty tensor.
            return tensor.clone(), tensor.new_ones(()), tensor.new_zeros(())

        low = tensor.min()
        scale = (tensor.max() - low) / (self.qmax - self.qmin)
        if scale == 0:
            # A constant tensor (e.g. an all-zero bias) has no range; dividing
            # by the zero scale below would turn every element into NaN.
            return tensor.clone(), torch.ones_like(scale), torch.zeros_like(scale)

        zero_point = self.qmin - torch.round(low / scale)
        qtensor = torch.round(tensor / scale + zero_point)
        qtensor = torch.clamp(qtensor, self.qmin, self.qmax)
        tensor_q = (qtensor - zero_point) * scale
        return tensor_q, scale, zero_point


class SDXLQuantization:
    """Fake-quantizes the ``torch.nn.Linear`` layers of a model in place."""

    def __init__(self, model: torch.nn.Module, bit_number: int = 16):
        self.model = model
        self.quant = Quantization(bit_number)

    def _fake_quantized(self, param: torch.Tensor) -> torch.nn.Parameter:
        """Return a new Parameter holding the fake-quantized values of ``param``."""
        quantized, _, _ = self.quant.quantize_tensor(param)
        # Keep the original requires_grad flag rather than resetting it to
        # the Parameter default of True.
        return torch.nn.Parameter(quantized, requires_grad=param.requires_grad)

    def quantize_model(self, save_name=None):
        """Replace weight and bias of every ``torch.nn.Linear`` submodule.

        Args:
            save_name: Unused; kept so existing callers keep working.
        """
        # The quantization arithmetic is a one-off transformation of the
        # weights, so there is no reason to record it for autograd.
        with torch.no_grad():
            for module in self.model.modules():
                if not isinstance(module, torch.nn.Linear):
                    continue
                if getattr(module, "weight", None) is not None:
                    module.weight = self._fake_quantized(module.weight)
                if getattr(module, "bias", None) is not None:
                    module.bias = self._fake_quantized(module.bias)


def callback_dynamic_cfg(pipe, step_index, timestep, callback_kwargs):
    """Step-end callback that changes the guidance setup at one fixed step.

    When ``step_index`` equals ``int(pipe.num_timesteps * 0.78)``, each of
    ``prompt_embeds``, ``add_text_embeds`` and ``add_time_ids`` in
    ``callback_kwargs`` is replaced by the last of its two ``chunk(2)`` pieces
    and ``pipe._guidance_scale`` is set to ``0.1``. On every other step the
    kwargs are returned untouched.

    NOTE(review): the last chunk is presumably the conditional half of a
    classifier-free-guidance batch -- confirm against the pipeline in use.

    Args:
        pipe: Pipeline invoking the callback.
        step_index: Index of the step that just finished.
        timestep: Current timestep (unused).
        callback_kwargs: Dict of tensors supplied by the pipeline.

    Returns:
        The (possibly modified) ``callback_kwargs`` dict.
    """
    if step_index == int(pipe.num_timesteps * _CFG_SWITCH_FRACTION):
        for key in _CFG_TENSOR_INPUTS:
            callback_kwargs[key] = callback_kwargs[key].chunk(2)[-1]
        pipe._guidance_scale = 0.1
    return callback_kwargs


def _run_warmup(pipeline):
    """Run one 20-step generation with the fixed warm-up prompt."""
    return pipeline(
        prompt=_WARMUP_PROMPT,
        output_type="pil",
        num_inference_steps=20,
    )


def load_pipeline(pipeline=None) -> StableDiffusionXLPipeline:
    """Build (or finish preparing) the compiled SDXL pipeline.

    Steps, in order: load the fp16 checkpoint onto CUDA when no pipeline is
    given, fake-quantize the UNet's linear layers with 14 bits, wrap a DDIM
    scheduler in ``SchedulerWrapper``, compile with ``compile_pipe``, load the
    cached compiled pipe, run one warm-up generation, call
    ``scheduler.prepare_loss()``, then run two more warm-up generations.

    Args:
        pipeline: Optional already-constructed pipeline to prepare instead of
            loading one from the checkpoint.

    Returns:
        The compiled, warmed-up pipeline.
    """
    if not pipeline:
        pipeline = StableDiffusionXLPipeline.from_pretrained(
            _MODEL_ID,
            torch_dtype=torch.float16,
        ).to("cuda")

    # NOTE(review): the source layout did not preserve whether quantization
    # was nested under the branch above; it is applied unconditionally here.
    # Both readings are identical for the default ``load_pipeline()`` call.
    quantizer = SDXLQuantization(pipeline.unet, 14)
    quantizer.quantize_model()

    pipeline.scheduler = SchedulerWrapper(
        DDIMScheduler.from_config(pipeline.scheduler.config)
    )
    pipeline = compile_pipe(pipeline)
    load_pipe(pipeline, dir=_CACHED_PIPE_DIR)

    # One generation before prepare_loss(), two after; the statement order is
    # kept exactly as in the original.
    _run_warmup(pipeline)
    pipeline.scheduler.prepare_loss()
    for _ in range(2):
        _run_warmup(pipeline)

    return pipeline


def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
    """Generate one image for ``request`` and return it.

    Args:
        request: Supplies ``prompt``, ``negative_prompt``, ``width``,
            ``height`` and an optional ``seed``.
        pipeline: Pipeline to run, e.g. the one returned by ``load_pipeline``.

    Returns:
        The first image of the pipeline output.
    """
    if request.seed is None:
        generator = None
    else:
        # Seed a generator on the pipeline's device for reproducible output.
        generator = Generator(pipeline.device).manual_seed(request.seed)

    # NOTE(review): the cache_* arguments are forwarded as-is; presumably they
    # are consumed by the compiled/cached pipeline -- confirm.
    result = pipeline(
        prompt=request.prompt,
        negative_prompt=request.negative_prompt,
        width=request.width,
        height=request.height,
        generator=generator,
        num_inference_steps=18,
        cache_interval=1,
        cache_layer_id=1,
        cache_block_id=0,
        eta=1.0,
        guidance_scale=5.0,
        guidance_rescale=0.0,
        callback_on_step_end=callback_dynamic_cfg,
        callback_on_step_end_tensor_inputs=list(_CFG_TENSOR_INPUTS),
    )
    return result.images[0]