experiments_sdxl / src /pipeline.py
manbeast3b's picture
Upload folder using huggingface_hub
9a52394 verified
import torch
from PIL.Image import Image
from diffusers import StableDiffusionXLPipeline
from pipelines.models import TextToImageRequest
from diffusers import DDIMScheduler
from torch import Generator
from loss import SchedulerWrapper
from onediffx import compile_pipe, save_pipe, load_pipe
class Quantization:
def __init__(self, bits=1):
self.bits = bits
self.qmin = -(2**(bits-1))
self.qmax = 2**(bits-1) - 1
def quantize_tensor(self, tensor):
scale = (tensor.max() - tensor.min()) / (self.qmax - self.qmin)
zero_point = self.qmin - torch.round(tensor.min() / scale)
qtensor = torch.round(tensor / scale + zero_point)
qtensor = torch.clamp(qtensor, self.qmin, self.qmax)
tensor_q = (qtensor - zero_point) * scale
return tensor_q, scale, zero_point
class SDXLQuantization:
def __init__(self, model, bit_number=16):
self.model = model
self.quant = Quantization(bit_number)
def quantize_model(self, save_name=None):
for name, module in self.model.named_modules():
if isinstance(module, (torch.nn.Linear)):
if hasattr(module, 'weight'):
quantized_weight, _, _ = self.quant.quantize_tensor(module.weight)
module.weight = torch.nn.Parameter(quantized_weight)
if hasattr(module, 'bias') and module.bias is not None:
quantized_bias, _, _ = self.quant.quantize_tensor(module.bias)
module.bias = torch.nn.Parameter(quantized_bias)
def callback_dynamic_cfg(pipe, step_index, timestep, callback_kwargs):
if step_index == int(pipe.num_timesteps * 0.78):
callback_kwargs['prompt_embeds'] = callback_kwargs['prompt_embeds'].chunk(2)[-1]
callback_kwargs['add_text_embeds'] = callback_kwargs['add_text_embeds'].chunk(2)[-1]
callback_kwargs['add_time_ids'] = callback_kwargs['add_time_ids'].chunk(2)[-1]
pipe._guidance_scale = 0.1
return callback_kwargs
def load_pipeline(pipeline=None) -> StableDiffusionXLPipeline:
if not pipeline:
pipeline = StableDiffusionXLPipeline.from_pretrained(
"stablediffusionapi/newdream-sdxl-20",
torch_dtype=torch.float16,
).to("cuda")
quantizer = SDXLQuantization(pipeline.unet, 14)
quantizer.quantize_model()
pipeline.scheduler = SchedulerWrapper(DDIMScheduler.from_config(pipeline.scheduler.config))
pipeline = compile_pipe(pipeline)
load_pipe(pipeline, dir="/home/sandbox/.cache/huggingface/hub/models--RobertML--cached-pipe-02/snapshots/58d70deae87034cce351b780b48841f9746d4ad7")
for _ in range(1):
deepcache_output = pipeline(prompt="telestereography, unstrengthen, preadministrator, copatroness, hyperpersonal, paramountness, paranoid, guaniferous", output_type="pil", num_inference_steps=20)
pipeline.scheduler.prepare_loss()
for _ in range(2):
pipeline(prompt="telestereography, unstrengthen, preadministrator, copatroness, hyperpersonal, paramountness, paranoid, guaniferous", output_type="pil", num_inference_steps=20)
return pipeline
def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
if request.seed is None:
generator = None
else:
generator = Generator(pipeline.device).manual_seed(request.seed)
return pipeline(
prompt=request.prompt,
negative_prompt=request.negative_prompt,
width=request.width,
height=request.height,
generator=generator,
num_inference_steps=18,
cache_interval=1,
cache_layer_id=1,
cache_block_id=0,
eta=1.0,
guidance_scale = 5.0,
guidance_rescale = 0.0,
callback_on_step_end=callback_dynamic_cfg,
callback_on_step_end_tensor_inputs=['prompt_embeds', 'add_text_embeds', 'add_time_ids'],
).images[0]