# senseicashpls2 / src/pipeline.py
# Uploaded by manbeast3b — "Update src/pipeline.py" (commit e1d7c23, verified)
import torch
from PIL.Image import Image
from diffusers import StableDiffusionXLPipeline
import torch.nn as nn
from pipelines.models import TextToImageRequest
from diffusers import DDIMScheduler
from torch import Generator
from loss import SchedulerWrapper
from onediffx import compile_pipe, save_pipe, load_pipe
class BasicQuantization:
    """Uniform affine fake-quantization: map a tensor onto ``2**bits`` signed
    integer levels, then immediately dequantize back to float.

    Used to simulate low-bit weights while keeping tensors in their original
    dtype (the quantization error is baked into the returned tensor).
    """

    def __init__(self, bits=1):
        # Signed integer range for the requested bit width,
        # e.g. bits=8 -> [-128, 127].
        self.bits = bits
        self.qmin = -(2 ** (bits - 1))
        self.qmax = 2 ** (bits - 1) - 1

    def quantize_tensor(self, tensor):
        """Fake-quantize ``tensor``.

        Returns a ``(dequantized_tensor, scale, zero_point)`` triple. The
        dequantized tensor has the same shape/dtype as the input but only
        takes on the representable quantized values.
        """
        t_min = tensor.min()
        t_max = tensor.max()
        scale = (t_max - t_min) / (self.qmax - self.qmin)
        # Guard: a constant tensor gives scale == 0, and the divisions below
        # would then produce NaN/Inf that get written back into the model.
        # Quantizing a constant tensor is a no-op, so return it unchanged.
        if scale == 0:
            return tensor.clone(), torch.ones_like(scale), torch.zeros_like(scale)
        zero_point = self.qmin - torch.round(t_min / scale)
        # Quantize: scale into integer grid, round, and clamp to range.
        qtensor = torch.round(tensor / scale + zero_point)
        qtensor = torch.clamp(qtensor, self.qmin, self.qmax)
        # Dequantize back to the original value domain.
        tensor_q = (qtensor - zero_point) * scale
        return tensor_q, scale, zero_point
class SDXLQuantization:
    """Apply in-place fake-quantization to every ``nn.Linear`` layer of a model."""

    def __init__(self, model, bit_number=6):
        self.model = model
        self.quant = BasicQuantization(bit_number)

    def quantize_model(self, save_name='quantized_layers.pth'):
        """Fake-quantize the weights and biases of all Linear layers in place.

        ``save_name`` is kept for interface compatibility but is not used:
        nothing is written to disk — the quantization error is baked directly
        into the live parameters. (Conv2d layers are deliberately skipped.)
        """
        for name, module in self.model.named_modules():
            if isinstance(module, nn.Linear):  # nn.Conv2d intentionally excluded
                # Quantize weights in place.
                if hasattr(module, 'weight'):
                    quantized_weight, _, _ = self.quant.quantize_tensor(module.weight)
                    module.weight = torch.nn.Parameter(quantized_weight)
                # Quantize bias (if the layer has one).
                if hasattr(module, 'bias') and module.bias is not None:
                    quantized_bias, _, _ = self.quant.quantize_tensor(module.bias)
                    module.bias = torch.nn.Parameter(quantized_bias)
def callback_dynamic_cfg(pipe, step_index, timestep, callback_kwargs):
    """Disable classifier-free guidance 78% of the way through sampling.

    At the trigger step, keep only the conditional half of each duplicated
    embedding tensor (dropping the unconditional batch) and lower the
    pipeline's guidance scale to 0.1, so the remaining steps run effectively
    unguided at half the batch cost.
    """
    trigger_step = int(pipe.num_timesteps * 0.78)
    if step_index == trigger_step:
        for key in ('prompt_embeds', 'add_text_embeds', 'add_time_ids'):
            # chunk(2)[-1]: second half of the CFG-duplicated batch.
            callback_kwargs[key] = callback_kwargs[key].chunk(2)[-1]
        pipe._guidance_scale = 0.1
    return callback_kwargs
def load_pipeline(pipeline=None) -> StableDiffusionXLPipeline:
    """Build, quantize, compile, and warm up the SDXL text-to-image pipeline.

    If ``pipeline`` is falsy, loads "stablediffusionapi/newdream-sdxl-20" in
    fp16 onto CUDA; otherwise reuses the given pipeline. Then fake-quantizes
    the UNet's Linear layers, swaps in a loss-tracking DDIM scheduler,
    compiles with onediffx, restores a cached compiled pipe from disk, and
    runs three warm-up generations before returning the ready pipeline.
    """
    if not pipeline:
        pipeline = StableDiffusionXLPipeline.from_pretrained(
            "stablediffusionapi/newdream-sdxl-20",
            torch_dtype=torch.float16,
        ).to("cuda")
    # Bake 14-bit fake quantization into the UNet's Linear weights in place.
    quantizer = SDXLQuantization(pipeline.unet, 14)
    quantizer.quantize_model()
    pipeline.scheduler = SchedulerWrapper(DDIMScheduler.from_config(pipeline.scheduler.config))
    pipeline = compile_pipe(pipeline)
    # Restore a previously compiled/cached pipe to skip recompilation.
    load_pipe(pipeline, dir="/home/sandbox/.cache/huggingface/hub/models--RobertML--cached-pipe-02/snapshots/58d70deae87034cce351b780b48841f9746d4ad7")
    # Warm-up prompt (arbitrary rare words; the output images are discarded).
    warmup_prompt = "telestereography, unstrengthen, preadministrator, copatroness, hyperpersonal, paramountness, paranoid, guaniferous"
    # Single warm-up pass (was a pointless `for _ in range(1)` loop whose
    # result was bound to an unused variable).
    pipeline(prompt=warmup_prompt, output_type="pil", num_inference_steps=20)
    pipeline.scheduler.prepare_loss()
    for _ in range(2):
        pipeline(prompt=warmup_prompt, output_type="pil", num_inference_steps=20)
    return pipeline
def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
    """Generate a single image for ``request`` with the prepared pipeline.

    Seeds a CUDA generator from ``request.seed`` when one is provided (a
    missing seed means non-deterministic output), then runs 17 DDIM steps
    with DeepCache settings and the dynamic-CFG callback that drops guidance
    late in sampling.
    """
    generator = (
        None
        if request.seed is None
        else Generator(pipeline.device).manual_seed(request.seed)
    )
    result = pipeline(
        prompt=request.prompt,
        negative_prompt=request.negative_prompt,
        width=request.width,
        height=request.height,
        generator=generator,
        num_inference_steps=17,
        cache_interval=1,
        cache_layer_id=1,
        cache_block_id=0,
        eta=1.0,
        guidance_scale = 5.0,
        guidance_rescale = 0.0,
        callback_on_step_end=callback_dynamic_cfg,
        callback_on_step_end_tensor_inputs=['prompt_embeds', 'add_text_embeds', 'add_time_ids'],
    )
    return result.images[0]