import torch
from PIL.Image import Image
from diffusers import StableDiffusionXLPipeline
from pipelines.models import TextToImageRequest
from diffusers import DDIMScheduler
from torch import Generator
from loss import SchedulerWrapper
from onediffx import compile_pipe, save_pipe, load_pipe

class Quantization:
    """Uniform affine quantizer that returns the de-quantized ("fake quantized") tensor.

    Values are mapped onto the signed integer grid
    ``[-2**(bits-1), 2**(bits-1) - 1]`` and then mapped straight back, so the
    result keeps the input's shape but is snapped to at most ``2**bits`` levels.
    """

    def __init__(self, bits=1):
        """Store the bit-width and derive the signed integer range from it.

        Raises:
            ValueError: if ``bits`` is smaller than 1 (the integer range would
                be empty and every scale would be infinite).
        """
        if bits < 1:
            raise ValueError(f"bits must be >= 1, got {bits!r}")
        self.bits = bits
        self.qmin = -(2 ** (bits - 1))
        self.qmax = 2 ** (bits - 1) - 1

    def quantize_tensor(self, tensor):
        """Quantize ``tensor`` over its own min/max range and de-quantize it again.

        Args:
            tensor: the ``torch.Tensor`` to process; it is not modified.

        Returns:
            A ``(tensor_q, scale, zero_point)`` tuple where ``tensor_q`` is the
            de-quantized tensor and ``scale`` / ``zero_point`` are the affine
            parameters that were used.
        """
        # Nothing to quantize; ``tensor.max()`` would raise on an empty tensor.
        if tensor.numel() == 0:
            return tensor.clone(), tensor.new_ones(()), tensor.new_zeros(())

        lowest = tensor.min()
        highest = tensor.max()
        scale = (highest - lowest) / (self.qmax - self.qmin)

        # A constant tensor has zero range: dividing by the zero scale would
        # turn every element into NaN. It needs only one level, so hand it
        # back unchanged with identity affine parameters.
        if scale == 0:
            return tensor.clone(), torch.ones_like(scale), torch.zeros_like(scale)

        zero_point = self.qmin - torch.round(lowest / scale)
        qtensor = torch.round(tensor / scale + zero_point)
        qtensor = torch.clamp(qtensor, self.qmin, self.qmax)
        tensor_q = (qtensor - zero_point) * scale
        return tensor_q, scale, zero_point

class SDXLQuantization:
    """Applies ``Quantization`` to every ``torch.nn.Linear`` layer of a model, in place."""

    def __init__(self, model, bit_number=16):
        """Remember the model and build the quantizer.

        Args:
            model: a ``torch.nn.Module`` whose linear layers will be rewritten.
            bit_number: bit-width handed to ``Quantization``.
        """
        self.model = model
        self.quant = Quantization(bit_number)

    def quantize_model(self, save_name=None):
        """Replace the weight and bias of each ``torch.nn.Linear`` with quantized copies.

        Args:
            save_name: accepted for backward compatibility; currently unused.

        Returns:
            None. ``self.model`` is modified in place.
        """
        # The replacement parameters are fresh leaves, so tracking the
        # quantization arithmetic in autograd would only waste memory.
        with torch.no_grad():
            for module in self.model.modules():
                if not isinstance(module, torch.nn.Linear):
                    continue
                weight = getattr(module, 'weight', None)
                if weight is not None:
                    module.weight = self._requantized(weight)
                bias = getattr(module, 'bias', None)
                if bias is not None:
                    module.bias = self._requantized(bias)

    def _requantized(self, param):
        """Return a new Parameter holding the quantized values of ``param``."""
        quantized, _, _ = self.quant.quantize_tensor(param)
        # Keep the original trainable/frozen state instead of the Parameter
        # default of requires_grad=True, which would un-freeze frozen layers.
        return torch.nn.Parameter(quantized, requires_grad=param.requires_grad)

def callback_dynamic_cfg(pipe, step_index, timestep, callback_kwargs):
    """Step-end callback that, at one specific step, halves the conditioning batch.

    When ``step_index`` equals ``int(pipe.num_timesteps * 0.78)`` the three
    conditioning tensors are replaced by the last of their two chunks and
    ``pipe._guidance_scale`` is set to 0.1. On every other step nothing is
    changed.

    Args:
        pipe: object exposing ``num_timesteps``; its ``_guidance_scale`` is written.
        step_index: index of the step that just finished.
        timestep: unused.
        callback_kwargs: dict holding 'prompt_embeds', 'add_text_embeds' and
            'add_time_ids'; it is updated in place.

    Returns:
        The same ``callback_kwargs`` dict.
    """
    switch_step = int(pipe.num_timesteps * 0.78)
    if step_index != switch_step:
        return callback_kwargs

    # NOTE(review): the last chunk is presumably the conditional half of a
    # classifier-free-guidance batch - confirm against the pipeline's layout.
    for key in ('prompt_embeds', 'add_text_embeds', 'add_time_ids'):
        halves = callback_kwargs[key].chunk(2)
        callback_kwargs[key] = halves[-1]

    pipe._guidance_scale = 0.1
    return callback_kwargs

def load_pipeline(pipeline=None) -> StableDiffusionXLPipeline:
    """Build (or adopt) an SDXL pipeline, quantize, compile and warm it up.

    Steps, in order:
      1. If no pipeline is supplied, load "stablediffusionapi/newdream-sdxl-20"
         in float16 and move it to "cuda".
      2. Run ``SDXLQuantization`` with bit-width 14 over ``pipeline.unet``.
      3. Swap the scheduler for a ``SchedulerWrapper`` around a ``DDIMScheduler``
         built from the current scheduler config.
      4. ``compile_pipe`` the pipeline and ``load_pipe`` from the cached directory.
      5. Run one 20-step generation, call ``scheduler.prepare_loss()``, then run
         two more 20-step generations.

    Args:
        pipeline: an existing pipeline to reuse; a falsy value triggers a fresh load.

    Returns:
        The compiled pipeline.
    """
    warmup_prompt = "telestereography, unstrengthen, preadministrator, copatroness, hyperpersonal, paramountness, paranoid, guaniferous"
    compiled_dir = "/home/sandbox/.cache/huggingface/hub/models--RobertML--cached-pipe-02/snapshots/58d70deae87034cce351b780b48841f9746d4ad7"

    if not pipeline:
        loaded = StableDiffusionXLPipeline.from_pretrained(
            "stablediffusionapi/newdream-sdxl-20",
            torch_dtype=torch.float16,
        )
        pipeline = loaded.to("cuda")

    # Quantization happens before compilation so the compiled graph sees the
    # rewritten linear weights.
    SDXLQuantization(pipeline.unet, 14).quantize_model()

    ddim = DDIMScheduler.from_config(pipeline.scheduler.config)
    pipeline.scheduler = SchedulerWrapper(ddim)

    pipeline = compile_pipe(pipeline)
    load_pipe(pipeline, dir=compiled_dir)

    warmup_kwargs = dict(prompt=warmup_prompt, output_type="pil", num_inference_steps=20)

    # One pass before prepare_loss(), two after.
    # NOTE(review): presumably the first pass lets the wrapper collect data
    # that prepare_loss() consumes - confirm in loss.SchedulerWrapper.
    pipeline(**warmup_kwargs)
    pipeline.scheduler.prepare_loss()
    for _ in range(2):
        pipeline(**warmup_kwargs)

    return pipeline

def infer(request: TextToImageRequest, pipeline: StableDiffusionXLPipeline) -> Image:
    """Generate one image for ``request`` with the prepared pipeline.

    Args:
        request: supplies ``prompt``, ``negative_prompt``, ``width``, ``height``
            and an optional ``seed``.
        pipeline: the callable pipeline; its ``device`` is used for the
            seeded generator.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    seed = request.seed
    # No seed means no generator is passed to the pipeline.
    generator = None if seed is None else Generator(pipeline.device).manual_seed(seed)

    call_kwargs = {
        'prompt': request.prompt,
        'negative_prompt': request.negative_prompt,
        'width': request.width,
        'height': request.height,
        'generator': generator,
        'num_inference_steps': 18,
        # NOTE(review): the cache_* options are presumably consumed by the
        # compiled/caching pipeline rather than stock SDXL - confirm.
        'cache_interval': 1,
        'cache_layer_id': 1,
        'cache_block_id': 0,
        'eta': 1.0,
        'guidance_scale': 5.0,
        'guidance_rescale': 0.0,
        # The callback edits exactly these three tensors at its switch step.
        'callback_on_step_end': callback_dynamic_cfg,
        'callback_on_step_end_tensor_inputs': ['prompt_embeds', 'add_text_embeds', 'add_time_ids'],
    }

    result = pipeline(**call_kwargs)
    return result.images[0]