manbeast3b committed on
Commit
6746668
·
verified ·
1 Parent(s): ae03ef6

Update src/pipeline.py

Browse files
Files changed (1) hide show
  1. src/pipeline.py +107 -92
src/pipeline.py CHANGED
@@ -1,35 +1,49 @@
1
- from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from diffusers.image_processor import VaeImageProcessor
3
  from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
4
- from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
5
- import torch
6
- import torch._dynamo
7
- import gc
8
  from PIL import Image as img
9
- from PIL.Image import Image
10
  from pipelines.models import TextToImageRequest
11
- from torch import Generator
12
- import time
13
- from diffusers import FluxTransformer2DModel, DiffusionPipeline
14
- from torchao.quantization import quantize_, int8_weight_only
15
- import os
16
  from model import Encoder, Decoder
17
- import torchvision
18
- import torch.nn as nn
19
-
20
- os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
21
-
22
- Pipeline = None
23
 
 
 
24
  torch.backends.cudnn.benchmark = True
25
  torch.backends.cuda.matmul.allow_tf32 = True
26
  torch.cuda.set_per_process_memory_fraction(0.95)
27
-
 
 
 
 
 
 
 
 
 
 
 
28
  class BasicQuantization:
29
  def __init__(self, bits=1):
30
  self.bits = bits
31
- self.qmin = -(2**(bits-1))
32
- self.qmax = 2**(bits-1) - 1
33
 
34
  def quantize_tensor(self, tensor):
35
  scale = (tensor.max() - tensor.min()) / (self.qmax - self.qmin)
@@ -45,107 +59,108 @@ class ModelQuantization:
45
 
46
  def quantize_model(self):
47
  for name, module in self.model.named_modules():
48
- if isinstance(module, torch.nn.Linear):
49
- if hasattr(module, 'weightML'):
50
  quantized_weight, _, _ = self.quant.quantize_tensor(module.weight)
51
- module.weight = torch.nn.Parameter(quantized_weight)
52
  if hasattr(module, 'bias') and module.bias is not None:
53
  quantized_bias, _, _ = self.quant.quantize_tensor(module.bias)
54
- module.bias = torch.nn.Parameter(quantized_bias)
55
-
56
-
57
- ckpt_id = "black-forest-labs/FLUX.1-schnell"
58
- def empty_cache():
59
- start = time.time()
60
- gc.collect()
61
- torch.cuda.empty_cache()
62
- torch.cuda.reset_max_memory_allocated()
63
- torch.cuda.reset_peak_memory_stats()
64
-
65
- def load_pipeline() -> Pipeline:
66
- empty_cache()
67
 
 
 
 
 
68
  dtype, device = torch.bfloat16, "cuda"
69
 
70
- vae = AutoencoderTiny.from_pretrained("RobertML/FLUX.1-schnell-vae_e3m2", torch_dtype=dtype)
 
71
  vae.encoder = Encoder(16)
72
  vae.decoder = Decoder(16)
73
 
 
74
  encoder_path = "encoder.pth"
75
  decoder_path = "decoder.pth"
76
 
77
- if encoder_path is not None:
78
- encoder_state_dict = torch.load(encoder_path, map_location="cpu", weights_only=True)
79
- filtered_state_dict = {k.strip('encoder.'): v for k, v in encoder_state_dict.items() if k.strip('encoder.') in vae.encoder.state_dict() and v.size() == vae.encoder.state_dict()[k.strip('encoder.')].size()}
80
- print(f" num of keys in filtered: {len(filtered_state_dict)} and in decoder: {len(vae.encoder.state_dict())}")
 
 
 
81
  vae.encoder.load_state_dict(filtered_state_dict, strict=False)
82
  vae.encoder.to(dtype=dtype)
83
-
84
- if decoder_path is not None:
85
- decoder_state_dict = torch.load(decoder_path, map_location="cpu", weights_only=True)
86
- filtered_state_dict = {k.strip('decoder.'): v for k, v in decoder_state_dict.items() if k.strip('decoder.') in vae.decoder.state_dict() and v.size() == vae.decoder.state_dict()[k.strip('decoder.')].size()}
87
- print(f" num of keys in filtered: {len(filtered_state_dict)} and in decoder: {len(vae.decoder.state_dict())}")
 
 
 
88
  vae.decoder.load_state_dict(filtered_state_dict, strict=False)
89
  vae.decoder.to(dtype=dtype)
90
 
91
- vae.decoder.requires_grad_(False)
92
  vae.encoder.requires_grad_(False)
93
-
94
- # quantize_(vae, int8_weight_only())
 
95
  quantizer = ModelQuantization(vae)
96
  quantizer.quantize_model()
97
 
 
 
 
 
98
 
99
- scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(ckpt_id, subfolder="scheduler")
100
-
101
- ############ Text Encoder ############
102
- text_encoder = CLIPTextModel.from_pretrained(
103
- ckpt_id, subfolder="text_encoder", torch_dtype=torch.bfloat16
104
- )
105
- # quantize_(text_encoder, int8_weight_only())
106
-
107
- ############ Text Encoder 2 ############
108
- text_encoder_2 = T5EncoderModel.from_pretrained(
109
- "city96/t5-v1_1-xxl-encoder-bf16", torch_dtype=torch.bfloat16
110
  )
111
 
112
- model = FluxTransformer2DModel.from_pretrained(
113
- "/root/.cache/huggingface/hub/models--RobertML--FLUX.1-schnell-int8wo/snapshots/307e0777d92df966a3c0f99f31a6ee8957a9857a", torch_dtype=dtype, use_safetensors=False
114
- )
115
-
116
- pipeline = DiffusionPipeline.from_pretrained(
117
- ckpt_id,
118
  scheduler=scheduler,
119
- transformer=model,
120
  text_encoder=text_encoder,
121
  text_encoder_2=text_encoder_2,
122
- torch_dtype=dtype,
123
  vae=vae,
124
- load_in_8bit=True,
125
- ).to(device)
126
- # pipeline.vae = torch.compile(pipeline.vae, mode="reduce-overhead")
127
- pipeline.vae.to(memory_format=torch.channels_last)
128
- pipeline.text_encoder.to(memory_format=torch.channels_last)
129
- pipeline.text_encoder_2.to(memory_format=torch.channels_last)
130
- pipeline.transformer.to(memory_format=torch.channels_last)
131
-
132
- for _ in range(2):
133
- pipeline(prompt="onomancy, aftergo, spirantic, Platyhelmia, modificator, drupaceous, jobbernowl, hereness", width=1024, height=1024, guidance_scale=0.0, num_inference_steps=4, max_sequence_length=256)
134
-
135
- empty_cache()
136
- return pipeline
 
 
 
 
137
 
 
 
138
 
139
  @torch.inference_mode()
140
- def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
 
141
  generator = Generator(pipeline.device).manual_seed(request.seed)
142
- image=pipeline(request.prompt,
143
- generator=generator,
144
- guidance_scale=0.0,
145
- num_inference_steps=4,
146
- max_sequence_length=256,
147
- height=request.height,
148
- width=request.width,
149
- output_type="pt").images[0]
150
-
151
- return torchvision.transforms.functional.to_pil_image(image.to(torch.float32).mul_(2).sub_(1))
 
 
1
+ import os
2
+ import gc
3
+ import time
4
+ import torch
5
+ import torchvision
6
+ import torch.nn as nn
7
+ from torch import Generator
8
+ from diffusers import (
9
+ FluxPipeline,
10
+ AutoencoderKL,
11
+ AutoencoderTiny,
12
+ DiffusionPipeline,
13
+ FluxTransformer2DModel
14
+ )
15
  from diffusers.image_processor import VaeImageProcessor
16
  from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
17
+ from transformers import (
18
+ T5EncoderModel,
19
+ CLIPTextModel
20
+ )
21
  from PIL import Image as img
 
22
  from pipelines.models import TextToImageRequest
 
 
 
 
 
23
  from model import Encoder, Decoder
 
 
 
 
 
 
24
 
25
+ # Environment configuration
26
+ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "expandable_segments:True"
27
  torch.backends.cudnn.benchmark = True
28
  torch.backends.cuda.matmul.allow_tf32 = True
29
  torch.cuda.set_per_process_memory_fraction(0.95)
30
+
31
+ # Constants
32
+ CKPT_ID = "black-forest-labs/FLUX.1-schnell"
33
+
34
+ # Utility functions
35
def clear():
    """Reclaim memory: run Python GC, flush the CUDA cache, reset CUDA stats."""
    # Order matters: collect unreachable Python objects first so their CUDA
    # blocks can actually be released by empty_cache().
    for release in (
        gc.collect,
        torch.cuda.empty_cache,
        torch.cuda.reset_max_memory_allocated,
        torch.cuda.reset_peak_memory_stats,
    ):
        release()
40
+
41
+ # Quantization classes
42
  class BasicQuantization:
43
  def __init__(self, bits=1):
44
  self.bits = bits
45
+ self.qmin = -(2 ** (bits - 1))
46
+ self.qmax = 2 ** (bits - 1) - 1
47
 
48
  def quantize_tensor(self, tensor):
49
  scale = (tensor.max() - tensor.min()) / (self.qmax - self.qmin)
 
59
 
60
  def quantize_model(self):
61
  for name, module in self.model.named_modules():
62
+ if isinstance(module, nn.Linear):
63
+ if hasattr(module, 'weight'):
64
  quantized_weight, _, _ = self.quant.quantize_tensor(module.weight)
65
+ module.weight = nn.Parameter(quantized_weight)
66
  if hasattr(module, 'bias') and module.bias is not None:
67
  quantized_bias, _, _ = self.quant.quantize_tensor(module.bias)
68
+ module.bias = nn.Parameter(quantized_bias)
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
# Pipeline loading
def load_pipeline():
    """Load and warm up the FLUX.1-schnell diffusion pipeline.

    Builds the pipeline on CUDA in bfloat16 with a custom quantized VAE,
    runs one warm-up generation so kernels are compiled/cached, and
    returns the ready-to-use pipeline.
    """
    clear()
    dtype, device = torch.bfloat16, "cuda"

    # Load VAE and swap in the project's custom encoder/decoder
    vae = AutoencoderTiny.from_pretrained("manbeast3b/flux.1-schnell-vae-quant1", torch_dtype=dtype)
    vae.encoder = Encoder(16)
    vae.decoder = Decoder(16)

    # Restore trained encoder/decoder weights (prefix-stripped, shape-checked)
    encoder_path = "encoder.pth"
    decoder_path = "decoder.pth"

    if encoder_path:
        # weights_only=True: .pth files are pickles — avoid arbitrary code execution
        encoder_state_dict = torch.load(encoder_path, map_location="cpu", weights_only=True)
        target = vae.encoder.state_dict()
        filtered_state_dict = {
            k.replace('encoder.', ''): v
            for k, v in encoder_state_dict.items()
            if k.replace('encoder.', '') in target
            and v.size() == target[k.replace('encoder.', '')].size()  # skip shape mismatches
        }
        vae.encoder.load_state_dict(filtered_state_dict, strict=False)
        vae.encoder.to(dtype=dtype)

    if decoder_path:
        decoder_state_dict = torch.load(decoder_path, map_location="cpu", weights_only=True)
        target = vae.decoder.state_dict()
        filtered_state_dict = {
            k.replace('decoder.', ''): v
            for k, v in decoder_state_dict.items()
            if k.replace('decoder.', '') in target
            and v.size() == target[k.replace('decoder.', '')].size()
        }
        vae.decoder.load_state_dict(filtered_state_dict, strict=False)
        vae.decoder.to(dtype=dtype)

    # Inference only — no gradients needed
    vae.encoder.requires_grad_(False)
    vae.decoder.requires_grad_(False)

    # Quantize the VAE's Linear layers in-place
    quantizer = ModelQuantization(vae)
    quantizer.quantize_model()

    # Scheduler and text encoders
    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(CKPT_ID, subfolder="scheduler")
    text_encoder = CLIPTextModel.from_pretrained(CKPT_ID, subfolder="text_encoder", torch_dtype=dtype)
    text_encoder_2 = T5EncoderModel.from_pretrained("city96/t5-v1_1-xxl-encoder-bf16", torch_dtype=dtype)

    # Pre-quantized transformer (int8 weight-only) from the local HF cache
    transformer_model = FluxTransformer2DModel.from_pretrained(
        "/root/.cache/huggingface/hub/models--manbeast3b--FLUX.1-schnell-int8wo/snapshots/307e0777d92df966a3c0f99f31a6ee8957a9857a",
        torch_dtype=dtype,
        use_safetensors=False
    )

    # BUG FIX: the pipeline must be built with DiffusionPipeline.from_pretrained(CKPT_ID, ...);
    # calling the base constructor directly raises a TypeError (it accepts no such
    # kwargs) and would never load the tokenizers the checkpoint provides.
    pipeline = DiffusionPipeline.from_pretrained(
        CKPT_ID,
        scheduler=scheduler,
        transformer=transformer_model,
        text_encoder=text_encoder,
        text_encoder_2=text_encoder_2,
        vae=vae,
        torch_dtype=dtype,
        load_in_8bit=True,
    ).to(device)

    # channels_last memory format tends to speed up conv-heavy modules on CUDA
    for component in [pipeline.vae, pipeline.text_encoder, pipeline.text_encoder_2, pipeline.transformer]:
        component.to(memory_format=torch.channels_last)

    # Warm-up inference so the first real request is not paying compile/caching cost
    pipeline(
        prompt="modificator, drupaceous, jobbernowl, hereness",
        width=1024,
        height=1024,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256
    )

    clear()
    return pipeline
151
 
152
@torch.inference_mode()
def infer(request: TextToImageRequest, pipeline):
    """Run one text-to-image generation for *request* and return a PIL image."""
    seed_gen = Generator(pipeline.device).manual_seed(request.seed)
    result = pipeline(
        request.prompt,
        generator=seed_gen,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256,
        height=request.height,
        width=request.width,
        output_type="pt",
    )
    tensor = result.images[0].to(torch.float32)
    # NOTE(review): the in-place x*2-1 remaps the pipeline's [0,1] tensor to
    # [-1,1] before to_pil_image — presumably intentional for this VAE/scoring
    # setup; confirm against the consumer of the returned image.
    return torchvision.transforms.functional.to_pil_image(tensor.mul_(2).sub_(1))