TrendForge
/

derbim6

Model card Files Files and versions

xet

Community

TrendForge committed on Feb 2, 2025

Commit

524d6b8

verified ·

1 Parent(s): 8e3f1b7

Initial commit with folder contents

Browse files

Files changed (1) hide show

src/pipeline.py +61 -55

src/pipeline.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# Coding
 import os
 import torch
 import torch._dynamo
@@ -6,31 +5,27 @@ import gc
 from PIL.Image import Image
 from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
 from huggingface_hub.constants import HF_HUB_CACHE
-from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
-from PIL.Image import Image
-from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
 from pipelines.models import TextToImageRequest
-from PIL.Image import Image
 from torch import Generator
-from diffusers import FluxTransformer2DModel, DiffusionPipeline
 os.environ["TOKENIZERS_PARALLELISM"] = "True"
 torch._dynamo.config.suppress_errors = True
-os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
 Pipeline = None
 CHECKPOINT = "black-forest-labs/FLUX.1-schnell"
 REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
-class QuantativeAnalysis:
     def __init__(self, model, num_bins=256, scale_ratio=1.0):
         self.model = model
         self.num_bins = num_bins
@@ -42,10 +37,12 @@ class QuantativeAnalysis:
                 with torch.no_grad():
                     param_min = param.min()
                     param_max = param.max()
                     if param_range > 0:
-                        params = 0.8*param_min + 0.2*param_max
         return self.model
 class AttentionQuant:
     def __init__(self, model, att_config):
         self.model = model
@@ -58,71 +55,80 @@ class AttentionQuant:
                 if layer_name in self.att_config:
                     num_bins, scale_factor = self.att_config[layer_name]
                     with torch.no_grad():
-                        # Normalize weights, apply binning, and rescale
                         param_min = param.min()
                         param_max = param.max()
                         param_range = param_max - param_min
                         if param_range > 0:
                             normalized = (param - param_min) / param_range
                             binned = torch.round(normalized * (num_bins - 1)) / (num_bins - 1)
-                            rescaled = binned * param_range + param_mins
-                            params.data.copy_(rescaled * scale_factor)
                         else:
-                            params.data.zero_()
         return self.model
-def load_pipeline() -> Pipeline:
-    __t5_model = T5EncoderModel.from_pretrained("TrendForge/extra1manQ1",
-                        revision = "d302b6e39214ed4532be34ec337f93c7eef3eaa6",
-                        torch_dtype=torch.bfloat16).to(memory_format=torch.channels_last)
     __text_encoder_2 = __t5_model
-    base_vae = AutoencoderTiny.from_pretrained("TrendForge/extra2manQ2",
-                    revision="cef012d2db2f5a006567e797a0b9130aea5449c1",
-                    torch_dtype=torch.bfloat16)
     path = os.path.join(HF_HUB_CACHE, "models--TrendForge--extra0manQ0/snapshots/dc2cda167b8f53792a98020a3ef2f21808b09bb4")
-    base_trans = FluxTransformer2DModel.from_pretrained(path,
-                        torch_dtype=torch.bfloat16,
-                        use_safetensors=False).to(memory_format=torch.channels_last)
     try:
         att_config = {
             "transformer_blocks.15.attn.norm_added_k.weight": (64, 0.1),
             "transformer_blocks.15.attn.norm_added_q.weight": (64, 0.1),
             "transformer_blocks.15.attn.norm_added_v.weight": (64, 0.1)
         }
-        transformer = AttentionQuant(transformer, att_config).apply()
-    except:
         transformer = base_trans
-    pipeline = DiffusionPipeline.from_pretrained(CHECKPOINT,
-                        revision=REVISION,
-                        vae=base_vae,
-                        transformer=transformer,
-                        text_encoder_2=__text_encoder_2,
-                        torch_dtype=torch.bfloat16)
     pipeline.to("cuda")
-    for _warmup_batch in range(3):
-        pipeline(prompt="forswearer, skullcap, Juglandales, bluelegs, cunila, carbro, Ammonites",
-                        width=1024,
-                        height=1024,
-                        guidance_scale=0.0,
-                        num_inference_steps=4,
-                        max_sequence_length=256)
     return pipeline
 @torch.no_grad()
 def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
     generator = Generator(pipeline.device).manual_seed(request.seed)
     return pipeline(
         request.prompt,
         generator=generator,
@@ -130,5 +136,5 @@ def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
         num_inference_steps=4,
         max_sequence_length=256,
         height=request.height,
-        width=request.width,
-    ).images[0]

 import os
 import torch
 import torch._dynamo
 from PIL.Image import Image
 from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
 from huggingface_hub.constants import HF_HUB_CACHE
+from transformers import (
+    T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
+)
+from diffusers import (
+    FluxPipeline, AutoencoderKL, AutoencoderTiny, FluxTransformer2DModel, DiffusionPipeline
+)
 from pipelines.models import TextToImageRequest
 from torch import Generator
+# Set environment variables
 os.environ["TOKENIZERS_PARALLELISM"] = "True"
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "expandable_segments:True"
 torch._dynamo.config.suppress_errors = True
 Pipeline = None
+# Define constants
 CHECKPOINT = "black-forest-labs/FLUX.1-schnell"
 REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
+class QuantativeAnalysis:
     def __init__(self, model, num_bins=256, scale_ratio=1.0):
         self.model = model
         self.num_bins = num_bins
                 with torch.no_grad():
                     param_min = param.min()
                     param_max = param.max()
+                    param_range = param_max - param_min
                     if param_range > 0:
+                        params = 0.8 * param_min + 0.2 * param_max
         return self.model
 class AttentionQuant:
     def __init__(self, model, att_config):
         self.model = model
                 if layer_name in self.att_config:
                     num_bins, scale_factor = self.att_config[layer_name]
                     with torch.no_grad():
                         param_min = param.min()
                         param_max = param.max()
                         param_range = param_max - param_min
                         if param_range > 0:
                             normalized = (param - param_min) / param_range
                             binned = torch.round(normalized * (num_bins - 1)) / (num_bins - 1)
+                            rescaled = binned * param_range + param_min
+                            param.data.copy_(rescaled * scale_factor)
                         else:
+                            param.data.zero_()
         return self.model
+def load_pipeline() -> Pipeline:
+    # Load T5 model
+    __t5_model = T5EncoderModel.from_pretrained(
+        "TrendForge/extra1manQ1",
+        revision="d302b6e39214ed4532be34ec337f93c7eef3eaa6",
+        torch_dtype=torch.bfloat16
+    ).to(memory_format=torch.channels_last)
     __text_encoder_2 = __t5_model
+    # Load VAE
+    base_vae = AutoencoderTiny.from_pretrained(
+        "TrendForge/extra2manQ2",
+        revision="cef012d2db2f5a006567e797a0b9130aea5449c1",
+        torch_dtype=torch.bfloat16
+    )
+    # Load Transformer Model
     path = os.path.join(HF_HUB_CACHE, "models--TrendForge--extra0manQ0/snapshots/dc2cda167b8f53792a98020a3ef2f21808b09bb4")
+    base_trans = FluxTransformer2DModel.from_pretrained(
+        path, torch_dtype=torch.bfloat16, use_safetensors=False
+    ).to(memory_format=torch.channels_last)
     try:
         att_config = {
             "transformer_blocks.15.attn.norm_added_k.weight": (64, 0.1),
             "transformer_blocks.15.attn.norm_added_q.weight": (64, 0.1),
             "transformer_blocks.15.attn.norm_added_v.weight": (64, 0.1)
         }
+        transformer = AttentionQuant(base_trans, att_config).apply()
+    except Exception:
         transformer = base_trans
+    # Load pipeline
+    pipeline = DiffusionPipeline.from_pretrained(
+        CHECKPOINT,
+        revision=REVISION,
+        vae=base_vae,
+        transformer=transformer,
+        text_encoder_2=__text_encoder_2,
+        torch_dtype=torch.bfloat16
+    )
     pipeline.to("cuda")
+    # Warmup
+    for _ in range(3):
+        pipeline(
+            prompt="forswearer, skullcap, Juglandales, bluelegs, cunila, carbro, Ammonites",
+            width=1024,
+            height=1024,
+            guidance_scale=0.0,
+            num_inference_steps=4,
+            max_sequence_length=256
+        )
     return pipeline
 @torch.no_grad()
 def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
     generator = Generator(pipeline.device).manual_seed(request.seed)
     return pipeline(
         request.prompt,
         generator=generator,
         num_inference_steps=4,
         max_sequence_length=256,
         height=request.height,
+        width=request.width
+    ).images[0]