manbeast3b
/

optimization1

Model card · Files and versions

manbeast3b committed on Dec 10, 2024

Commit

61d3add

·

verified ·

1 Parent(s): 9df77c6

Update src/pipeline.py

Files changed (1) hide show

src/pipeline.py +24 -11

src/pipeline.py CHANGED Viewed

@@ -18,13 +18,13 @@ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:False,garbage_colle
 Pipeline = None
 # Define the quantization config
-nf4_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
 ckpt_id = "black-forest-labs/FLUX.1-schnell"
 def empty_cache():
@@ -45,11 +45,24 @@ def load_pipeline() -> Pipeline:
     # text_encoder_2 = T5EncoderModel.from_pretrained(
     #     "sayakpaul/flux.1-dev-nf4-pkg", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
     # )
-    text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
-    vae=AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype)
     pipeline = DiffusionPipeline.from_pretrained(
         ckpt_id,
-        vae=vae,
         text_encoder_2 = text_encoder_2,
         torch_dtype=dtype,
         )
@@ -58,7 +71,7 @@ def load_pipeline() -> Pipeline:
     torch.cuda.set_per_process_memory_fraction(0.95)
     pipeline.text_encoder.to(memory_format=torch.channels_last)
     pipeline.transformer.to(memory_format=torch.channels_last)
-    torch.jit.enable_onednn_fusion(True)
     pipeline.vae.to(memory_format=torch.channels_last)

 Pipeline = None
 # Define the quantization config
+# nf4_config = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_quant_type="nf4",
+#     bnb_4bit_use_double_quant=True,
+#     bnb_4bit_compute_dtype=torch.bfloat16
+# )
+config = BitsAndBytesConfig(load_in_8bit=True)
 ckpt_id = "black-forest-labs/FLUX.1-schnell"
 def empty_cache():
     # text_encoder_2 = T5EncoderModel.from_pretrained(
     #     "sayakpaul/flux.1-dev-nf4-pkg", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
     # )
+    # text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
+    model_id = "manbeast3b/flux-schnell-int8"
+    transformer = FluxTransformer2DModel.from_pretrained(
+        model_id, subfolder="transformer", quantization_config=config, torch_dtype=torch.bfloat16
+    )
+    text_encoder_2 = T5EncoderModel.from_pretrained(
+        model_id, subfolder="text_encoder_2", quantization_config=config, torch_dtype=torch.bfloat16
+    )
+    text_encoder = CLIPTextModel.from_pretrained(
+        model_id, subfolder="text_encoder", quantization_config=config, torch_dtype=torch.bfloat16
+    )
+    # vae=AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype)
     pipeline = DiffusionPipeline.from_pretrained(
         ckpt_id,
+        # vae=vae,
+        transformer = transformer,
+        text_encoder = text_encoder,
         text_encoder_2 = text_encoder_2,
         torch_dtype=dtype,
         )
     torch.cuda.set_per_process_memory_fraction(0.95)
     pipeline.text_encoder.to(memory_format=torch.channels_last)
     pipeline.transformer.to(memory_format=torch.channels_last)
+    # torch.jit.enable_onednn_fusion(True)
     pipeline.vae.to(memory_format=torch.channels_last)