Update src/pipeline.py
Browse files- src/pipeline.py +24 -11
src/pipeline.py
CHANGED
|
@@ -18,13 +18,13 @@ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:False,garbage_colle
|
|
| 18 |
Pipeline = None
|
| 19 |
|
| 20 |
# Define the quantization config
|
| 21 |
-
nf4_config = BitsAndBytesConfig(
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
|
| 29 |
ckpt_id = "black-forest-labs/FLUX.1-schnell"
|
| 30 |
def empty_cache():
|
|
@@ -45,11 +45,24 @@ def load_pipeline() -> Pipeline:
|
|
| 45 |
# text_encoder_2 = T5EncoderModel.from_pretrained(
|
| 46 |
# "sayakpaul/flux.1-dev-nf4-pkg", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
|
| 47 |
# )
|
| 48 |
-
text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
pipeline = DiffusionPipeline.from_pretrained(
|
| 51 |
ckpt_id,
|
| 52 |
-
vae=vae,
|
|
|
|
|
|
|
| 53 |
text_encoder_2 = text_encoder_2,
|
| 54 |
torch_dtype=dtype,
|
| 55 |
)
|
|
@@ -58,7 +71,7 @@ def load_pipeline() -> Pipeline:
|
|
| 58 |
torch.cuda.set_per_process_memory_fraction(0.95)
|
| 59 |
pipeline.text_encoder.to(memory_format=torch.channels_last)
|
| 60 |
pipeline.transformer.to(memory_format=torch.channels_last)
|
| 61 |
-
torch.jit.enable_onednn_fusion(True)
|
| 62 |
|
| 63 |
|
| 64 |
pipeline.vae.to(memory_format=torch.channels_last)
|
|
|
|
| 18 |
Pipeline = None
|
| 19 |
|
| 20 |
# Define the quantization config
|
| 21 |
+
# nf4_config = BitsAndBytesConfig(
|
| 22 |
+
# load_in_4bit=True,
|
| 23 |
+
# bnb_4bit_quant_type="nf4",
|
| 24 |
+
# bnb_4bit_use_double_quant=True,
|
| 25 |
+
# bnb_4bit_compute_dtype=torch.bfloat16
|
| 26 |
+
# )
|
| 27 |
+
config = BitsAndBytesConfig(load_in_8bit=True)
|
| 28 |
|
| 29 |
ckpt_id = "black-forest-labs/FLUX.1-schnell"
|
| 30 |
def empty_cache():
|
|
|
|
| 45 |
# text_encoder_2 = T5EncoderModel.from_pretrained(
|
| 46 |
# "sayakpaul/flux.1-dev-nf4-pkg", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
|
| 47 |
# )
|
| 48 |
+
# text_encoder_2 = T5EncoderModel.from_pretrained("hf-internal-testing/tiny-random-t5")
|
| 49 |
+
|
| 50 |
+
model_id = "manbeast3b/flux-schnell-int8"
|
| 51 |
+
transformer = FluxTransformer2DModel.from_pretrained(
|
| 52 |
+
model_id, subfolder="transformer", quantization_config=config, torch_dtype=torch.bfloat16
|
| 53 |
+
)
|
| 54 |
+
text_encoder_2 = T5EncoderModel.from_pretrained(
|
| 55 |
+
model_id, subfolder="text_encoder_2", quantization_config=config, torch_dtype=torch.bfloat16
|
| 56 |
+
)
|
| 57 |
+
text_encoder = CLIPTextModel.from_pretrained(
|
| 58 |
+
model_id, subfolder="text_encoder", quantization_config=config, torch_dtype=torch.bfloat16
|
| 59 |
+
)
|
| 60 |
+
# vae=AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype)
|
| 61 |
pipeline = DiffusionPipeline.from_pretrained(
|
| 62 |
ckpt_id,
|
| 63 |
+
# vae=vae,
|
| 64 |
+
transformer = transformer,
|
| 65 |
+
text_encoder = text_encoder,
|
| 66 |
text_encoder_2 = text_encoder_2,
|
| 67 |
torch_dtype=dtype,
|
| 68 |
)
|
|
|
|
| 71 |
torch.cuda.set_per_process_memory_fraction(0.95)
|
| 72 |
pipeline.text_encoder.to(memory_format=torch.channels_last)
|
| 73 |
pipeline.transformer.to(memory_format=torch.channels_last)
|
| 74 |
+
# torch.jit.enable_onednn_fusion(True)
|
| 75 |
|
| 76 |
|
| 77 |
pipeline.vae.to(memory_format=torch.channels_last)
|