Update src/pipeline.py
Browse files- src/pipeline.py +9 -4
src/pipeline.py
CHANGED
|
@@ -131,8 +131,8 @@ def load_pipeline() -> Pipeline:
|
|
| 131 |
text_encoder_2 = T5EncoderModel.from_pretrained(
|
| 132 |
"city96/t5-v1_1-xxl-encoder-bf16", torch_dtype=torch.bfloat16
|
| 133 |
)
|
| 134 |
-
|
| 135 |
-
vae = torch.load('/root/.cache/huggingface/hub/compiled_vae.pth')
|
| 136 |
# transformer = FluxTransformer2DModel.from_pretrained("manbeast3b/transfomer-flux-schnell-int8") # torch_dtype=dtype
|
| 137 |
pipeline = DiffusionPipeline.from_pretrained(
|
| 138 |
ckpt_id,
|
|
@@ -144,7 +144,11 @@ def load_pipeline() -> Pipeline:
|
|
| 144 |
# quantize_(pipeline.transformer, float8_dynamic_activation_float8_weight())
|
| 145 |
|
| 146 |
torch.backends.cudnn.benchmark = True
|
|
|
|
|
|
|
| 147 |
torch.backends.cuda.matmul.allow_tf32 = True
|
|
|
|
|
|
|
| 148 |
torch.cuda.set_per_process_memory_fraction(0.99)
|
| 149 |
pipeline.text_encoder.to(memory_format=torch.channels_last)
|
| 150 |
pipeline.transformer.to(memory_format=torch.channels_last)
|
|
@@ -153,12 +157,13 @@ def load_pipeline() -> Pipeline:
|
|
| 153 |
# pipeline.transformer.save_pretrained("/root/.cache/huggingface/hub/transformer-flux")
|
| 154 |
# exit()
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
# torch.save(pipeline.vae, '/root/.cache/huggingface/hub/compiled_vae.pth')
|
| 159 |
# exit()
|
| 160 |
|
| 161 |
|
|
|
|
| 162 |
pipeline._exclude_from_cpu_offload = ["vae"]
|
| 163 |
pipeline.enable_sequential_cpu_offload()
|
| 164 |
for _ in range(2):
|
|
|
|
| 131 |
text_encoder_2 = T5EncoderModel.from_pretrained(
|
| 132 |
"city96/t5-v1_1-xxl-encoder-bf16", torch_dtype=torch.bfloat16
|
| 133 |
)
|
| 134 |
+
vae=AutoencoderKL.from_pretrained(ckpt_id, subfolder="vae", torch_dtype=dtype)
|
| 135 |
+
# vae = torch.load('/root/.cache/huggingface/hub/compiled_vae.pth')
|
| 136 |
# transformer = FluxTransformer2DModel.from_pretrained("manbeast3b/transfomer-flux-schnell-int8") # torch_dtype=dtype
|
| 137 |
pipeline = DiffusionPipeline.from_pretrained(
|
| 138 |
ckpt_id,
|
|
|
|
| 144 |
# quantize_(pipeline.transformer, float8_dynamic_activation_float8_weight())
|
| 145 |
|
| 146 |
torch.backends.cudnn.benchmark = True
|
| 147 |
+
torch.backends.cudnn.deterministic = False
|
| 148 |
+
torch.set_deterministic_debug_mode(0)
|
| 149 |
torch.backends.cuda.matmul.allow_tf32 = True
|
| 150 |
+
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
|
| 151 |
+
# torch.cuda.set_memory_growth(True)
|
| 152 |
torch.cuda.set_per_process_memory_fraction(0.99)
|
| 153 |
pipeline.text_encoder.to(memory_format=torch.channels_last)
|
| 154 |
pipeline.transformer.to(memory_format=torch.channels_last)
|
|
|
|
| 157 |
# pipeline.transformer.save_pretrained("/root/.cache/huggingface/hub/transformer-flux")
|
| 158 |
# exit()
|
| 159 |
|
| 160 |
+
pipeline.vae.to(memory_format=torch.channels_last)
|
| 161 |
+
pipeline.vae = torch.compile(pipeline.vae)
|
| 162 |
# torch.save(pipeline.vae, '/root/.cache/huggingface/hub/compiled_vae.pth')
|
| 163 |
# exit()
|
| 164 |
|
| 165 |
|
| 166 |
+
|
| 167 |
pipeline._exclude_from_cpu_offload = ["vae"]
|
| 168 |
pipeline.enable_sequential_cpu_offload()
|
| 169 |
for _ in range(2):
|