Upload 2 files
Browse files
- handler.py +6 -6
- requirements.txt +1 -2
handler.py
CHANGED
|
@@ -34,9 +34,9 @@ def load_pipeline_compile(repo_id: str, dtype: torch.dtype) -> Any:
|
|
| 34 |
pipe.transformer.fuse_qkv_projections()
|
| 35 |
pipe.vae.fuse_qkv_projections()
|
| 36 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 37 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False
|
| 38 |
pipe.vae.to(memory_format=torch.channels_last)
|
| 39 |
-
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False
|
| 40 |
pipe.to("cuda")
|
| 41 |
return pipe
|
| 42 |
|
|
@@ -45,9 +45,9 @@ def load_pipeline_autoquant(repo_id: str, dtype: torch.dtype) -> Any:
|
|
| 45 |
pipe.transformer.fuse_qkv_projections()
|
| 46 |
pipe.vae.fuse_qkv_projections()
|
| 47 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 48 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True
|
| 49 |
pipe.vae.to(memory_format=torch.channels_last)
|
| 50 |
-
pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True
|
| 51 |
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
|
| 52 |
pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
|
| 53 |
pipe.to("cuda")
|
|
@@ -75,9 +75,9 @@ def load_pipeline_turbo_compile(repo_id: str, dtype: torch.dtype) -> Any:
|
|
| 75 |
quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
|
| 76 |
quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
|
| 77 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 78 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False
|
| 79 |
pipe.vae.to(memory_format=torch.channels_last)
|
| 80 |
-
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False
|
| 81 |
pipe.to("cuda")
|
| 82 |
return pipe
|
| 83 |
|
|
|
|
| 34 |
pipe.transformer.fuse_qkv_projections()
|
| 35 |
pipe.vae.fuse_qkv_projections()
|
| 36 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 37 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
| 38 |
pipe.vae.to(memory_format=torch.channels_last)
|
| 39 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
| 40 |
pipe.to("cuda")
|
| 41 |
return pipe
|
| 42 |
|
|
|
|
| 45 |
pipe.transformer.fuse_qkv_projections()
|
| 46 |
pipe.vae.fuse_qkv_projections()
|
| 47 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 48 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
| 49 |
pipe.vae.to(memory_format=torch.channels_last)
|
| 50 |
+
pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True)
|
| 51 |
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
|
| 52 |
pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
|
| 53 |
pipe.to("cuda")
|
|
|
|
| 75 |
quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
|
| 76 |
quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
|
| 77 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 78 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
| 79 |
pipe.vae.to(memory_format=torch.channels_last)
|
| 80 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
| 81 |
pipe.to("cuda")
|
| 82 |
return pipe
|
| 83 |
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
huggingface_hub
|
| 2 |
-
torch
|
| 3 |
torchvision
|
| 4 |
torchao==0.9.0
|
| 5 |
diffusers==0.32.2
|
|
@@ -11,5 +11,4 @@ scipy
|
|
| 11 |
Pillow
|
| 12 |
sentencepiece
|
| 13 |
protobuf
|
| 14 |
-
pytorch-lightning
|
| 15 |
triton
|
|
|
|
| 1 |
huggingface_hub
|
| 2 |
+
torch>=2.4.0
|
| 3 |
torchvision
|
| 4 |
torchao==0.9.0
|
| 5 |
diffusers==0.32.2
|
|
|
|
| 11 |
Pillow
|
| 12 |
sentencepiece
|
| 13 |
protobuf
|
|
|
|
| 14 |
triton
|