fix: update torchao FP8 quantization API for HF Spaces compatibility
Browse files
src/video_generator_hf.py
CHANGED
|
@@ -60,8 +60,8 @@ def _get_pipe():
|
|
| 60 |
|
| 61 |
# Quantize transformer to FP8 to fit in 24GB ZeroGPU VRAM
|
| 62 |
# (~28GB bf16 → ~14GB fp8). VAE + image encoder stay float32.
|
| 63 |
-
from torchao.quantization import quantize_, float8_weight_only
|
| 64 |
-
quantize_(_pipe.transformer, float8_weight_only())
|
| 65 |
|
| 66 |
_pipe.to("cuda")
|
| 67 |
|
|
|
|
| 60 |
|
| 61 |
# Quantize transformer to FP8 to fit in 24GB ZeroGPU VRAM
|
| 62 |
# (~28GB bf16 → ~14GB fp8). VAE + image encoder stay float32.
|
| 63 |
+
from torchao.quantization import quantize_, Float8WeightOnlyConfig
|
| 64 |
+
quantize_(_pipe.transformer, Float8WeightOnlyConfig())
|
| 65 |
|
| 66 |
_pipe.to("cuda")
|
| 67 |
|