Upload ModelOpt quantized FLUX.2 Klein 4B transformer variants
Browse files
- README.md +41 -0
- fp8/transformer_modelopt.pt +3 -0
- fp8/transformer_modelopt_meta.json +18 -0
- nvfp4/transformer_modelopt.pt +3 -0
- nvfp4/transformer_modelopt_meta.json +18 -0
- w8a8/transformer_modelopt.pt +3 -0
- w8a8/transformer_modelopt_meta.json +18 -0
README.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- flux
|
| 4 |
+
- quantization
|
| 5 |
+
- modelopt
|
| 6 |
+
- fp8
|
| 7 |
+
- nvfp4
|
| 8 |
+
- int8
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Quantized FLUX.2 Klein 4B Transformer (ModelOpt)
|
| 13 |
+
|
| 14 |
+
This repository stores NVIDIA Model Optimizer (ModelOpt) checkpoints for quantized variants of the FLUX.2 Klein 4B transformer.
|
| 15 |
+
|
| 16 |
+
## Contents
|
| 17 |
+
|
| 18 |
+
- `fp8/transformer_modelopt.pt`
|
| 19 |
+
- `fp8/transformer_modelopt_meta.json`
|
| 20 |
+
- `w8a8/transformer_modelopt.pt`
|
| 21 |
+
- `w8a8/transformer_modelopt_meta.json`
|
| 22 |
+
- `nvfp4/transformer_modelopt.pt`
|
| 23 |
+
- `nvfp4/transformer_modelopt_meta.json`
|
| 24 |
+
|
| 25 |
+
## Restore into pipeline
|
| 26 |
+
|
| 27 |
+
```python
|
| 28 |
+
import modelopt.torch.opt as mto
|
| 29 |
+
from klein_pipeline import Flux2KleinPipeline
|
| 30 |
+
import torch
|
| 31 |
+
|
| 32 |
+
pipe = Flux2KleinPipeline.from_pretrained(
|
| 33 |
+
"black-forest-labs/FLUX.2-klein-4B", torch_dtype=torch.bfloat16
|
| 34 |
+
).to("cuda")
|
| 35 |
+
|
| 36 |
+
ckpt = "fp8/transformer_modelopt.pt" # or w8a8 / nvfp4
|
| 37 |
+
mto.restore(pipe.transformer, ckpt)
|
| 38 |
+
pipe.transformer.eval()
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
Uploaded from the Modal volume `klein4B-assets`.
|
fp8/transformer_modelopt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d001d1c3d47729a4f954488be91ed1c2e6d1900ccffb68689b5d599ad761c19d
|
| 3 |
+
size 7751397212
|
fp8/transformer_modelopt_meta.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "/models/FLUX.2-klein-4B",
|
| 3 |
+
"component": "transformer",
|
| 4 |
+
"backend": "nvidia-modelopt",
|
| 5 |
+
"config": "FP8_DEFAULT_CFG",
|
| 6 |
+
"variant": "fp8",
|
| 7 |
+
"dtype": "torch.bfloat16",
|
| 8 |
+
"calibration": {
|
| 9 |
+
"iters": 8,
|
| 10 |
+
"batch_size": 1,
|
| 11 |
+
"image_path": "/models/calib/blue_car_resize.jpeg",
|
| 12 |
+
"steps": 4,
|
| 13 |
+
"height": 576,
|
| 14 |
+
"width": 384,
|
| 15 |
+
"guidance_scale": 4.0
|
| 16 |
+
},
|
| 17 |
+
"version": 1
|
| 18 |
+
}
|
nvfp4/transformer_modelopt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d88a854fb402456cf1337c66b1b3c8714651c9bc8e53952fa063b67908f81b37
|
| 3 |
+
size 7751397788
|
nvfp4/transformer_modelopt_meta.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "/models/FLUX.2-klein-4B",
|
| 3 |
+
"component": "transformer",
|
| 4 |
+
"backend": "nvidia-modelopt",
|
| 5 |
+
"config": "NVFP4_DEFAULT_CFG",
|
| 6 |
+
"variant": "nvfp4",
|
| 7 |
+
"dtype": "torch.bfloat16",
|
| 8 |
+
"calibration": {
|
| 9 |
+
"iters": 8,
|
| 10 |
+
"batch_size": 1,
|
| 11 |
+
"image_path": "/models/calib/blue_car_resize.jpeg",
|
| 12 |
+
"steps": 4,
|
| 13 |
+
"height": 576,
|
| 14 |
+
"width": 384,
|
| 15 |
+
"guidance_scale": 4.0
|
| 16 |
+
},
|
| 17 |
+
"version": 1
|
| 18 |
+
}
|
w8a8/transformer_modelopt.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddd72bcef95822f5101e64348c96d1bf1bb64f5028322ec1e40922519e95e4e5
|
| 3 |
+
size 7756973568
|
w8a8/transformer_modelopt_meta.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "/models/FLUX.2-klein-4B",
|
| 3 |
+
"component": "transformer",
|
| 4 |
+
"backend": "nvidia-modelopt",
|
| 5 |
+
"config": "INT8_SMOOTHQUANT_CFG",
|
| 6 |
+
"variant": "w8a8",
|
| 7 |
+
"dtype": "torch.bfloat16",
|
| 8 |
+
"calibration": {
|
| 9 |
+
"iters": 8,
|
| 10 |
+
"batch_size": 1,
|
| 11 |
+
"image_path": "/models/calib/blue_car_resize.jpeg",
|
| 12 |
+
"steps": 4,
|
| 13 |
+
"height": 576,
|
| 14 |
+
"width": 384,
|
| 15 |
+
"guidance_scale": 4.0
|
| 16 |
+
},
|
| 17 |
+
"version": 1
|
| 18 |
+
}
|