Yuvrajxms09 committed on
Commit
fcfd3ec
·
verified ·
1 Parent(s): 23ef84f

Upload ModelOpt quantized FLUX.2 Klein 4B transformer variants

Browse files
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - flux
4
+ - quantization
5
+ - modelopt
6
+ - fp8
7
+ - nvfp4
8
+ - int8
9
+ license: apache-2.0
10
+ ---
11
+
12
+ # Quantized FLUX.2 Klein 4B Transformer (ModelOpt)
13
+
14
+ This repo stores NVIDIA Model Optimizer checkpoints for FLUX.2 Klein 4B transformer quantization variants.
15
+
16
+ ## Contents
17
+
18
+ - `fp8/transformer_modelopt.pt`
19
+ - `fp8/transformer_modelopt_meta.json`
20
+ - `w8a8/transformer_modelopt.pt`
21
+ - `w8a8/transformer_modelopt_meta.json`
22
+ - `nvfp4/transformer_modelopt.pt`
23
+ - `nvfp4/transformer_modelopt_meta.json`
24
+
25
+ ## Restore into pipeline
26
+
27
+ ```python
28
+ import modelopt.torch.opt as mto
29
+ from klein_pipeline import Flux2KleinPipeline
30
+ import torch
31
+
32
+ pipe = Flux2KleinPipeline.from_pretrained(
33
+ "black-forest-labs/FLUX.2-klein-4B", torch_dtype=torch.bfloat16
34
+ ).to("cuda")
35
+
36
+ ckpt = "fp8/transformer_modelopt.pt" # or w8a8 / nvfp4
37
+ mto.restore(pipe.transformer, ckpt)
38
+ pipe.transformer.eval()
39
+ ```
40
+
41
+ Uploaded from Modal volume `klein4B-assets`.
fp8/transformer_modelopt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d001d1c3d47729a4f954488be91ed1c2e6d1900ccffb68689b5d599ad761c19d
3
+ size 7751397212
fp8/transformer_modelopt_meta.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "/models/FLUX.2-klein-4B",
3
+ "component": "transformer",
4
+ "backend": "nvidia-modelopt",
5
+ "config": "FP8_DEFAULT_CFG",
6
+ "variant": "fp8",
7
+ "dtype": "torch.bfloat16",
8
+ "calibration": {
9
+ "iters": 8,
10
+ "batch_size": 1,
11
+ "image_path": "/models/calib/blue_car_resize.jpeg",
12
+ "steps": 4,
13
+ "height": 576,
14
+ "width": 384,
15
+ "guidance_scale": 4.0
16
+ },
17
+ "version": 1
18
+ }
nvfp4/transformer_modelopt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d88a854fb402456cf1337c66b1b3c8714651c9bc8e53952fa063b67908f81b37
3
+ size 7751397788
nvfp4/transformer_modelopt_meta.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "/models/FLUX.2-klein-4B",
3
+ "component": "transformer",
4
+ "backend": "nvidia-modelopt",
5
+ "config": "NVFP4_DEFAULT_CFG",
6
+ "variant": "nvfp4",
7
+ "dtype": "torch.bfloat16",
8
+ "calibration": {
9
+ "iters": 8,
10
+ "batch_size": 1,
11
+ "image_path": "/models/calib/blue_car_resize.jpeg",
12
+ "steps": 4,
13
+ "height": 576,
14
+ "width": 384,
15
+ "guidance_scale": 4.0
16
+ },
17
+ "version": 1
18
+ }
w8a8/transformer_modelopt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd72bcef95822f5101e64348c96d1bf1bb64f5028322ec1e40922519e95e4e5
3
+ size 7756973568
w8a8/transformer_modelopt_meta.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "/models/FLUX.2-klein-4B",
3
+ "component": "transformer",
4
+ "backend": "nvidia-modelopt",
5
+ "config": "INT8_SMOOTHQUANT_CFG",
6
+ "variant": "w8a8",
7
+ "dtype": "torch.bfloat16",
8
+ "calibration": {
9
+ "iters": 8,
10
+ "batch_size": 1,
11
+ "image_path": "/models/calib/blue_car_resize.jpeg",
12
+ "steps": 4,
13
+ "height": 576,
14
+ "width": 384,
15
+ "guidance_scale": 4.0
16
+ },
17
+ "version": 1
18
+ }