Requires transformers >= 5.0.0. Install the development version with:

pip install git+https://github.com/huggingface/transformers.git
import torch

from transformers import (
    BitsAndBytesConfig as BitsAndBytesConfig,
    Qwen3ForCausalLM,
)

from diffusers import Flux2KleinPipeline

# Inference example: FLUX.2 Klein pipeline using a pre-quantized 4-bit Qwen3
# text encoder to reduce VRAM requirements.
COMPUTE_DTYPE = torch.float16
GENERATOR_DEVICE = "cuda"

# Load the 4-bit text encoder on its own, then hand it to the pipeline so the
# full-precision encoder shipped with the base checkpoint is never loaded.
quantized_encoder = Qwen3ForCausalLM.from_pretrained(
    "aifeifei798/FLUX.2-klein-9B-text_encoder-4bit",
    torch_dtype=COMPUTE_DTYPE,
)

klein_pipe = Flux2KleinPipeline.from_pretrained(
    "black-forest-labs/FLUX.2-klein-9B",
    text_encoder=quantized_encoder,
    torch_dtype=COMPUTE_DTYPE,
)
# klein_pipe.to("cuda")  # use this instead of offloading on GPUs with >= 32G
klein_pipe.enable_model_cpu_offload()  # save some VRAM by offloading the model to CPU,GPU < 32G

# Fixed seed for reproducible output.
seeded_generator = torch.Generator(device=GENERATOR_DEVICE).manual_seed(0)
result = klein_pipe(
    "A cat holding a sign that says hello world",
    height=1024,
    width=1024,
    guidance_scale=1.0,
    num_inference_steps=4,
    generator=seeded_generator,
)
result.images[0].save("flux-klein.png")
Downloads last month
63
Safetensors
Model size
8B params
Tensor type
F32
F16
U8
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for aifeifei798/FLUX.2-klein-9B-text_encoder-4bit

Quantized
(9)
this model