Requires transformers >= 5.0.0. Until that version is on PyPI, install it from source:

pip install git+https://github.com/huggingface/transformers.git
"""Generate an image with FLUX.2-klein-9B using a pre-quantized 8-bit text encoder.

The text-encoder checkpoint is already stored in 8-bit, so no
BitsAndBytesConfig / on-the-fly quantization is needed here.
Downloads both checkpoints from the Hugging Face Hub and writes the
result to ``flux-klein.png``.
"""
import torch

from transformers import Qwen3ForCausalLM

from diffusers import Flux2KleinPipeline

# Half precision for the non-quantized pipeline components.
dtype = torch.float16
device = "cuda"

# Load the pre-quantized (8-bit) Qwen3 text encoder published for this model.
text_encoder_8bit = Qwen3ForCausalLM.from_pretrained(
    "aifeifei798/FLUX.2-klein-9B-text_encoder-8bit",
    torch_dtype=dtype,
)

# Build the pipeline, swapping in the 8-bit text encoder.
pipe = Flux2KleinPipeline.from_pretrained(
    "black-forest-labs/FLUX.2-klein-9B",
    text_encoder=text_encoder_8bit,
    torch_dtype=dtype,
)
# pipe.to("cuda")  # use this instead when the GPU has >= 32 GB of VRAM
pipe.enable_model_cpu_offload()  # offload idle components to CPU so GPUs with < 32 GB VRAM fit

prompt = "A cat holding a sign that says hello world"
# Fixed seed on the CUDA generator makes the output reproducible.
image = pipe(
    prompt,
    height=1024,
    width=1024,
    guidance_scale=1.0,
    num_inference_steps=4,
    generator=torch.Generator(device=device).manual_seed(0),
).images[0]
image.save("flux-klein.png")
Downloads last month
41
Safetensors
Model size
8B params
Tensor type
F32
F16
I8
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for aifeifei798/FLUX.2-klein-9B-text_encoder-8bit

Quantized
(9)
this model