Requires transformers >= 5.0.0. Until that version is on PyPI, install it from source:

pip install git+https://github.com/huggingface/transformers.git
"""Generate an image with FLUX.2-klein-9B using a pre-quantized 8-bit text encoder.

The text-encoder checkpoint is already stored in 8-bit, so no
BitsAndBytesConfig / on-the-fly quantization is needed here.
Downloads both checkpoints from the Hugging Face Hub and writes the
result to ``flux-klein.png``.
"""
import torch

from transformers import Qwen3ForCausalLM

from diffusers import Flux2KleinPipeline

# Half precision for the non-quantized pipeline components.
dtype = torch.float16
device = "cuda"

# Load the pre-quantized (8-bit) Qwen3 text encoder published for this model.
text_encoder_8bit = Qwen3ForCausalLM.from_pretrained(
    "aifeifei798/FLUX.2-klein-9B-text_encoder-8bit",
    torch_dtype=dtype,
)

# Build the pipeline, swapping in the 8-bit text encoder.
pipe = Flux2KleinPipeline.from_pretrained(
    "black-forest-labs/FLUX.2-klein-9B",
    text_encoder=text_encoder_8bit,
    torch_dtype=dtype,
)
# pipe.to("cuda")  # use this instead when the GPU has >= 32 GB of VRAM
pipe.enable_model_cpu_offload()  # offload idle components to CPU so GPUs with < 32 GB VRAM fit

prompt = "A cat holding a sign that says hello world"
# Fixed seed on the CUDA generator makes the output reproducible.
image = pipe(
    prompt,
    height=1024,
    width=1024,
    guidance_scale=1.0,
    num_inference_steps=4,
    generator=torch.Generator(device=device).manual_seed(0),
).images[0]
image.save("flux-klein.png")
Downloads last month
41
Safetensors
Model size
8B params
Tensor type
F32
F16
I8
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for aifeifei798/FLUX.2-klein-9B-text_encoder-8bit

Quantized
(9)
this model