|
|
import os |
|
|
import torch |
|
|
from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel |
|
|
from transformers import Qwen3ForCausalLM, BitsAndBytesConfig, AutoTokenizer |
|
|
import math |
|
|
|
|
|
# Speed knobs: enable TF32 kernels for CUDA matmul and cuDNN, and let
# cuDNN autotune convolution algorithms. Trades a little float precision
# for throughput on Ampere+ GPUs.
for _backend, _flag in (
    (torch.backends.cuda.matmul, "allow_tf32"),
    (torch.backends.cudnn, "allow_tf32"),
    (torch.backends.cudnn, "benchmark"),
):
    setattr(_backend, _flag, True)
|
|
|
|
|
# 4-bit NF4 quantization settings shared by the transformer and the text
# encoder below: compute in bfloat16, double quantization for extra
# VRAM savings.
_bnb_kwargs = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
BNB_CONFIG = BitsAndBytesConfig(**_bnb_kwargs)
|
|
|
|
|
# Local checkpoint directory for the Klein 9B model.
# Fix: this was a placeholder-free f-string (f"...") — plain literal now.
# NOTE(review): model_path is never used below; the loads all reference
# "./FLUX.2-9B-bnb-4bit" instead — confirm which path is intended.
model_path = "./FLUX.2-klein-9B"

prompt = "A beautiful korean kpop young woman holding a sign that says hello world"

# Generation settings: 1024x1024 output, CFG scale 4.0, 4 denoising steps,
# fixed seed 0 for reproducible output.
height, width, guidance_scale, steps, seed = 1024, 1024, 4.0, 4, 0

# Dtype for non-quantized weights/activations throughout the script.
dtype = torch.bfloat16
|
|
|
|
|
# Load the diffusion transformer from the local 4-bit checkpoint,
# applying the shared NF4 quantization config.
_transformer_dir = "./FLUX.2-9B-bnb-4bit/transformer"
transformer = Flux2Transformer2DModel.from_pretrained(
    _transformer_dir,
    quantization_config=BNB_CONFIG,
    torch_dtype=dtype,
)
|
|
|
|
|
# Load the Qwen3 text encoder from the local 4-bit checkpoint with the
# same NF4 quantization config as the transformer.
_text_encoder_dir = "./FLUX.2-9B-bnb-4bit/text_encoder"
text_encoder = Qwen3ForCausalLM.from_pretrained(
    _text_encoder_dir,
    quantization_config=BNB_CONFIG,
    torch_dtype=dtype,
)
|
|
|
|
|
# Assemble the pipeline from the local quantized checkpoint, injecting the
# pre-quantized transformer and text encoder loaded above.
# Fix: path now carries the "./" prefix like the component loads do — a
# bare name that doesn't exist locally would be treated as a Hub repo id.
pipe = Flux2KleinPipeline.from_pretrained(
    "./FLUX.2-9B-bnb-4bit",
    torch_dtype=dtype,
    transformer=transformer,
    text_encoder=text_encoder,
)

# Place the pipeline's modules on the GPU for inference.
pipe.to("cuda")
|
|
|
|
|
# Run text-to-image generation with a seeded CUDA RNG so the result is
# reproducible across runs.
_rng = torch.Generator(device="cuda").manual_seed(seed)
_result = pipe(
    prompt=prompt,
    height=height,
    width=width,
    guidance_scale=guidance_scale,
    num_inference_steps=steps,
    generator=_rng,
)
img = _result.images[0]
|
|
|
|
|
# Write the generated image to disk, creating the output directory first.
output = "output/flux2_beauty2.png"
_out_dir = os.path.dirname(output) or "."  # "." guards a bare filename
os.makedirs(_out_dir, exist_ok=True)
img.save(output)
|
|
|
|
|
|
|
|
|