import os

import torch
from diffusers import BitsAndBytesConfig as DiffusersBnbConfig
from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel
from transformers import BitsAndBytesConfig as TransformersBnbConfig
from transformers import Qwen3ForCausalLM

# Allow TF32 matmuls and cuDNN autotuning for faster inference on Ampere+ GPUs.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.benchmark = True

# Shared 4-bit NF4 settings. diffusers and transformers each expect their own
# BitsAndBytesConfig class, so build one of each from the same kwargs.
BNB_KWARGS = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model_path = "./FLUX.2-9B-bnb-4bit"
prompt = "A beautiful Korean K-pop young woman holding a sign that says hello world"
height, width, guidance_scale, steps, seed = 1024, 1024, 4.0, 4, 0
dtype = torch.bfloat16

# Load the transformer and text encoder individually so the 4-bit quantization
# config is applied to each component.
transformer = Flux2Transformer2DModel.from_pretrained(
    model_path,
    subfolder="transformer",
    quantization_config=DiffusersBnbConfig(**BNB_KWARGS),
    torch_dtype=dtype,
)
text_encoder = Qwen3ForCausalLM.from_pretrained(
    model_path,
    subfolder="text_encoder",
    quantization_config=TransformersBnbConfig(**BNB_KWARGS),
    torch_dtype=dtype,
)

pipe = Flux2KleinPipeline.from_pretrained(
    model_path,
    torch_dtype=dtype,
    transformer=transformer,
    text_encoder=text_encoder,
)
# pipe.enable_vae_slicing()  # optional: lower peak VRAM during VAE decode
pipe.to("cuda")

img = pipe(
    prompt=prompt,
    height=height,
    width=width,
    guidance_scale=guidance_scale,
    num_inference_steps=steps,
    generator=torch.Generator(device="cuda").manual_seed(seed),
).images[0]

output = "output/flux2_beauty2.png"
os.makedirs(os.path.dirname(output) or ".", exist_ok=True)
img.save(output)

# pipe.save_pretrained('./FLUX.2-lightning')  # optional: persist the assembled pipeline
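
# A minimal low-VRAM sketch (an optional variant, not part of the run above).
# enable_model_cpu_offload() and enable_vae_slicing() are standard diffusers
# pipeline methods; on a GPU that cannot hold the full pipeline, they would
# replace the pipe.to("cuda") call above rather than run here at the end:
#
#   pipe.enable_model_cpu_offload()  # stream submodules to the GPU on demand
#   pipe.enable_vae_slicing()        # decode latents in slices to cap peak memory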