Upload 5 files

Browse files

Files changed (6) hide show

.gitattributes +1 -0
klein.py +8 -22
klein2.py +62 -0
klein3.py +35 -0
ltx.py +91 -0
suji.jpg +3 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+suji.jpg filter=lfs diff=lfs merge=lfs -text

klein.py CHANGED Viewed

@@ -1,36 +1,22 @@
 import torch
 from diffusers import Flux2KleinPipeline
-from transformers import BitsAndBytesConfig
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",          # BEST quality/speed
-    bnb_4bit_compute_dtype=torch.bfloat16,  # fast on Ampere+
-    bnb_4bit_use_double_quant=True,     # lower VRAM
-)
-device = "cuda"
-dtype = torch.bfloat16
-pipe = Flux2KleinPipeline.from_pretrained("./FLUX.2-9B-bnb-4bit", torch_dtype=dtype)
-"""
 pipe = Flux2KleinPipeline.from_pretrained(
-    "black-forest-labs/FLUX.2-klein-4B",
     torch_dtype=torch.bfloat16,
-    device_map="auto",                 # REQUIRED
-    quantization_config=bnb_config,    # APPLY 4-bit
 )
-"""
-pipe.to("cuda")
 #pipe.enable_model_cpu_offload()  # save some VRAM by offloading the model to CPU
 from PIL import Image
 init_image = Image.open("suji.jpg").convert("RGB")
 #prompt = "an very beautiful sexy korean kpop young woman with white bikini is smiling on the waikiki beach. hiqh quality realistic photo."# pixar 3d style"
 #prompt = "beautiful woman in the beach holding plate with Circulus "
-prompt = "A beautiful korean kpop young woman with white red sexy dress"
 image = pipe(
     prompt=prompt,
     image=init_image,
@@ -38,6 +24,6 @@ image = pipe(
     width=1024,
     guidance_scale=1.0,
     num_inference_steps=4,
-    generator=torch.Generator(device=device).manual_seed(0)
 ).images[0]
-image.save("./output/flux_suji6.png")

 import torch
 from diffusers import Flux2KleinPipeline
 pipe = Flux2KleinPipeline.from_pretrained(
+    "./FLUX.2-9B-bnb-4bit",
     torch_dtype=torch.bfloat16,
+    device_map="cuda",                 # REQUIRED
 )
+#pipe.to("cuda")
 #pipe.enable_model_cpu_offload()  # save some VRAM by offloading the model to CPU
 from PIL import Image
 init_image = Image.open("suji.jpg").convert("RGB")
 #prompt = "an very beautiful sexy korean kpop young woman with white bikini is smiling on the waikiki beach. hiqh quality realistic photo."# pixar 3d style"
 #prompt = "beautiful woman in the beach holding plate with Circulus "
+prompt = "피부가 드러나는 흰색 드레스를 입었다." #하얀색의 섹시한 드레스를 입은 아름다운 한국 여성"
 image = pipe(
     prompt=prompt,
     image=init_image,
     width=1024,
     guidance_scale=1.0,
     num_inference_steps=4,
+    generator=torch.Generator(device="cuda").manual_seed(0)
 ).images[0]
+image.save("./output/flux_suji10.png")

klein2.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import os
+import torch
+from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel
+from transformers import Qwen3ForCausalLM, BitsAndBytesConfig, AutoTokenizer
+import math
+# klein2.py: text-to-image with FLUX.2 klein, loading the transformer and the
+# text encoder separately with a bitsandbytes 4-bit config and handing both to
+# the pipeline.
+# Enable TF32 for CUDA matmul and cuDNN, and turn on cuDNN benchmark mode.
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
+torch.backends.cudnn.benchmark = True
+dtype = torch.bfloat16
+# NF4 4-bit quantization, bf16 compute, double quantization enabled.
+BNB_CONFIG = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=dtype,
+    bnb_4bit_use_double_quant=True,
+)
+# Not read anywhere below; kept only so the name stays defined.
+model_path = "./FLUX.2-klein-9B"
+# Single source of truth for the local checkpoint directory. The sub-models
+# are read from its sub-directories and the pipeline from its root, so the
+# three loads can no longer drift apart.
+checkpoint_dir = "./FLUX.2-9B-bnb-4bit"
+prompt = "A beautiful korean kpop young woman  holding a sign that says hello world"
+height, width, guidance_scale, steps, seed = 1024, 1024, 4.0, 4, 0
+transformer = Flux2Transformer2DModel.from_pretrained(
+    f"{checkpoint_dir}/transformer",
+    # alternative (disabled): pass checkpoint_dir with subfolder="transformer"
+    quantization_config=BNB_CONFIG,
+    torch_dtype=dtype,
+    #use_safetensors=False,
+)
+text_encoder = Qwen3ForCausalLM.from_pretrained(
+    f"{checkpoint_dir}/text_encoder",
+    # alternative (disabled): pass checkpoint_dir with subfolder="text_encoder"
+    quantization_config=BNB_CONFIG,
+    torch_dtype=dtype,
+)
+# The pre-loaded sub-models are passed in so the pipeline uses them as given.
+pipe = Flux2KleinPipeline.from_pretrained(
+    checkpoint_dir,
+    torch_dtype=dtype,
+    transformer=transformer,
+    text_encoder=text_encoder,
+)
+#pipe.enable_vae_slicing()
+pipe.to("cuda")
+# Seeded CUDA generator so repeated runs start from the same noise.
+img = pipe(
+    prompt=prompt,
+    height=height,
+    width=width,
+    guidance_scale=guidance_scale,
+    num_inference_steps=steps,
+    generator=torch.Generator(device="cuda").manual_seed(seed),
+).images[0]
+output = "output/flux2_beauty2.png"
+# Create the output directory first; saving into a missing directory fails.
+os.makedirs(os.path.dirname(output) or ".", exist_ok=True)
+img.save(output)
+#pipe.save_pretrained('./FLUX.2-lightning')

klein3.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import torch
+import diffusers
+from sdnq import SDNQConfig # import sdnq to register it into diffusers and transformers
+from sdnq.common import use_torch_compile as triton_is_available
+from sdnq.loader import apply_sdnq_options_to_model
+# klein3.py: image-conditioned generation with the SDNQ 4-bit FLUX.2 klein
+# checkpoint. The sdnq import above registers the quantization backend.
+pipe = diffusers.Flux2KleinPipeline.from_pretrained(
+    "Disty0/FLUX.2-klein-9B-SDNQ-4bit-dynamic-svd-r32",
+    torch_dtype=torch.bfloat16,
+)
+# Resolve the accelerator once so the INT8 guard and the device placement
+# agree. Previously the guard accepted XPU but the pipeline was always moved
+# to "cuda". CUDA stays the choice whenever it is available, and also when no
+# accelerator is found, which preserves the original behaviour in that case.
+has_cuda = torch.cuda.is_available()
+has_xpu = hasattr(torch, "xpu") and torch.xpu.is_available()
+device = "xpu" if has_xpu and not has_cuda else "cuda"
+# Enable INT8 MatMul for AMD, Intel ARC and Nvidia GPUs:
+if triton_is_available and (has_cuda or has_xpu):
+    pipe.transformer = apply_sdnq_options_to_model(pipe.transformer, use_quantized_matmul=True)
+    pipe.text_encoder = apply_sdnq_options_to_model(pipe.text_encoder, use_quantized_matmul=True)
+    # pipe.transformer = torch.compile(pipe.transformer) # optional for faster speeds
+pipe.to(device)
+#pipe.enable_model_cpu_offload()
+from PIL import Image
+# Reference image passed to the pipeline as the conditioning input.
+init_image = Image.open("suji.jpg").convert("RGB")
+prompt = "A beautiful korean woman holding a sign that says Circulus Inc. comics style."
+image = pipe(
+    image=init_image,
+    prompt=prompt,
+    height=1024,
+    width=1024,
+    guidance_scale=1.0,
+    num_inference_steps=4,
+    # torch.manual_seed seeds the global RNG and returns the default generator,
+    # so this line does not depend on which accelerator was selected.
+    generator=torch.manual_seed(0)
+).images[0]
+image.save("flux-klein-sdnq-4bit-dynamic-svd-r32_d.png")

ltx.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import torch
+from diffusers import LTX2Pipeline, LTX2ImageToVideoPipeline, LTX2VideoTransformer3DModel
+from diffusers.pipelines.ltx2.export_utils import encode_video
+from diffusers.utils import load_image
+from transformers import Qwen3ForCausalLM, BitsAndBytesConfig, AutoTokenizer
+import math
+import numpy as np
+# ltx.py: text-to-video (with audio) using LTX-2, with the text encoder and the
+# video transformer loaded through a bitsandbytes 4-bit config. Writes
+# video2.mp4 and then saves the assembled pipeline to ./LTX-2-bnb-4bit.
+from diffusers import LTX2Pipeline
+from diffusers.pipelines.ltx2.export_utils import encode_video
+from transformers import Gemma3ForConditionalGeneration
+# Backend switches: TF32 for CUDA matmul and cuDNN, plus cuDNN benchmark mode.
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
+torch.backends.cudnn.benchmark = True
+# NF4 4-bit quantization, bf16 compute, double quantization enabled.
+BNB_CONFIG = BitsAndBytesConfig(
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_quant_type="nf4",
+    load_in_4bit=True,
+)
+repo = "Lightricks/LTX-2"
+# Text encoder first, then the video transformer, both from sub-folders of the
+# same repository and both with the 4-bit config.
+gemma_encoder = Gemma3ForConditionalGeneration.from_pretrained(
+    repo, subfolder="text_encoder", quantization_config=BNB_CONFIG
+)
+video_transformer = LTX2VideoTransformer3DModel.from_pretrained(
+    repo, subfolder="transformer", quantization_config=BNB_CONFIG
+)
+pipe = LTX2Pipeline.from_pretrained(
+    repo,
+    torch_dtype=torch.bfloat16,
+    transformer=video_transformer,
+    text_encoder=gemma_encoder,
+)
+# Cast the remaining components to bf16 one at a time, in the original order,
+# before the whole pipeline is moved to the GPU.
+for component_name in ("vae", "connectors", "audio_vae", "vocoder"):
+    getattr(pipe, component_name).to(dtype=torch.bfloat16)
+pipe.to("cuda", dtype=torch.bfloat16)
+# Loaded but not used: the image= argument of the call below is disabled.
+image = load_image("./suji.jpg")
+prompt = "A very beautiful korean kpop young woman is walking waikiki beach"
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+frame_rate = 24.0
+generation_kwargs = dict(
+    #image=image,
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=768,
+    height=512,
+    num_frames=121,
+    frame_rate=frame_rate,
+    num_inference_steps=40,
+    guidance_scale=4.0,
+    output_type="np",
+    return_dict=False,
+)
+with torch.autocast("cuda", dtype=torch.bfloat16):
+    video, audio = pipe(**generation_kwargs)
+# Replace NaNs with 0, clamp to [0, 1], scale to 8-bit and wrap as a tensor
+# for the encoder.
+frames_u8 = (np.clip(np.nan_to_num(video, nan=0.0), 0, 1) * 255).round().astype("uint8")
+video = torch.from_numpy(frames_u8)
+encode_video(
+    video[0],
+    fps=frame_rate,
+    audio=audio[0].float().cpu(),
+    audio_sample_rate=pipe.vocoder.config.output_sampling_rate,  # should be 24000
+    output_path="video2.mp4",
+)
+pipe.save_pretrained("./LTX-2-bnb-4bit")

suji.jpg ADDED Viewed

Git LFS Details

SHA256: 052cd84a4a6a6baf58eb9bf7318b7428358f0b1a587768144aa16f67d34fdd1e
Pointer size: 131 Bytes
Size of remote file: 235 kB