ProGamerGov
/

qwen-360-diffusion

+from PIL import Image
+import torch
+import numpy as np
+from transformers import Qwen2_5_VLForConditionalGeneration
+from diffusers import (
+    QwenImagePipeline,
+    QwenImageTransformer2DModel,
+    QwenImageInpaintPipeline,
+)
+from optimum.quanto import quantize, qint8, freeze
+# --- Prompting -------------------------------------------------------------
+# The prompt text starts with the "equirectangular" tag.
+prompt = (
+    "equirectangular, a woman and a man sitting at a cafe, the woman has red hair "
+    "and she's wearing purple sweater with a black scarf and a white hat, the man "
+    "is sitting on the other side of the table and he's wearing a white shirt with "
+    "a purple scarf and red hat, both of them are sipping their coffee while in the "
+    "table there's some cake slices on their respective plates, each with forks and "
+    "knives at each side."
+)
+negative_prompt = ""
+# --- Output ----------------------------------------------------------------
+output_filename = "qwen_int8.png"
+width = 2048
+height = 1024
+# --- Sampling --------------------------------------------------------------
+true_cfg_scale = 4.0
+num_inference_steps = 25
+seed = 42
+# --- Weights ---------------------------------------------------------------
+lora_model_id = "ProGamerGov/qwen-360-diffusion"
+lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"
+# Use the base fp16/bf16 model, not the nf4 variant
+model_id = "Qwen/Qwen-Image"
+torch_dtype = torch.bfloat16
+device = "cuda"
+# --- Seam-fix pass ---------------------------------------------------------
+# seam_width is passed to create_seam_mask as a fraction of the image width.
+fix_seam = True
+inpaint_strength = 0.5
+seam_width = 0.10
+def shift_equirect(img):
+    """Roll an image horizontally by half its width, wrapping around.
+
+    The pixels are rolled in their native integer dtype, so values are
+    preserved exactly; no float conversion (and no truncation back to
+    uint8) is involved. Both 2-D (H, W) and 3-D (H, W, C) images work,
+    because the width axis is axis 1 in either layout.
+
+    Args:
+        img: PIL image (anything ``np.asarray`` can convert).
+
+    Returns:
+        A new PIL image whose columns are shifted right by ``width // 2``.
+        Applying the shift twice restores the original only when the
+        width is even (for odd widths the total shift is ``width - 1``).
+    """
+    pixels = np.asarray(img)
+    # np.roll returns a fresh array, so the input image is never modified.
+    rolled = np.roll(pixels, pixels.shape[1] // 2, axis=1)
+    return Image.fromarray(rolled)
+def create_seam_mask(w, h, frac=0.10):
+    """Create a vertical, horizontally centered seam mask as a PIL image.
+
+    Args:
+        w: Mask width in pixels.
+        h: Mask height in pixels.
+        frac: Seam band width as a fraction of ``w``.
+
+    Returns:
+        A mode "L" image of size (w, h): 255 inside the seam band,
+        0 everywhere else.
+
+    The band is exactly ``seam_w`` columns wide, where ``seam_w`` is
+    ``int(w * frac)`` clamped to the range [1, w]. The end index is derived
+    from the start index rather than from ``seam_w // 2`` on both sides, so
+    a 1-column band is not empty and odd widths do not lose a column.
+    """
+    seam_w = min(w, max(1, int(w * frac)))
+    # seam_w <= w keeps start >= 0, so the slice can never wrap around.
+    start = w // 2 - seam_w // 2
+    mask = np.zeros((h, w), dtype=np.uint8)
+    mask[:, start:start + seam_w] = 255
+    return Image.fromarray(mask, "L")
+def load_pipeline(text_encoder, transformer, mode="t2i"):
+    """Assemble a Qwen-Image pipeline from already-loaded components.
+
+    Args:
+        text_encoder: Text encoder instance handed to ``from_pretrained``.
+        transformer: Transformer instance handed to ``from_pretrained``.
+        mode: ``"t2i"`` selects ``QwenImagePipeline``; any other value
+            selects ``QwenImageInpaintPipeline``.
+
+    Returns:
+        The pipeline with the LoRA weights loaded, model CPU offload
+        enabled and VAE tiling enabled.
+    """
+    if mode == "t2i":
+        pipeline_cls = QwenImagePipeline
+    else:
+        pipeline_cls = QwenImageInpaintPipeline
+    load_kwargs = {
+        "transformer": transformer,
+        "text_encoder": text_encoder,
+        "torch_dtype": torch_dtype,
+        "use_safetensors": True,
+        "low_cpu_mem_usage": True,
+    }
+    pipeline = pipeline_cls.from_pretrained(model_id, **load_kwargs)
+    pipeline.load_lora_weights(lora_model_id, weight_name=lora_filename)
+    pipeline.enable_model_cpu_offload()
+    pipeline.enable_vae_tiling()
+    # Author's note: this still works with the quantized transformer
+    return pipeline
+def main():
+    """Generate the panorama with INT8-quantized models and optionally fix its seam.
+
+    The base image is saved to ``output_filename``. When ``fix_seam`` is set,
+    the image is rolled by half its width, the centered seam band is
+    inpainted, the result is rolled again, and it is saved with ".png"
+    replaced by "_seamfix.png" in the file name.
+    """
+    # 1) Load and INT8-quantize transformer on CPU
+    transformer = QwenImageTransformer2DModel.from_pretrained(
+        model_id,
+        subfolder="transformer",
+        torch_dtype=torch_dtype,
+        low_cpu_mem_usage=True,
+    )
+    quantize(transformer, weights=qint8)
+    freeze(transformer)
+    # 2) Load and INT8-quantize text encoder on CPU
+    text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        model_id,
+        subfolder="text_encoder",
+        torch_dtype=torch_dtype,
+        low_cpu_mem_usage=True,
+        device_map={"": "cpu"},  # keep it on CPU; offload will move as needed
+    )
+    quantize(text_encoder, weights=qint8)
+    freeze(text_encoder)
+    # 3) Seeded generator and the arguments shared by both passes
+    generator = torch.Generator(device=device).manual_seed(seed)
+    shared_kwargs = {
+        "prompt": prompt,
+        "negative_prompt": negative_prompt,
+        "width": width,
+        "height": height,
+        "num_inference_steps": num_inference_steps,
+        "true_cfg_scale": true_cfg_scale,
+        "generator": generator,
+    }
+    # 4) First pass: base panorama
+    t2i_pipe = load_pipeline(text_encoder, transformer, mode="t2i")
+    image = t2i_pipe(**shared_kwargs).images[0]
+    image.save(output_filename)
+    # 5) Optional seam-fix pass using inpainting
+    if not fix_seam:
+        return
+    # Drop the first pipeline before building the second one.
+    del t2i_pipe
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    shifted = shift_equirect(image)  # roll 50% to expose seam
+    mask = create_seam_mask(width, height, frac=seam_width)
+    inpaint_pipe = load_pipeline(text_encoder, transformer, mode="i2i")
+    inpainted = inpaint_pipe(
+        image=shifted,
+        mask_image=mask,
+        strength=inpaint_strength,
+        **shared_kwargs,
+    ).images[0]
+    # Roll again so the repaired seam returns to the image edges.
+    image_fixed = shift_equirect(inpainted)
+    seamfix_filename = output_filename.replace(".png", "_seamfix.png")
+    image_fixed.save(seamfix_filename)
+if __name__ == "__main__":
+    main()

run_qwen_image_nf4.py CHANGED Viewed

@@ -16,8 +16,8 @@ true_cfg_scale = 4.0
 num_inference_steps = 25
 seed = 42
-lora_model_id = "jimmycarter/qwen-3d-epoch-7"
-lora_filename = "pytorch_lora_weights.safetensors"
 model_id = "diffusers/qwen-image-nf4"
 torch_dtype = torch.bfloat16

 num_inference_steps = 25
 seed = 42
+lora_model_id = "ProGamerGov/qwen-360-diffusion"
+lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"
 model_id = "diffusers/qwen-image-nf4"
 torch_dtype = torch.bfloat16