ProGamerGov committed on
Commit
4f9093b
·
verified ·
1 Parent(s): 5a5a57c

Fix nf4 script and add int8 script

Browse files
Files changed (2) hide show
  1. run_qwen_image_int8.py +143 -0
  2. run_qwen_image_nf4.py +2 -2
run_qwen_image_int8.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import torch
3
+ import numpy as np
4
+
5
+ from transformers import Qwen2_5_VLForConditionalGeneration
6
+
7
+ from diffusers import (
8
+ QwenImagePipeline,
9
+ QwenImageTransformer2DModel,
10
+ QwenImageInpaintPipeline,
11
+ )
12
+
13
+ from optimum.quanto import quantize, qint8, freeze
14
+
15
+
16
# --- Generation settings ---------------------------------------------------
# Text prompt for the panorama; adjacent literals are concatenated by Python.
prompt = (
    "equirectangular, a woman and a man sitting at a cafe, the woman has red hair "
    "and she's wearing purple sweater with a black scarf and a white hat, the man "
    "is sitting on the other side of the table and he's wearing a white shirt with "
    "a purple scarf and red hat, both of them are sipping their coffee while in the "
    "table there's some cake slices on their respective plates, each with forks and "
    "knives at each side."
)
negative_prompt = ""
output_filename = "qwen_int8.png"
width = 2048
height = 1024
true_cfg_scale = 4.0
num_inference_steps = 25
seed = 42

# --- LoRA weights ----------------------------------------------------------
lora_model_id = "ProGamerGov/qwen-360-diffusion"
lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"

# --- Base model ------------------------------------------------------------
# Use the base fp16/bf16 model, not the nf4 variant.
model_id = "Qwen/Qwen-Image"
torch_dtype = torch.bfloat16
device = "cuda"

# --- Seam-fix (inpainting) pass --------------------------------------------
fix_seam = True
inpaint_strength = 0.5
seam_width = 0.10  # passed to create_seam_mask as its `frac` argument
41
+
42
+
43
def shift_equirect(img):
    """Roll an image horizontally by half its width, wrapping at the edges.

    Columns pushed past the right border re-enter on the left, so the
    left/right border of the input ends up in the centre of the output.

    The roll is applied directly to the image's own pixel array. The earlier
    implementation converted to a 0..1 float tensor and truncated back to
    ``uint8``; that required a 3-D (multi-channel) array, assumed 8-bit data,
    and depended on float truncation reproducing every pixel value.

    Args:
        img: A PIL image (or any object ``np.asarray`` converts to an
            ``(H, W)`` or ``(H, W, C)`` array).

    Returns:
        A new PIL image created from the rolled array.

    Note:
        The shift is ``W // 2``, so calling this twice restores the original
        column order only when the width is even.
    """
    pixels = np.asarray(img)
    # Axis 1 is the width axis for both (H, W) and (H, W, C) layouts.
    rolled = np.roll(pixels, pixels.shape[1] // 2, axis=1)
    return Image.fromarray(rolled)
48
+
49
+
50
def create_seam_mask(w, h, frac=0.10):
    """Create a mask image with a white vertical band centred at ``w // 2``.

    Args:
        w: Mask width in pixels.
        h: Mask height in pixels.
        frac: Band width expressed as a fraction of ``w``. The band is always
            at least one column wide and never wider than the mask.

    Returns:
        A single-channel 8-bit PIL image of size ``(w, h)`` whose pixels are
        255 inside the band and 0 everywhere else.
    """
    band = max(1, int(w * frac))
    start = w // 2 - band // 2
    # Derive the end from the start so the band is exactly `band` columns.
    # Using `centre + band // 2` loses a column for odd widths and yields an
    # empty slice when band == 1, which would defeat the max(1, ...) guard.
    stop = start + band
    # Clamp to the image: a negative start would otherwise be interpreted by
    # slicing as an offset from the right edge.
    start = max(0, start)
    stop = min(w, stop)

    mask = np.zeros((h, w), dtype=np.uint8)
    mask[:, start:stop] = 255
    # A 2-D uint8 array is converted to an 8-bit greyscale ("L") image.
    return Image.fromarray(mask)
57
+
58
+
59
def load_pipeline(text_encoder, transformer, mode="t2i"):
    """Build a Qwen-Image pipeline around caller-supplied modules.

    Args:
        text_encoder: Text encoder instance handed to ``from_pretrained``.
        transformer: Transformer instance handed to ``from_pretrained``.
        mode: ``"t2i"`` selects ``QwenImagePipeline``; any other value
            selects ``QwenImageInpaintPipeline``.

    Returns:
        The pipeline with the LoRA weights loaded, model CPU offload enabled
        and VAE tiling enabled.
    """
    if mode == "t2i":
        pipeline_cls = QwenImagePipeline
    else:
        pipeline_cls = QwenImageInpaintPipeline

    # The remaining components are loaded from `model_id`; the two modules
    # passed in replace the ones that would otherwise be loaded.
    load_kwargs = {
        "transformer": transformer,
        "text_encoder": text_encoder,
        "torch_dtype": torch_dtype,
        "use_safetensors": True,
        "low_cpu_mem_usage": True,
    }
    pipeline = pipeline_cls.from_pretrained(model_id, **load_kwargs)

    pipeline.load_lora_weights(lora_model_id, weight_name=lora_filename)
    # Per the original author's note, offload and tiling still work when the
    # supplied transformer has been quantized.
    pipeline.enable_model_cpu_offload()
    pipeline.enable_vae_tiling()
    return pipeline
75
+
76
+
77
def main():
    """Generate a panorama with INT8-quantized modules, then optionally fix its seam.

    Steps:
        1. Load the transformer and quantize its weights to INT8.
        2. Load the text encoder on CPU and quantize its weights to INT8.
        3. Build the text-to-image pipeline and render the base image, saved
           to ``output_filename``.
        4. If ``fix_seam`` is set, roll the image by half its width, inpaint
           a vertical band over the exposed seam, roll back, and save the
           result next to the base image with ``_seamfix`` appended to the
           file stem.
    """
    # Local import keeps this block self-contained; pathlib is stdlib.
    from pathlib import Path

    # 1) Load and INT8-quantize transformer on CPU
    transformer = QwenImageTransformer2DModel.from_pretrained(
        model_id,
        subfolder="transformer",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
    )
    quantize(transformer, weights=qint8)
    freeze(transformer)

    # 2) Load and INT8-quantize text encoder on CPU
    text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_id,
        subfolder="text_encoder",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        device_map={"": "cpu"},  # keep it on CPU; offload will move as needed
    )
    quantize(text_encoder, weights=qint8)
    freeze(text_encoder)

    # 3) Build T2I pipeline
    generator = torch.Generator(device=device).manual_seed(seed)
    pipe = load_pipeline(text_encoder, transformer, mode="t2i")

    # 4) First pass: base panorama
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=true_cfg_scale,
        generator=generator,
    ).images[0]

    image.save(output_filename)

    # 5) Optional seam-fix pass using inpainting
    if not fix_seam:
        return

    # Drop the first pipeline before building the second one.
    del pipe
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    shifted = shift_equirect(image)  # roll 50% to expose seam
    # Size the mask from the image that is actually inpainted so the two
    # always agree, rather than from the requested width/height.
    mask_w, mask_h = shifted.size
    mask = create_seam_mask(mask_w, mask_h, frac=seam_width)

    pipe = load_pipeline(text_encoder, transformer, mode="i2i")
    image_fixed = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=shifted,
        mask_image=mask,
        strength=inpaint_strength,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=true_cfg_scale,
        generator=generator,
    ).images[0]
    # Roll again to move the repaired band back to the image border.
    image_fixed = shift_equirect(image_fixed)

    # Insert "_seamfix" before the extension. A plain
    # `.replace(".png", ...)` leaves non-PNG names unchanged, which would
    # overwrite the base image saved above.
    base_path = Path(output_filename)
    seamfix_path = base_path.with_name(f"{base_path.stem}_seamfix{base_path.suffix}")
    image_fixed.save(str(seamfix_path))
140
+
141
+
142
# Run the generation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
run_qwen_image_nf4.py CHANGED
@@ -16,8 +16,8 @@ true_cfg_scale = 4.0
16
  num_inference_steps = 25
17
  seed = 42
18
 
19
- lora_model_id = "jimmycarter/qwen-3d-epoch-7"
20
- lora_filename = "pytorch_lora_weights.safetensors"
21
 
22
  model_id = "diffusers/qwen-image-nf4"
23
  torch_dtype = torch.bfloat16
 
16
  num_inference_steps = 25
17
  seed = 42
18
 
19
+ lora_model_id = "ProGamerGov/qwen-360-diffusion"
20
+ lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"
21
 
22
  model_id = "diffusers/qwen-image-nf4"
23
  torch_dtype = torch.bfloat16