| import torch |
| from nunchaku.utils import get_gpu_memory, get_precision |
| from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel |
|
|
class QwenImageBackend:
    """Build a diffusers ``QwenImagePipeline`` around a Nunchaku transformer.

    The backend is constructed cheaply; nothing is imported from ``diffusers``
    and no weights are read until :meth:`load` is called.

    Attributes:
        model_id: Identifier passed to ``QwenImagePipeline.from_pretrained``.
        optimized_model_path: Location passed to
            ``NunchakuQwenImageTransformer2DModel.from_pretrained``.
        pipeline: The loaded pipeline, or ``None`` until :meth:`load` succeeds.
        rank: Fixed at 32. Not read anywhere in this class.
    """

    # Above this value (as reported by ``get_gpu_memory()``) the whole-model
    # CPU offload path is taken; at or below it the low-VRAM path is used.
    # NOTE(review): the log messages call the unit "GB" - confirm against
    # ``get_gpu_memory``.
    _MODEL_OFFLOAD_MIN_GPU_MEMORY = 18

    def __init__(self, model_id, optimized_model_path=None):
        """Store the configuration; no model is loaded here.

        Args:
            model_id: Base pipeline identifier.
            optimized_model_path: Transformer checkpoint location. It may be
                omitted at construction time but must be set before
                :meth:`load` is called.
        """
        self.model_id = model_id
        self.optimized_model_path = optimized_model_path
        self.pipeline = None
        self.rank = 32

    def load(self):
        """Load the transformer and pipeline and configure offloading.

        Returns:
            A 2-tuple holding the same pipeline object twice.
            NOTE(review): presumably callers unpack two pipeline handles from
            every backend - confirm against the call sites.

        Raises:
            ValueError: If ``optimized_model_path`` is unset or empty.
        """
        # Fail fast with an actionable message instead of letting the
        # transformer loader choke on ``None`` after other work has been done.
        if not self.optimized_model_path:
            raise ValueError(
                "QwenImageBackend.load() requires 'optimized_model_path' to be "
                "set to the Nunchaku transformer checkpoint."
            )

        print(f"Loading QwenImageBackend from {self.model_id}...")
        scheduler = self._build_scheduler()

        print(f"Loading NunchakuQwenImageTransformer2DModel from {self.optimized_model_path}...")
        transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(self.optimized_model_path)

        # Imported lazily so that constructing the backend does not pull in
        # diffusers.
        from diffusers import QwenImagePipeline

        pipeline = QwenImagePipeline.from_pretrained(
            self.model_id,
            transformer=transformer,
            scheduler=scheduler,
            torch_dtype=torch.bfloat16,
        )

        self._configure_offload(pipeline, transformer)

        # Only publish the pipeline once it is fully configured.
        self.pipeline = pipeline
        return self.pipeline, self.pipeline

    @staticmethod
    def _build_scheduler():
        """Create the ``FlowMatchEulerDiscreteScheduler`` used by the pipeline."""
        import math

        from diffusers import FlowMatchEulerDiscreteScheduler

        # ``base_shift`` and ``max_shift`` are deliberately identical.
        shift = math.log(3)
        scheduler_config = {
            "base_image_seq_len": 256,
            "base_shift": shift,
            "invert_sigmas": False,
            "max_image_seq_len": 8192,
            "max_shift": shift,
            "num_train_timesteps": 1000,
            "shift": 1.0,
            "shift_terminal": None,
            "stochastic_sampling": False,
            "time_shift_type": "exponential",
            "use_beta_sigmas": False,
            "use_dynamic_shifting": True,
            "use_exponential_sigmas": False,
            "use_karras_sigmas": False,
        }
        return FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

    def _configure_offload(self, pipeline, transformer):
        """Pick an offloading strategy based on the reported GPU memory."""
        if get_gpu_memory() > self._MODEL_OFFLOAD_MIN_GPU_MEMORY:
            print("GPU memory > 18GB, using cpu offload")
            pipeline.enable_model_cpu_offload()
            return

        print("GPU memory <= 18GB, using per-layer offloading for low VRAM")
        transformer.set_offload(True, use_pin_memory=False, num_blocks_on_gpu=1)
        # The transformer was just given its own offloading above, so it is
        # added to the pipeline's exclusion list before sequential offload is
        # enabled for the remaining components.
        pipeline._exclude_from_cpu_offload.append("transformer")
        pipeline.enable_sequential_cpu_offload()
|
|