# catplusplus's picture
# Upload folder using huggingface_hub
# 1e103b7 verified
import torch
from nunchaku.utils import get_gpu_memory, get_precision
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
class QwenImageBackend:
    """Load a Qwen-Image text-to-image pipeline backed by a Nunchaku transformer.

    The instance only stores configuration until :meth:`load` is called.
    ``load`` builds the scheduler, loads the optimized transformer, assembles
    a ``QwenImagePipeline`` around them and picks an offloading strategy based
    on the value reported by ``get_gpu_memory()``.
    """

    def __init__(self, model_id, optimized_model_path=None):
        """Record where the base pipeline and optimized transformer live.

        Args:
            model_id: Identifier handed to ``QwenImagePipeline.from_pretrained``.
            optimized_model_path: Location handed to
                ``NunchakuQwenImageTransformer2DModel.from_pretrained``.
                Optional here, but :meth:`load` refuses to run without it.
        """
        self.model_id = model_id
        self.optimized_model_path = optimized_model_path
        self.pipeline = None  # populated by load()
        # Default rank as per example. NOTE(review): nothing in this class
        # reads it; kept because external callers may.
        self.rank = 32

    @staticmethod
    def _build_scheduler():
        """Create the FlowMatchEulerDiscreteScheduler used by the pipeline."""
        # Imported lazily, as in the rest of this class, so that importing
        # the module does not pull in diffusers.
        import math

        from diffusers import FlowMatchEulerDiscreteScheduler

        # Scheduler config (same as QwenBackend)
        scheduler_config = {
            "base_image_seq_len": 256,
            "base_shift": math.log(3),
            "invert_sigmas": False,
            "max_image_seq_len": 8192,
            "max_shift": math.log(3),
            "num_train_timesteps": 1000,
            "shift": 1.0,
            "shift_terminal": None,
            "stochastic_sampling": False,
            "time_shift_type": "exponential",
            "use_beta_sigmas": False,
            "use_dynamic_shifting": True,
            "use_exponential_sigmas": False,
            "use_karras_sigmas": False,
        }
        return FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

    @staticmethod
    def _configure_offload(pipeline, transformer):
        """Choose an offloading strategy (same logic as QwenBackend)."""
        if get_gpu_memory() > 18:
            print("GPU memory > 18GB, using cpu offload")
            pipeline.enable_model_cpu_offload()
            return

        print("GPU memory <= 18GB, using per-layer offloading for low VRAM")
        transformer.set_offload(True, use_pin_memory=False, num_blocks_on_gpu=1)
        # The transformer is excluded by name before sequential offload is
        # enabled; presumably so the pipeline-level offload does not also
        # manage a module that already offloads itself via set_offload().
        pipeline._exclude_from_cpu_offload.append("transformer")
        pipeline.enable_sequential_cpu_offload()

    def load(self):
        """Build the pipeline and return it for both T2I and edit endpoints.

        Returns:
            A 2-tuple ``(pipeline, pipeline)``. Both entries are the same
            object; the edit endpoint reuses the text-to-image pipeline.

        Raises:
            ValueError: If ``optimized_model_path`` was not provided. Checked
                up front so a missing path fails with a clear message before
                any heavy imports or model loading happen.
        """
        if not self.optimized_model_path:
            raise ValueError(
                "optimized_model_path is required to load the Nunchaku "
                "Qwen-Image transformer"
            )

        print(f"Loading QwenImageBackend from {self.model_id}...")
        scheduler = self._build_scheduler()

        # Load transformer (optimized model)
        print(f"Loading NunchakuQwenImageTransformer2DModel from {self.optimized_model_path}...")
        transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(
            self.optimized_model_path
        )

        # Load T2I pipeline
        from diffusers import QwenImagePipeline

        pipeline = QwenImagePipeline.from_pretrained(
            self.model_id,
            transformer=transformer,
            scheduler=scheduler,
            torch_dtype=torch.bfloat16,
        )

        self._configure_offload(pipeline, transformer)

        self.pipeline = pipeline
        # For edit endpoint we reuse the same pipeline (ignores image)
        return self.pipeline, self.pipeline