imageToImage cpu_offload error

by ZKong - opened Jan 15

•

RuntimeError: Input type (CUDABFloat16Type) and weight type (CPUBFloat16Type) should be the same

    image = pipe(
            ^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\utils\_contextlib.py", line 120, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\diffusers\pipelines\glm_image\pipeline_glm_image.py", line 621, in __call__
    prior_token_ids, prior_token_image_ids = self.generate_prior_tokens(
                                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\diffusers\pipelines\glm_image\pipeline_glm_image.py", line 295, in generate_prior_tokens
    prior_token_image_embed = self.vision_language_encoder.get_image_features(
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 1373, in get_image_features
    return self.model.get_image_features(pixel_values, image_grid_thw)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 1175, in get_image_features
    image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1775, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1786, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 703, in forward
    hidden_states = self.patch_embed(pixel_values)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1775, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1786, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 191, in forward
    hidden_states = self.proj(hidden_states.to(dtype=target_dtype)).view(-1, self.embed_dim)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1775, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1786, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\conv.py", line 548, in forward
    return self._conv_forward(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\conv.py", line 543, in _conv_forward
    return F.conv2d(
           ^^^^^^^^^
RuntimeError: Input type (CUDABFloat16Type) and weight type (CPUBFloat16Type) should be the same

import torch
import diffusers
from sdnq import SDNQConfig # import sdnq to register it into diffusers and transformers
from sdnq.common import use_torch_compile as triton_is_available
from sdnq.loader import apply_sdnq_options_to_model
from PIL import Image

pipe = diffusers.GlmImagePipeline.from_pretrained("m:\GLM-Image-SDNQ-4bit-dynamic", torch_dtype=torch.bfloat16, device_map="cuda")
# Enable INT8 MatMul for AMD, Intel ARC and Nvidia GPUs:
#if triton_is_available and (torch.cuda.is_available() or torch.xpu.is_available()):
    #pipe.transformer = apply_sdnq_options_to_model(pipe.transformer, use_quantized_matmul=True)
    # pipe.transformer = torch.compile(pipe.transformer) # optional for faster speeds
pipe.reset_device_map()
pipe.enable_model_cpu_offload()
img1 = "img1.jpg"
img2 = "img2.jpg"
#prompt = "Replace the background of the snow forest with an underground station featuring an automatic escalator."
image1 = Image.open(img1).convert("RGB")
image2 = Image.open(img2).convert("RGB")
prompt = "提取两张图片的男女主体，生成他们的情侣照"
image = pipe(
    prompt=prompt,
    image=[image1,image2],
    height=32 * 32,
    width=36 * 32,
    num_inference_steps=50,
    guidance_scale=1.5,
    generator=torch.Generator(device="cuda").manual_seed(42),

).images[0]

image.save("output_t2i-sdnq.png")

Upload images, audio, and videos by dragging in the text input, pasting, or clicking here.

Tap or paste here to upload images

· Sign up or log in to comment