imageToImage cpu_offload error
#1
by
ZKong
- opened
RuntimeError: Input type (CUDABFloat16Type) and weight type (CPUBFloat16Type) should be the same
image = pipe(
^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\utils\_contextlib.py", line 120, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\diffusers\pipelines\glm_image\pipeline_glm_image.py", line 621, in __call__
prior_token_ids, prior_token_image_ids = self.generate_prior_tokens(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\diffusers\pipelines\glm_image\pipeline_glm_image.py", line 295, in generate_prior_tokens
prior_token_image_embed = self.vision_language_encoder.get_image_features(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 1373, in get_image_features
return self.model.get_image_features(pixel_values, image_grid_thw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 1175, in get_image_features
image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 703, in forward
hidden_states = self.patch_embed(pixel_values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\transformers\models\glm_image\modeling_glm_image.py", line 191, in forward
hidden_states = self.proj(hidden_states.to(dtype=target_dtype)).view(-1, self.embed_dim)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\conv.py", line 548, in forward
return self._conv_forward(input, self.weight, self.bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "M:\ComfyUI\312_cu128\python_embeded\Lib\site-packages\torch\nn\modules\conv.py", line 543, in _conv_forward
return F.conv2d(
^^^^^^^^^
RuntimeError: Input type (CUDABFloat16Type) and weight type (CPUBFloat16Type) should be the same
import torch
import diffusers
from sdnq import SDNQConfig # import sdnq to register it into diffusers and transformers
from sdnq.common import use_torch_compile as triton_is_available
from sdnq.loader import apply_sdnq_options_to_model
from PIL import Image
pipe = diffusers.GlmImagePipeline.from_pretrained("m:\GLM-Image-SDNQ-4bit-dynamic", torch_dtype=torch.bfloat16, device_map="cuda")
# Enable INT8 MatMul for AMD, Intel ARC and Nvidia GPUs:
#if triton_is_available and (torch.cuda.is_available() or torch.xpu.is_available()):
#pipe.transformer = apply_sdnq_options_to_model(pipe.transformer, use_quantized_matmul=True)
# pipe.transformer = torch.compile(pipe.transformer) # optional for faster speeds
pipe.reset_device_map()
pipe.enable_model_cpu_offload()
img1 = "img1.jpg"
img2 = "img2.jpg"
#prompt = "Replace the background of the snow forest with an underground station featuring an automatic escalator."
image1 = Image.open(img1).convert("RGB")
image2 = Image.open(img2).convert("RGB")
prompt = "提取两张图片的男女主体,生成他们的情侣照"
image = pipe(
prompt=prompt,
image=[image1,image2],
height=32 * 32,
width=36 * 32,
num_inference_steps=50,
guidance_scale=1.5,
generator=torch.Generator(device="cuda").manual_seed(42),
).images[0]
image.save("output_t2i-sdnq.png")