How to run in Python using diffusers (Solution Included!!!)
#4
by
Mohan-diffuser
- opened
I ran into this error repeatedly: `RuntimeError: shape '[1, 32, 52, 2, 78, 2]' is invalid for input of size 259584`. After investigating, I confirmed that the input channel dimension did not match what the Diffusers `FluxKontextPipeline` expects. A small but critical adjustment fixes the issue: `pipe.transformer.config.in_channels = 64  # This is essential; otherwise you will encounter a shape mismatch error`
import torch
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
from diffusers import FluxKontextPipeline, FluxTransformer2DModel, GGUFQuantizationConfig
from transformers import T5EncoderModel
# Q2_K GGUF checkpoint of the FLUX.1 Kontext transformer, referenced by URL.
ckpt_path = "https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF/blob/main/flux1-kontext-dev-Q2_K.gguf"

# Load the transformer on its own from the single GGUF file, with bfloat16
# as both the GGUF compute dtype and the requested torch dtype.
gguf_quant_config = GGUFQuantizationConfig(compute_dtype=torch.bfloat16)
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=gguf_quant_config,
    torch_dtype=torch.bfloat16,
)
# Load the T5 text encoder ("text_encoder_2" subfolder) with 4-bit
# bitsandbytes quantization enabled.
quant_config = TransformersBitsAndBytesConfig(load_in_4bit=True)
text_encoder_2_4bit = T5EncoderModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="text_encoder_2",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
# Assemble the Kontext pipeline, swapping in the GGUF transformer and the
# 4-bit text encoder loaded above in place of the repository defaults.
pipe = FluxKontextPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev",
    transformer=transformer,
    text_encoder_2=text_encoder_2_4bit,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()

# Essential workaround: without forcing in_channels to 64 the pipeline fails
# with a shape-mismatch error (RuntimeError: shape '[...]' is invalid for
# input of size ...).
pipe.transformer.config.in_channels = 64
from diffusers.utils import load_image
from PIL import Image
import matplotlib.pyplot as plt
# Fetch the reference picture that will be edited.
input_image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"
)

# Run the edit described by the prompt and keep the first generated image.
pipe_output = pipe(
    image=input_image,
    prompt="Add a hat to the cat",
    guidance_scale=2.5,
)
image = pipe_output.images[0]