How to run in Python using diffusers (Solution Included!!!)

#4
by Mohan-diffuser - opened

I ran into this error repeatedly: `RuntimeError: shape '[1, 32, 52, 2, 78, 2]' is invalid for input of size 259584`. After investigating, I confirmed that the transformer's input channel dimension did not match what the Diffusers FluxKontextPipeline expects. A small but critical adjustment fixes the issue:
pipe.transformer.config.in_channels = 64 # This is essential, otherwise you will encounter a shape mismatch error

import torch
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
from diffusers import FluxKontextPipeline, FluxTransformer2DModel, GGUFQuantizationConfig
from transformers import T5EncoderModel

# GGUF-quantized FLUX.1 Kontext transformer checkpoint (Q2_K variant).
ckpt_path = (
    "https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF/blob/main/flux1-kontext-dev-Q2_K.gguf"
)
# Load the transformer from the single-file checkpoint, de-quantizing to
# bfloat16 for compute. The model config is taken explicitly from the Kontext
# repository rather than being inferred from the checkpoint contents, so that
# `transformer.config.in_channels` matches what FluxKontextPipeline expects and
# the pipeline does not fail with a latent shape mismatch.
# NOTE(review): expects the repo's transformer config to report in_channels=64
# -- confirm against the published config.json.
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
    config="black-forest-labs/FLUX.1-Kontext-dev",
    subfolder="transformer",
)

# bitsandbytes settings: load the text encoder weights in 4-bit precision.
quant_config = TransformersBitsAndBytesConfig(load_in_4bit=True)

# Keyword arguments for loading the T5 encoder from the repo's
# "text_encoder_2" subfolder with the 4-bit quantization config above.
_t5_load_kwargs = {
    "subfolder": "text_encoder_2",
    "quantization_config": quant_config,
    "torch_dtype": torch.bfloat16,
}
text_encoder_2_4bit = T5EncoderModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev", **_t5_load_kwargs
)


# Pre-loaded (quantized) components that replace the pipeline's defaults.
_component_overrides = {
    "transformer": transformer,
    "text_encoder_2": text_encoder_2_4bit,
}
pipe = FluxKontextPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev",
    torch_dtype=torch.bfloat16,
    **_component_overrides,
)
pipe.enable_model_cpu_offload()

# Workaround: force the transformer's configured input channel count to 64.
# This is essential -- otherwise the pipeline fails with a shape mismatch error.
pipe.transformer.config.in_channels = 64

from diffusers.utils import load_image
from PIL import Image
import matplotlib.pyplot as plt

# Fetch the reference picture that the prompt will edit.
input_image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"
)

# Run the pipeline on the reference picture and keep the first generated image.
_pipeline_output = pipe(
    image=input_image,
    prompt="Add a hat to the cat",
    guidance_scale=2.5,
)
image = _pipeline_output.images[0]

Sign up or log in to comment