ssoxye committed on
Commit
b5130ce
·
1 Parent(s): f4ac6fe

Fix VAE dtype mismatch (fp16)

Browse files
Files changed (1) hide show
  1. app.py +21 -9
app.py CHANGED
@@ -211,6 +211,9 @@ def save_cropped(imgs, out_path: str):
211
  imageio.imsave(out_path, out)
212
 
213
 
 
 
 
214
  @lru_cache(maxsize=1)
215
  def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, str, torch.dtype]:
216
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -218,18 +221,28 @@ def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, s
218
 
219
  print(f"[PIPE] device={device}, dtype={dtype}", flush=True)
220
 
 
221
  cn_kwargs = dict(torch_dtype=dtype, use_safetensors=True)
222
  if dtype == torch.float16:
223
  cn_kwargs["variant"] = "fp16"
224
-
225
  controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, **cn_kwargs).to(device)
226
 
227
- # ---------------------------------------------------------
228
- # Fix 1) Force a consistent dtype for VAE to avoid:
229
- # RuntimeError: Input type (c10::Half) and bias type (float) should be the same
230
- # ---------------------------------------------------------
 
 
 
 
 
 
 
 
 
231
  pipe_kwargs = dict(
232
  controlnet=controlnet,
 
233
  use_safetensors=True,
234
  torch_dtype=dtype,
235
  add_watermarker=False,
@@ -242,15 +255,13 @@ def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, s
242
  **pipe_kwargs,
243
  ).to(device)
244
 
245
- # Force VAE params/bias dtype to match the pipeline dtype
246
  if device == "cuda":
247
  try:
248
  pipe.vae.to(dtype=dtype)
249
- # Some pipelines keep VAE in fp32 on purpose; your custom pipeline doesn't
250
- # auto-cast inputs to vae.dtype before encode, so disable upcast if present.
251
  if hasattr(pipe.vae, "config") and hasattr(pipe.vae.config, "force_upcast"):
252
  pipe.vae.config.force_upcast = False
253
- print(f"[PIPE] VAE casted to {dtype}. force_upcast set to False (if supported).", flush=True)
254
  except Exception as e:
255
  print("[PIPE] VAE dtype cast failed:", repr(e), flush=True)
256
 
@@ -264,6 +275,7 @@ def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, s
264
  return pipe, device, dtype
265
 
266
 
 
267
  def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
268
  global H, W
269
  pipe, device, _dtype = get_pipe_and_device()
 
211
  imageio.imsave(out_path, out)
212
 
213
 
214
+ from diffusers import UniPCMultistepScheduler, AutoencoderKL
215
+ # (위 import 라인에 AutoencoderKL 추가)
216
+
217
  @lru_cache(maxsize=1)
218
  def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, str, torch.dtype]:
219
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
221
 
222
  print(f"[PIPE] device={device}, dtype={dtype}", flush=True)
223
 
224
+ # ControlNet
225
  cn_kwargs = dict(torch_dtype=dtype, use_safetensors=True)
226
  if dtype == torch.float16:
227
  cn_kwargs["variant"] = "fp16"
 
228
  controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, **cn_kwargs).to(device)
229
 
230
+ # ✅ VAE를 먼저 safetensors로 강제 로드해서 주입 (bin 찾는 경로 우회)
231
+ vae_kwargs = dict(
232
+ torch_dtype=dtype,
233
+ use_safetensors=True,
234
+ subfolder="vae",
235
+ )
236
+ # SDXL base가 fp16 variant를 제공하는 경우에만 도움이 됨 (없어도 동작)
237
+ if dtype == torch.float16:
238
+ vae_kwargs["variant"] = "fp16"
239
+
240
+ vae = AutoencoderKL.from_pretrained(BASE_MODEL_ID, **vae_kwargs).to(device)
241
+
242
+ # Pipeline
243
  pipe_kwargs = dict(
244
  controlnet=controlnet,
245
+ vae=vae, # ✅ 주입
246
  use_safetensors=True,
247
  torch_dtype=dtype,
248
  add_watermarker=False,
 
255
  **pipe_kwargs,
256
  ).to(device)
257
 
258
+ # (이전 dtype mismatch 방지) VAE dtype 강제 일치
259
  if device == "cuda":
260
  try:
261
  pipe.vae.to(dtype=dtype)
 
 
262
  if hasattr(pipe.vae, "config") and hasattr(pipe.vae.config, "force_upcast"):
263
  pipe.vae.config.force_upcast = False
264
+ print(f"[PIPE] VAE casted to {dtype}. force_upcast False (if supported).", flush=True)
265
  except Exception as e:
266
  print("[PIPE] VAE dtype cast failed:", repr(e), flush=True)
267
 
 
275
  return pipe, device, dtype
276
 
277
 
278
+
279
  def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
280
  global H, W
281
  pipe, device, _dtype = get_pipe_and_device()