Fix diffusers3 import + lazy assets
app.py CHANGED
@@ -1,4 +1,16 @@
 import os
+import sys
+
+# ---------------------------------------------------------
+# 0) Make sure local packages (diffusers3, preprocess, etc.) are importable on HF Spaces
+# ---------------------------------------------------------
+ROOT = os.path.dirname(os.path.abspath(__file__))
+if ROOT not in sys.path:
+    sys.path.insert(0, ROOT)
+
+print("[BOOT] ROOT =", ROOT, flush=True)
+print("[BOOT] sys.path[:5] =", sys.path[:5], flush=True)
+
 import tempfile
 from dataclasses import dataclass
 from functools import lru_cache
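Python resolves imports in sys.path order, so putting the Space root at position 0 lets the vendored diffusers3/ directory shadow any same-named package in site-packages. A minimal standalone sketch of the same check (the assert is illustrative, not part of the commit):

    import importlib
    import os
    import sys

    ROOT = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, ROOT)  # local packages now win over site-packages

    # Confirm the module really came from the local tree, not a pip-installed copy.
    mod = importlib.import_module("diffusers3")
    print("loaded from:", getattr(mod, "__file__", "<?>"))
    assert getattr(mod, "__file__", "").startswith(ROOT)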
@@ -14,6 +26,11 @@ from PIL import Image, ImageOps
 from huggingface_hub import hf_hub_download
 
 from diffusers import UniPCMultistepScheduler
+
+# Show where diffusers3 is imported from (helps diagnose import collisions on Spaces)
+import diffusers3
+print("[BOOT] diffusers3 loaded from:", getattr(diffusers3, "__file__", "<?>"), flush=True)
+
 from diffusers3.models.controlnet import ControlNetModel
 from diffusers3.pipelines.controlnet.pipeline_controlnet_sd_xl_img2img_img import (
     StableDiffusionXLControlNetImg2ImgPipeline,
@@ -30,7 +47,7 @@ from preprocess.simple_extractor import run as run_simple_extractor
 BASE_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"
 CONTROLNET_ID = "diffusers/controlnet-depth-sdxl-1.0"
 
-# your assets dataset repo (
+# your assets dataset repo (weights store)
 ASSETS_REPO = os.getenv("ASSETS_REPO", "soye/VISTA_assets")
 ASSETS_REPO_TYPE = "dataset"  # uploaded as a dataset repo
 
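asset_path itself is defined in app.py outside this diff (its signature appears in the next hunk's header). A plausible shape, sketched here purely as an assumption, is a thin wrapper over hf_hub_download that fetches one file from the assets repo and returns its local cache path:

    import os

    from huggingface_hub import hf_hub_download

    ASSETS_REPO = os.getenv("ASSETS_REPO", "soye/VISTA_assets")
    ASSETS_REPO_TYPE = "dataset"

    def asset_path(relpath: str) -> str:
        # hf_hub_download caches the file locally and returns its path;
        # repo_type="dataset" is needed because the weights live in a
        # dataset repo rather than a model repo.
        return hf_hub_download(
            repo_id=ASSETS_REPO,
            filename=relpath,
            repo_type=ASSETS_REPO_TYPE,
        )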
@@ -50,14 +67,29 @@ def asset_path(relpath: str) -> str:
 )
 
 
-
-
-
-
-
+@lru_cache(maxsize=1)
+def get_assets():
+    """
+    Lazily downloads required assets on first use.
+
+    Returns:
+        (image_encoder_dir, ip_ckpt_path, schp_ckpt_path)
+    """
+    print("[ASSETS] Downloading assets from:", ASSETS_REPO, flush=True)
+
+    # Image encoder folder is needed by IPAdapterXL
+    image_encoder_weight = asset_path("image_encoder/model.safetensors")
+    _ = asset_path("image_encoder/config.json")  # ensure config exists locally
+    image_encoder_dir = os.path.dirname(image_encoder_weight)
+
+    ip_ckpt = asset_path("ip_adapter/ip-adapter_sdxl_vit-h.bin")
+    schp_ckpt = asset_path("preprocess_ckpts/exp-schp-201908301523-atr.pth")
+
+    print("[ASSETS] image_encoder_dir =", image_encoder_dir, flush=True)
+    print("[ASSETS] ip_ckpt =", ip_ckpt, flush=True)
+    print("[ASSETS] schp_ckpt =", schp_ckpt, flush=True)
+    return image_encoder_dir, ip_ckpt, schp_ckpt
 
-IP_CKPT = asset_path("ip_adapter/ip-adapter_sdxl_vit-h.bin")
-SCHP_CKPT = asset_path("preprocess_ckpts/exp-schp-201908301523-atr.pth")
 
 DEFAULT_STEPS = 40
 DEBUG_SAVE = False
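get_assets() is the "lazy assets" half of the commit: the module-level asset_path calls are gone, so importing app.py no longer triggers any downloads, and lru_cache(maxsize=1) on a zero-argument function means the first caller does the work while every later call returns the cached tuple. The pattern in isolation:

    from functools import lru_cache

    @lru_cache(maxsize=1)
    def get_expensive_thing() -> str:
        # Body runs exactly once per process; later calls hit the cache.
        print("downloading...")
        return "/local/path/to/asset"

    get_expensive_thing()  # prints "downloading..." and does the work
    get_expensive_thing()  # returns instantly from the cache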
@@ -84,10 +116,8 @@ def _imread_or_raise(path: str, flag=cv2.IMREAD_COLOR):
 
 def compute_hw_from_person(person_path: str):
     """
-    Same idea as your code:
     - height fixed at 1024; W computed to preserve the aspect ratio
-
-    cap W at 1024 for demo stability.
+    - cap W at 1024 for demo stability.
     """
     img = _imread_or_raise(person_path)
     orig_h, orig_w = img.shape[:2]
@@ -186,6 +216,8 @@ def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, s
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float16 if device == "cuda" else torch.float32
 
+    print(f"[PIPE] device={device}, dtype={dtype}", flush=True)
+
     cn_kwargs = dict(torch_dtype=dtype, use_safetensors=True)
     if dtype == torch.float16:
         cn_kwargs["variant"] = "fp16"
@@ -203,8 +235,8 @@ def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, s
     pipe.enable_attention_slicing()
     try:
         pipe.enable_xformers_memory_efficient_attention()
-    except Exception:
-        pass
+    except Exception as e:
+        print("[PIPE] xformers not enabled:", repr(e), flush=True)
 
     return pipe, device, dtype
 
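Logging the exception instead of silently passing keeps startup resilient while making it visible why xformers was skipped (typically the wheel is not installed on the Space). The same guarded-enable idea generalized, with try_enable and fake_enable as illustrative names only:

    def try_enable(enable, label: str) -> None:
        try:
            enable()
        except Exception as e:
            # Log rather than swallow, so missing optional deps are diagnosable.
            print(f"[PIPE] {label} not enabled:", repr(e), flush=True)

    def fake_enable() -> None:
        # Stand-in for pipe.enable_xformers_memory_efficient_attention()
        raise ImportError("xformers is not installed")

    try_enable(fake_enable, "xformers")  # prints the reason instead of crashing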
@@ -213,13 +245,16 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
     global H, W
     pipe, device, _dtype = get_pipe_and_device()
 
+    # lazy assets download here (NOT at import time)
+    image_encoder_dir, ip_ckpt, schp_ckpt = get_assets()
+
     H, W = compute_hw_from_person(paths.person_path)
 
     # parsing extractor (keep the original call shape)
     res = run_simple_extractor(
         category="Upper-clothes",
         input_path=os.path.abspath(paths.person_path),
-        model_restore=SCHP_CKPT,
+        model_restore=schp_ckpt,
     )
     parsing_img = res["images"][0] if res.get("images") else None
     if parsing_img is None:
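Since get_assets() now runs inside run_one, the first generation request pays the whole download cost. If that first-request stall matters, one option (sketched as a possible follow-up, not part of this commit) is to pre-warm the cache in a daemon thread at startup:

    import threading

    def _warm_up() -> None:
        try:
            get_assets()  # populates the lru_cache; later calls return instantly
        except Exception as e:
            print("[ASSETS] warm-up failed:", repr(e), flush=True)

    # Daemon thread: never blocks process exit if the download hangs.
    threading.Thread(target=_warm_up, daemon=True).start()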
@@ -253,8 +288,7 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
 
     depth_map = make_depth(paths.depth_path)
 
-    # garment / garment_mask
-    # here, only size/padding are matched from parsing_img before passing it on
+    # garment / garment_mask
     garment_pil = person_pil.copy()
 
     gm = np.array(parsing_img.convert("L"), dtype=np.uint8)
@@ -271,8 +305,8 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
 
     ip_model = IPAdapterXL(
         pipe,
-
-
+        image_encoder_dir,
+        ip_ckpt,
         device,
         mask_pil,
         person_pil,
@@ -358,4 +392,4 @@ with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
 
 demo.queue()
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
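Keeping server_name="0.0.0.0" and server_port=7860 explicit matches what Hugging Face Spaces expects from a Gradio app, so the app listens exactly where the Space's proxy looks for it, locally and on the Space alike.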
|