ssoxye committed on
Commit
c94fce0
·
1 Parent(s): 1a497a0

update sketch input

Browse files
Files changed (1) hide show
  1. app.py +120 -41
app.py CHANGED
@@ -90,7 +90,7 @@ W: Optional[int] = None
90
  @dataclass
91
  class Paths:
92
  person_path: str
93
- depth_path: str
94
  style_path: str
95
  output_path: str
96
 
@@ -101,6 +101,7 @@ def _imread_or_raise(path: str, flag=cv2.IMREAD_COLOR):
101
  raise FileNotFoundError(f"cv2.imread failed: {path} (exists={os.path.exists(path)})")
102
  return img
103
 
 
104
  def apply_parsing_white_mask_to_person_cv2(
105
  person_pil: Image.Image,
106
  parsing_img: Image.Image
@@ -108,16 +109,12 @@ def apply_parsing_white_mask_to_person_cv2(
108
  """
109
  person_pil(RGB) ํฌ๊ธฐ์— parsing_img(L) ๋งˆ์Šคํฌ๋ฅผ ๋งž์ถฐ์„œ
110
  ํฐ์ƒ‰(255) ์˜์—ญ๋งŒ person์„ ๋‚จ๊ธฐ๊ณ  ๋‚˜๋จธ์ง€๋Š” ํฐ์ƒ‰ ๋ฐฐ๊ฒฝ์œผ๋กœ ๋งŒ๋“œ๋Š” ํ•จ์ˆ˜.
111
-
112
- - parsing_img๋Š” person ํฌ๊ธฐ์— ๋ฐ˜๋“œ์‹œ ๋งž์ถฐ์•ผ ํ•จ (NEAREST)
113
  """
114
  person_rgb = np.array(person_pil.convert("RGB"), dtype=np.uint8)
115
 
116
- # parsing ๋งˆ์Šคํฌ (L)
117
  mask = np.array(parsing_img.convert("L"), dtype=np.uint8)
118
 
119
- # โœ… ํ•ต์‹ฌ: ํฌ๊ธฐ ๋ถˆ์ผ์น˜ ํ•ด๊ฒฐ (H,W) ๋งž์ถค
120
- if mask.shape[0] != person_rgb.shape[0] or mask.shape[1] != person_rgb.shape[1]:
121
  mask = cv2.resize(mask, (person_rgb.shape[1], person_rgb.shape[0]), interpolation=cv2.INTER_NEAREST)
122
 
123
  white_mask = (mask == 255)
@@ -129,7 +126,6 @@ def apply_parsing_white_mask_to_person_cv2(
129
  return result_bgr
130
 
131
 
132
-
133
  def compute_hw_from_person(person_path: str):
134
  img = _imread_or_raise(person_path)
135
  orig_h, orig_w = img.shape[:2]
@@ -141,11 +137,10 @@ def compute_hw_from_person(person_path: str):
141
  return new_h, new_w
142
 
143
 
144
- def invert_sketch_area(sketch_pil: Image.Image) -> Image.Image:
145
- return ImageOps.invert(sketch_pil.convert("L")).convert("RGB")
146
-
147
-
148
  def fill_sketch_from_image_path_to_pil(image_path: str) -> Image.Image:
 
 
 
149
  global H, W
150
  if H is None or W is None:
151
  raise RuntimeError("Global H/W not set.")
@@ -186,9 +181,7 @@ def preprocess_mask(mask_img: Image.Image) -> Image.Image:
186
  left_padding = total_padding // 2
187
  right_padding = total_padding - left_padding
188
  m = cv2.copyMakeBorder(
189
- m,
190
- top=0, bottom=0,
191
- left=left_padding, right=right_padding,
192
  borderType=cv2.BORDER_CONSTANT,
193
  value=0,
194
  )
@@ -204,7 +197,11 @@ def preprocess_mask(mask_img: Image.Image) -> Image.Image:
204
 
205
  return Image.fromarray(m, mode="L").convert("RGB")
206
 
 
207
  def make_depth(depth_path: str) -> Image.Image:
 
 
 
208
  global H, W
209
  if H is None or W is None:
210
  raise RuntimeError("Global H/W not set. Call run_one() first.")
@@ -242,6 +239,70 @@ def make_depth(depth_path: str) -> Image.Image:
242
  return image_depth
243
 
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
 
247
  def center_crop_lr_to_768x1024(arr: np.ndarray) -> np.ndarray:
@@ -269,7 +330,7 @@ def save_cropped(imgs, out_path: str):
269
  @lru_cache(maxsize=1)
270
  def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, str, torch.dtype]:
271
  device = "cuda" if torch.cuda.is_available() else "cpu"
272
- dtype = torch.float32 # ํ˜„์žฌ ๋„ˆ ์„ค์ • ์œ ์ง€
273
 
274
  print(f"[PIPE] device={device}, dtype={dtype}", flush=True)
275
 
@@ -332,6 +393,7 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
332
 
333
  H, W = compute_hw_from_person(paths.person_path)
334
 
 
335
  res = run_simple_extractor(
336
  category="Upper-clothes",
337
  input_path=os.path.abspath(paths.person_path),
@@ -341,10 +403,27 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
341
  if parsing_img is None:
342
  raise RuntimeError("run_simple_extractor returned no parsing images.")
343
 
344
- sketch_area = fill_sketch_from_image_path_to_pil(paths.depth_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  merged_img = merge_white_regions_or(parsing_img, sketch_area)
346
  mask_pil = preprocess_mask(merged_img)
347
 
 
348
  person_bgr = _imread_or_raise(paths.person_path)
349
  person_bgr = cv2.resize(person_bgr, (W, H), interpolation=cv2.INTER_AREA)
350
 
@@ -367,25 +446,23 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
367
  person_rgb = cv2.cvtColor(padded_person, cv2.COLOR_BGR2RGB)
368
  person_pil = Image.fromarray(person_rgb)
369
 
370
- depth_map = make_depth(paths.depth_path)
371
-
372
-
373
-
374
- personn = Image.open(paths.person_path)
375
-
376
- garment_ = apply_parsing_white_mask_to_person_cv2(
377
- personn,
378
- parsing_img
379
- )
380
 
 
 
 
381
  garment_rgb = cv2.cvtColor(garment_, cv2.COLOR_BGR2RGB)
382
-
383
- # โœ… (์ค‘์š”) garment_๋Š” ์›๋ณธ person ํฌ๊ธฐ์ผ ์ˆ˜ ์žˆ์œผ๋‹ˆ ์ „์—ญ (W,H)๋กœ ๋งž์ถ˜ ๋’ค padding
384
  garment_rgb = cv2.resize(garment_rgb, (W, H), interpolation=cv2.INTER_AREA)
385
-
386
- target_width = 1024 # โœ… ๊ณ ์ •
387
- padding = (target_width - person_bgr.shape[1]) // 2
388
 
 
389
  garment_rgb = cv2.copyMakeBorder(
390
  garment_rgb,
391
  top=0, bottom=0,
@@ -395,9 +472,11 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
395
  )
396
  garment_pil = Image.fromarray(garment_rgb)
397
 
 
398
  gm = np.array(parsing_img.convert("L"), dtype=np.uint8)
399
- gm = cv2.resize(gm, (W, H), interpolation=cv2.INTER_AREA)
400
  gm = cv2.cvtColor(gm, cv2.COLOR_GRAY2RGB)
 
401
  cur_w2 = gm.shape[1]
402
  if cur_w2 < target_width:
403
  total = target_width - cur_w2
@@ -409,7 +488,6 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS):
409
  gm = gm[:, left2:left2 + target_width]
410
  garment_mask_pil = Image.fromarray(gm)
411
 
412
- # --- sanity sizes (optional)
413
  print(
414
  "[SIZE] person:", person_pil.size,
415
  "mask:", mask_pil.size,
@@ -474,8 +552,10 @@ def set_seed(seed: int):
474
 
475
  def infer_web(person_fp, sketch_fp, style_fp, prompt, steps, seed):
476
  print("[UI] infer_web called", flush=True)
477
- if person_fp is None or sketch_fp is None or style_fp is None:
478
- raise gr.Error("person / sketch / style ์ด๋ฏธ์ง€๋ฅผ ๋ชจ๋‘ ์—…๋กœ๋“œํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.")
 
 
479
 
480
  set_seed(int(seed) if seed is not None else -1)
481
 
@@ -484,7 +564,7 @@ def infer_web(person_fp, sketch_fp, style_fp, prompt, steps, seed):
484
 
485
  paths = Paths(
486
  person_path=person_fp,
487
- depth_path=sketch_fp,
488
  style_path=style_fp,
489
  output_path=out_path,
490
  )
@@ -498,12 +578,12 @@ def infer_web(person_fp, sketch_fp, style_fp, prompt, steps, seed):
498
 
499
 
500
  with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
501
- gr.Markdown("## VISTA Demo\nperson / sketch(guide) / style ์ž…๋ ฅ์œผ๋กœ ๊ฒฐ๊ณผ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.")
502
 
503
  with gr.Row():
504
- person_in = gr.Image(label="Person Image", type="filepath")
505
- sketch_in = gr.Image(label="Sketch / Guide (depth_path)", type="filepath")
506
- style_in = gr.Image(label="Style Image", type="filepath")
507
 
508
  with gr.Row():
509
  prompt_in = gr.Textbox(label="Prompt", value="upper garment", lines=2)
@@ -534,4 +614,3 @@ with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
534
  demo.queue()
535
  if __name__ == "__main__":
536
  demo.launch(server_name="0.0.0.0", server_port=7860)
537
-
 
90
  @dataclass
91
  class Paths:
92
  person_path: str
93
+ depth_path: Optional[str] # โœ… (1) sketch(guide) optional
94
  style_path: str
95
  output_path: str
96
 
 
101
  raise FileNotFoundError(f"cv2.imread failed: {path} (exists={os.path.exists(path)})")
102
  return img
103
 
104
+
105
  def apply_parsing_white_mask_to_person_cv2(
106
  person_pil: Image.Image,
107
  parsing_img: Image.Image
 
109
  """
110
  person_pil(RGB) ํฌ๊ธฐ์— parsing_img(L) ๋งˆ์Šคํฌ๋ฅผ ๋งž์ถฐ์„œ
111
  ํฐ์ƒ‰(255) ์˜์—ญ๋งŒ person์„ ๋‚จ๊ธฐ๊ณ  ๋‚˜๋จธ์ง€๋Š” ํฐ์ƒ‰ ๋ฐฐ๊ฒฝ์œผ๋กœ ๋งŒ๋“œ๋Š” ํ•จ์ˆ˜.
 
 
112
  """
113
  person_rgb = np.array(person_pil.convert("RGB"), dtype=np.uint8)
114
 
 
115
  mask = np.array(parsing_img.convert("L"), dtype=np.uint8)
116
 
117
+ if mask.shape[:2] != person_rgb.shape[:2]:
 
118
  mask = cv2.resize(mask, (person_rgb.shape[1], person_rgb.shape[0]), interpolation=cv2.INTER_NEAREST)
119
 
120
  white_mask = (mask == 255)
 
126
  return result_bgr
127
 
128
 
 
129
  def compute_hw_from_person(person_path: str):
130
  img = _imread_or_raise(person_path)
131
  orig_h, orig_w = img.shape[:2]
 
137
  return new_h, new_w
138
 
139
 
 
 
 
 
140
  def fill_sketch_from_image_path_to_pil(image_path: str) -> Image.Image:
141
+ """
142
+ sketch(guide) ์—…๋กœ๋“œ ์ด๋ฏธ์ง€๋ฅผ filled mask(RGB)๋กœ ๋งŒ๋“œ๋Š” ํ•จ์ˆ˜
143
+ """
144
  global H, W
145
  if H is None or W is None:
146
  raise RuntimeError("Global H/W not set.")
 
181
  left_padding = total_padding // 2
182
  right_padding = total_padding - left_padding
183
  m = cv2.copyMakeBorder(
184
+ m, 0, 0, left_padding, right_padding,
 
 
185
  borderType=cv2.BORDER_CONSTANT,
186
  value=0,
187
  )
 
197
 
198
  return Image.fromarray(m, mode="L").convert("RGB")
199
 
200
+
201
  def make_depth(depth_path: str) -> Image.Image:
202
+ """
203
+ depth_path(guide/sketch)๋กœ๋ถ€ํ„ฐ depth_map ์ƒ์„ฑ (๊ธฐ์กด ๋กœ์ง)
204
+ """
205
  global H, W
206
  if H is None or W is None:
207
  raise RuntimeError("Global H/W not set. Call run_one() first.")
 
239
  return image_depth
240
 
241
 
242
def _edges_from_parsing(parsing_img: Image.Image) -> np.ndarray:
    """
    Extract an edge image (uint8, values 0/255) from a parsing mask.

    Assumes parsing_img is a mask whose white (255) pixels mark the garment
    region; the white edge lines produced here feed the same contour-fill
    pipeline used for uploaded sketches.
    """
    gray = np.array(parsing_img.convert("L"), dtype=np.uint8)

    # Binarize defensively in case the parsing values are not strictly 0/255.
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # Canny edge detection; on a binary mask the thresholds matter little.
    edge_map = cv2.Canny(binary, 50, 150)

    # Thicken the edges slightly so the later contour fill is stable.
    kernel = np.ones((3, 3), np.uint8)
    edge_map = cv2.dilate(edge_map, kernel, iterations=1)

    return edge_map.astype(np.uint8)
261
+
262
+
263
def make_depth_from_parsing_edges(parsing_img: Image.Image) -> Image.Image:
    """
    Fallback depth-map builder used when no sketch/guide image was uploaded.

    Extracts edges from the parsing mask, fills the enclosed regions, then
    applies the same resize / pad / invert / depth-estimator steps as
    make_depth so both code paths yield compatible depth maps.

    Raises RuntimeError if the global H/W have not been set by run_one().
    """
    global H, W
    if H is None or W is None:
        raise RuntimeError("Global H/W not set. Call run_one() first.")

    edge_img = _edges_from_parsing(parsing_img)

    # Fill the closed regions outlined by the (inverted) edge lines.
    inverted = cv2.bitwise_not(edge_img)
    contours, _ = cv2.findContours(inverted, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    filled = inverted.copy()
    cv2.drawContours(filled, contours, -1, (255), thickness=cv2.FILLED)

    # Match the global (W, H) derived from the person image.
    filled = cv2.resize(filled, (W, H), interpolation=cv2.INTER_AREA)

    # Center-pad horizontally to the fixed 1024-pixel canvas width.
    h, w = filled.shape
    pad_total = 1024 - w
    pad_left = pad_total // 2
    pad_right = pad_total - pad_left
    padded = cv2.copyMakeBorder(
        filled, 0, 0, pad_left, pad_right,
        borderType=cv2.BORDER_CONSTANT,
        value=0,
    )

    inverted_image = ImageOps.invert(Image.fromarray(padded))

    # Run the (project-level) depth estimator on the inverted, filled mask.
    with torch.inference_mode():
        image_depth = depth_estimator(inverted_image)["depth"]

    if DEBUG_SAVE:
        image_depth.save("depth.png")

    return image_depth
306
 
307
 
308
  def center_crop_lr_to_768x1024(arr: np.ndarray) -> np.ndarray:
 
330
  @lru_cache(maxsize=1)
331
  def get_pipe_and_device() -> Tuple[StableDiffusionXLControlNetImg2ImgPipeline, str, torch.dtype]:
332
  device = "cuda" if torch.cuda.is_available() else "cpu"
333
+ dtype = torch.float32 # ์œ ์ง€
334
 
335
  print(f"[PIPE] device={device}, dtype={dtype}", flush=True)
336
 
 
393
 
394
  H, W = compute_hw_from_person(paths.person_path)
395
 
396
+ # parsing ์ถ”์ถœ (ํ™”์ดํŠธ=255 ๋งˆ์Šคํฌ ํ˜•ํƒœ๋กœ ์˜ค๋Š” ๊ฑธ ๊ฐ€์ •)
397
  res = run_simple_extractor(
398
  category="Upper-clothes",
399
  input_path=os.path.abspath(paths.person_path),
 
403
  if parsing_img is None:
404
  raise RuntimeError("run_simple_extractor returned no parsing images.")
405
 
406
+ # -------------------------------------------------
407
+ # โœ… (2) UI sketch ์—…๋กœ๋“œ๋Š” optional
408
+ # โœ… (3) depth_path ์—†์œผ๋ฉด sketch_area = parsing_img
409
+ # -------------------------------------------------
410
+ use_depth_path = (
411
+ paths.depth_path is not None
412
+ and isinstance(paths.depth_path, str)
413
+ and len(paths.depth_path) > 0
414
+ and os.path.exists(paths.depth_path)
415
+ )
416
+
417
+ if use_depth_path:
418
+ sketch_area = fill_sketch_from_image_path_to_pil(paths.depth_path)
419
+ else:
420
+ sketch_area = parsing_img.convert("RGB") # โœ… depth_path ์—†์œผ๋ฉด sketch_area = parsing_img
421
+
422
+ # mask ์ƒ์„ฑ
423
  merged_img = merge_white_regions_or(parsing_img, sketch_area)
424
  mask_pil = preprocess_mask(merged_img)
425
 
426
+ # person padding to 1024 width
427
  person_bgr = _imread_or_raise(paths.person_path)
428
  person_bgr = cv2.resize(person_bgr, (W, H), interpolation=cv2.INTER_AREA)
429
 
 
446
  person_rgb = cv2.cvtColor(padded_person, cv2.COLOR_BGR2RGB)
447
  person_pil = Image.fromarray(person_rgb)
448
 
449
+ # -------------------------------------------------
450
+ # โœ… (4) depth_map:
451
+ # - depth_path ์žˆ์œผ๋ฉด make_depth(paths.depth_path)
452
+ # - ์—†์œผ๋ฉด parsing_pil edge ์ถ”์ถœ -> ์ œ๊ณต ์ฝ”๋“œ ์ ์šฉ
453
+ # -------------------------------------------------
454
+ if use_depth_path:
455
+ depth_map = make_depth(paths.depth_path)
456
+ else:
457
+ depth_map = make_depth_from_parsing_edges(parsing_img)
 
458
 
459
+ # garment ์ถ”์ถœ (parsing white mask ๊ธฐ๋ฐ˜)
460
+ personn = Image.open(paths.person_path).convert("RGB")
461
+ garment_ = apply_parsing_white_mask_to_person_cv2(personn, parsing_img)
462
  garment_rgb = cv2.cvtColor(garment_, cv2.COLOR_BGR2RGB)
 
 
463
  garment_rgb = cv2.resize(garment_rgb, (W, H), interpolation=cv2.INTER_AREA)
 
 
 
464
 
465
+ padding = (target_width - W) // 2 if W < target_width else 0
466
  garment_rgb = cv2.copyMakeBorder(
467
  garment_rgb,
468
  top=0, bottom=0,
 
472
  )
473
  garment_pil = Image.fromarray(garment_rgb)
474
 
475
+ # garment mask (parsing ์ž์ฒด๋ฅผ ๋™์ผ ํฌ๊ธฐ๋กœ)
476
  gm = np.array(parsing_img.convert("L"), dtype=np.uint8)
477
+ gm = cv2.resize(gm, (W, H), interpolation=cv2.INTER_NEAREST)
478
  gm = cv2.cvtColor(gm, cv2.COLOR_GRAY2RGB)
479
+
480
  cur_w2 = gm.shape[1]
481
  if cur_w2 < target_width:
482
  total = target_width - cur_w2
 
488
  gm = gm[:, left2:left2 + target_width]
489
  garment_mask_pil = Image.fromarray(gm)
490
 
 
491
  print(
492
  "[SIZE] person:", person_pil.size,
493
  "mask:", mask_pil.size,
 
552
 
553
  def infer_web(person_fp, sketch_fp, style_fp, prompt, steps, seed):
554
  print("[UI] infer_web called", flush=True)
555
+
556
+ # โœ… person / style๋งŒ ํ•„์ˆ˜. sketch๋Š” ์„ ํƒ.
557
+ if person_fp is None or style_fp is None:
558
+ raise gr.Error("person / style ์ด๋ฏธ์ง€๋Š” ํ•„์ˆ˜์ž…๋‹ˆ๋‹ค. (sketch๋Š” ์„ ํƒ)")
559
 
560
  set_seed(int(seed) if seed is not None else -1)
561
 
 
564
 
565
  paths = Paths(
566
  person_path=person_fp,
567
+ depth_path=sketch_fp, # None ๊ฐ€๋Šฅ
568
  style_path=style_fp,
569
  output_path=out_path,
570
  )
 
578
 
579
 
580
  with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
581
+ gr.Markdown("## VISTA Demo\nperson / style ํ•„์ˆ˜, sketch(guide)๋Š” ์„ ํƒ์ž…๋‹ˆ๋‹ค.")
582
 
583
  with gr.Row():
584
+ person_in = gr.Image(label="Person Image (required)", type="filepath")
585
+ sketch_in = gr.Image(label="Sketch / Guide (optional)", type="filepath") # โœ… (2) ์„ ํƒ
586
+ style_in = gr.Image(label="Style Image (required)", type="filepath")
587
 
588
  with gr.Row():
589
  prompt_in = gr.Textbox(label="Prompt", value="upper garment", lines=2)
 
614
  demo.queue()
615
  if __name__ == "__main__":
616
  demo.launch(server_name="0.0.0.0", server_port=7860)