update bitwise
app.py CHANGED
```diff
@@ -114,8 +114,8 @@ W: Optional[int] = None
 @dataclass
 class Paths:
     person_path: str
-    depth_path: Optional[str]
-    style_path: str
+    depth_path: Optional[str]  # sketch (guide) optional
+    style_path: Optional[str]  # ✅ style optional (changed)
     output_path: str
 
 
```
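Both optional fields matter downstream: `run_one` only reads the sketch and style images when the paths actually exist. A minimal sketch of a person-only invocation, restating the dataclass so the snippet runs on its own (`person.jpg` and `out.png` are placeholder names):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Paths:
    person_path: str
    depth_path: Optional[str]   # sketch (guide) is optional
    style_path: Optional[str]   # style image is optional
    output_path: str

# Person-only run: sketch and style are simply passed as None.
p = Paths(person_path="person.jpg", depth_path=None,
          style_path=None, output_path="out.png")
print(p.depth_path is None, p.style_path is None)  # True True
```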
```diff
@@ -184,7 +184,7 @@ def remove_small_white_components(
     parsing_img: Image.Image,
     *,
     white_threshold: int = 128,
-    min_white_area: int =
+    min_white_area: int = 150,
     use_open: bool = False,
     open_ksize: int = 3,
     morph_iters: int = 1,
```
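The body of `remove_small_white_components` is not part of this diff, so the following is only a sketch of the technique its signature suggests: binarize at `white_threshold`, then keep white connected components whose pixel area reaches `min_white_area`. All names besides the two parameters are assumptions:

```python
import cv2
import numpy as np
from PIL import Image

def drop_small_white_blobs(parsing_img: Image.Image,
                           white_threshold: int = 128,
                           min_white_area: int = 150) -> Image.Image:
    # Pixels at or above the threshold count as "white".
    mask = (np.array(parsing_img.convert("L")) >= white_threshold).astype(np.uint8) * 255
    # Label 8-connected white components and read each component's area.
    n, labels, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
    keep = np.zeros_like(mask)
    for i in range(1, n):  # label 0 is the background
        if stats[i, cv2.CC_STAT_AREA] >= min_white_area:
            keep[labels == i] = 255
    return Image.fromarray(keep, mode="L")
```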
```diff
@@ -220,7 +220,6 @@ def remove_small_white_components(
     return Image.fromarray(mask, mode="L")
 
 
-
 def compute_hw_from_person(person_path: str):
     img = _imread_or_raise(person_path)
     orig_h, orig_w = img.shape[:2]
```
```diff
@@ -237,6 +236,7 @@ def fill_sketch_from_image_path_to_pil(image_path: str) -> Image.Image:
     if H is None or W is None:
         raise RuntimeError("Global H/W not set.")
     img = _imread_or_raise(image_path, cv2.IMREAD_GRAYSCALE)
+    img = cv2.bitwise_not(img)
     img = cv2.resize(img, (W, H), interpolation=cv2.INTER_NEAREST)
     _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
     contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
```
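For an 8-bit image, `cv2.bitwise_not` flips every bit, i.e. computes `255 - pixel`, so inserting it before the `THRESH_BINARY_INV` threshold swaps which side of the threshold the sketch strokes land on. A self-contained toy check:

```python
import cv2
import numpy as np

# White canvas (255) with a single black stroke pixel (0).
img = np.full((3, 3), 255, dtype=np.uint8)
img[1, 1] = 0

inv = cv2.bitwise_not(img)  # per-pixel 255 - v
assert inv[1, 1] == 255 and inv[0, 0] == 0

# THRESH_BINARY_INV maps values above 127 to 0 and the rest to 255:
_, without = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)   # stroke -> 255
_, with_inv = cv2.threshold(inv, 127, 255, cv2.THRESH_BINARY_INV)  # stroke -> 0
print(without[1, 1], with_inv[1, 1])  # 255 0
```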
```diff
@@ -283,10 +283,10 @@ def make_depth(depth_path: str) -> Image.Image:
         raise RuntimeError("Global H/W not set. Call run_one() first.")
 
     depth_img = _imread_or_raise(depth_path, 0)
-    inverted_depth = cv2.bitwise_not(depth_img)
-    contours, _ = cv2.findContours(
+    # inverted_depth = cv2.bitwise_not(depth_img)
+    contours, _ = cv2.findContours(depth_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
-    filled_depth =
+    filled_depth = depth_img.copy()
     cv2.drawContours(filled_depth, contours, -1, (255), thickness=cv2.FILLED)
 
     filled_depth = cv2.resize(filled_depth, (W, H), interpolation=cv2.INTER_AREA)
```
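`findContours` with `RETR_EXTERNAL` keeps only the outermost contours, and `drawContours` with `thickness=cv2.FILLED` paints their interiors, which closes any holes inside the depth silhouette. A toy demonstration on a hollow outline:

```python
import cv2
import numpy as np

# Hollow white square outline on a black background.
m = np.zeros((7, 7), dtype=np.uint8)
cv2.rectangle(m, (1, 1), (5, 5), 255, thickness=1)

contours, _ = cv2.findContours(m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
filled = m.copy()
cv2.drawContours(filled, contours, -1, (255), thickness=cv2.FILLED)

print(m[3, 3], filled[3, 3])  # 0 255: the interior of the outline is now solid
```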
```diff
@@ -421,6 +421,29 @@ _UI_TO_EXTRACTOR_CATEGORY = {
 }
 
 
+def _has_valid_file(path: Optional[str]) -> bool:
+    return (
+        path is not None
+        and isinstance(path, str)
+        and len(path) > 0
+        and os.path.exists(path)
+    )
+
+
+def _resolve_content_style_scales(style_present: bool, prompt_present: bool) -> Tuple[float, float]:
+    """
+    Requirements:
+    - no style image: (0.0, 0.0)
+    - style image but no prompt: (0.4, 0.65)
+    - both present: (0.4, 0.5)
+    """
+    if not style_present:
+        return 0.0, 0.0
+    if not prompt_present:
+        return 0.4, 0.65
+    return 0.4, 0.5
+
+
 def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str = "Dress"):
     global H, W
     pipe, device, _dtype = get_pipe_and_device()
```
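A quick runnable check of the resolver's three cases, with the helper restated inline:

```python
from typing import Tuple

def _resolve_content_style_scales(style_present: bool, prompt_present: bool) -> Tuple[float, float]:
    if not style_present:
        return 0.0, 0.0    # no style image: both scales disabled
    if not prompt_present:
        return 0.4, 0.65   # style only: lean harder on the style image
    return 0.4, 0.5        # style and prompt together

assert _resolve_content_style_scales(False, False) == (0.0, 0.0)
assert _resolve_content_style_scales(False, True)  == (0.0, 0.0)  # absent style wins
assert _resolve_content_style_scales(True,  False) == (0.4, 0.65)
assert _resolve_content_style_scales(True,  True)  == (0.4, 0.5)
```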
```diff
@@ -438,9 +461,7 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str
     parsing_img = res["images"][0] if res.get("images") else None
     if parsing_img is None:
         raise RuntimeError("run_simple_extractor returned no parsing images.")
-
 
-
     parsing_img = remove_small_white_components(
         parsing_img,
         white_threshold=128,
```
```diff
@@ -448,13 +469,7 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str
         use_open=False,
     )
 
-
-    use_depth_path = (
-        paths.depth_path is not None
-        and isinstance(paths.depth_path, str)
-        and len(paths.depth_path) > 0
-        and os.path.exists(paths.depth_path)
-    )
+    use_depth_path = _has_valid_file(paths.depth_path)
 
     if use_depth_path:
         sketch_area = fill_sketch_from_image_path_to_pil(paths.depth_path)
```
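Behaviorally, `_has_valid_file` rejects `None`, empty strings, and dangling paths, and accepts only a path that exists on disk. A small check, restating the helper in a folded form (`isinstance(None, str)` is already `False`, so the explicit `None` test is redundant):

```python
import os
import tempfile
from typing import Optional

def _has_valid_file(path: Optional[str]) -> bool:
    # The isinstance check subsumes the None check in the original.
    return isinstance(path, str) and len(path) > 0 and os.path.exists(path)

with tempfile.NamedTemporaryFile() as f:
    assert _has_valid_file(f.name)          # existing file passes
assert not _has_valid_file(None)
assert not _has_valid_file("")
assert not _has_valid_file("/no/such/file")
```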
```diff
@@ -482,7 +497,6 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str
     garment_bgr = apply_parsing_white_mask_to_person_cv2(personn, parsing_img)
     garment_rgb = cv2.cvtColor(garment_bgr, cv2.COLOR_BGR2RGB)
     garment_rgb = cv2.resize(garment_rgb, (W, H), interpolation=cv2.INTER_AREA)
-
     garment_rgb = _pad_or_crop_to_width_np(garment_rgb, 1024, pad_value=[255, 255, 255])
     garment_pil = Image.fromarray(garment_rgb)
 
```
```diff
@@ -493,6 +507,11 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str
     gm = _pad_or_crop_to_width_np(gm, 1024, pad_value=[0, 0, 0])
     garment_mask_pil = Image.fromarray(gm)
 
+    # ✅ decide the scales from what is actually present
+    style_present = _has_valid_file(paths.style_path)
+    prompt_present = (prompt is not None) and (str(prompt).strip() != "")
+    content_scale, style_scale = _resolve_content_style_scales(style_present, prompt_present)
+
     print(
         "[SIZE] person:", person_pil.size,
         "mask:", mask_pil.size,
```
```diff
@@ -501,6 +520,10 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str
         "gmask:", garment_mask_pil.size,
         "ui_category:", category,
         "extractor_category:", extractor_category,
+        "style_present:", style_present,
+        "prompt_present:", prompt_present,
+        "content_scale:", content_scale,
+        "style_scale:", style_scale,
         flush=True
     )
 
```
```diff
@@ -511,8 +534,8 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str
         device,
         mask_pil,
         person_pil,
-        content_scale=
-        style_scale=
+        content_scale=content_scale,  # ✅ changed
+        style_scale=style_scale,      # ✅ changed
         garment_images=garment_pil,
         garment_mask=garment_mask_pil,
     )
```
```diff
@@ -525,13 +548,19 @@ def run_one(paths: Paths, prompt: str, steps: int = DEFAULT_STEPS, category: str
     except Exception:
         pass
 
-
-
-
-
+    # ✅ keep the generate() style input from being None when no style image is given
+    if style_present:
+        style_img = Image.open(paths.style_path).convert("RGB")
+    else:
+        # style_scale is 0.0 here, so this placeholder has no effect; it only satisfies the signature
+        style_img = garment_pil
+
+    # prompt construction kept as before
+    if prompt is not None and str(prompt).strip() != "":
+        prompt = extractor_category + " with " + str(prompt).strip()
     else:
         prompt = extractor_category
-
+
     print("==== prompt? ", prompt, flush=True)
 
     with torch.inference_mode():
```
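The prompt branch above reduces to a small pure function; `compose_prompt` is a hypothetical wrapper and `"dress"` a placeholder category, shown only to make the three outcomes concrete:

```python
from typing import Optional

def compose_prompt(extractor_category: str, prompt: Optional[str]) -> str:
    # Append the user prompt only when it is non-empty after stripping.
    if prompt is not None and str(prompt).strip() != "":
        return extractor_category + " with " + str(prompt).strip()
    return extractor_category

print(compose_prompt("dress", "lace, button"))  # dress with lace, button
print(compose_prompt("dress", "   "))           # dress
print(compose_prompt("dress", None))            # dress
```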
```diff
@@ -566,8 +595,9 @@ def set_seed(seed: int):
 def infer_web(person_fp, sketch_fp, style_fp, prompt, steps, seed, category):
     print("[UI] infer_web called", flush=True)
 
-
-
+    # ✅ only the person image is required; style and sketch are optional
+    if person_fp is None:
+        raise gr.Error("The person image is required (style/sketch are optional).")
 
     if category not in ("Upper-body", "Lower-body", "Dress"):
         raise gr.Error(f"Invalid category: {category}")
```
```diff
@@ -580,7 +610,7 @@ def infer_web(person_fp, sketch_fp, style_fp, prompt, steps, seed, category):
     paths = Paths(
         person_path=person_fp,
         depth_path=sketch_fp,
-        style_path=style_fp,
+        style_path=style_fp,  # ✅ may be None
         output_path=out_path,
     )
 
```
```diff
@@ -593,7 +623,7 @@ def infer_web(person_fp, sketch_fp, style_fp, prompt, steps, seed, category):
 
 
 with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
-    gr.Markdown("## VISTA Demo\nperson
+    gr.Markdown("## VISTA Demo\nThe person image is required; style and sketch (guide) are optional.")
 
     category_toggle = gr.Radio(
         choices=["Dress", "Upper-body", "Lower-body"],
```
```diff
@@ -617,18 +647,18 @@ with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
             gr.Markdown("#### Examples")
             gr.Examples(
                 examples=person_examples,
-                inputs=[person_in],
+                inputs=[person_in],
                 examples_per_page=8,
             )
 
         # -------- Style column --------
         with gr.Column(scale=1):
-            style_in = gr.Image(label="Style Image (
+            style_in = gr.Image(label="Style Image (optional)", type="filepath")
             if style_examples:
                 gr.Markdown("#### Examples")
                 gr.Examples(
                     examples=style_examples,
-                    inputs=[style_in],
+                    inputs=[style_in],
                     examples_per_page=8,
                 )
 
```
```diff
@@ -642,7 +672,7 @@ with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
             gr.Markdown("#### Examples")
             gr.Examples(
                 examples=sketch_examples,
-                inputs=[sketch_in],
+                inputs=[sketch_in],
                 examples_per_page=8,
             )
 
```
```diff
@@ -650,7 +680,7 @@ with gr.Blocks(title="VISTA Demo (HF Spaces)") as demo:
     prompt_in = gr.Textbox(
         label="Prompt",
         value="",
-        placeholder="ex) lace, button, …",
+        placeholder="ex) crystal, lace, button, …",
         lines=2,
     )
     steps_in = gr.Slider(1, 80, value=DEFAULT_STEPS, step=1, label="Steps")
```