IDM-VTON-RV-local

Paused

App Files Files Community

ArmanRV committed 27 days ago

Commit

978ca4f

verified ·

1 Parent(s): 0d734ed

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -212

app.py CHANGED Viewed

@@ -9,12 +9,14 @@ from PIL import Image
 # =========================
 # FIX: gradio 4.24 / gradio_client crashes on boolean JSON Schemas in /api_info
 # =========================
 def _patch_gradio_client_bool_schema():
     try:
         import gradio_client.utils as gcu
         patched_any = False
         if hasattr(gcu, "get_type"):
             _orig_get_type = gcu.get_type
@@ -26,6 +28,7 @@ def _patch_gradio_client_bool_schema():
             gcu.get_type = _get_type_patched
             patched_any = True
         if hasattr(gcu, "get_desc"):
             _orig_get_desc = gcu.get_desc
@@ -37,10 +40,12 @@ def _patch_gradio_client_bool_schema():
             gcu.get_desc = _get_desc_patched
             patched_any = True
         if hasattr(gcu, "_json_schema_to_python_type"):
             _orig_json2py = gcu._json_schema_to_python_type
             def _json_schema_to_python_type_patched(schema, defs=None):
                 if isinstance(schema, bool):
                     return "any"
                 return _orig_json2py(schema, defs)
@@ -58,6 +63,7 @@ def _patch_gradio_client_bool_schema():
 _patch_gradio_client_bool_schema()
 import torch
 import numpy as np
 from torchvision import transforms
@@ -97,11 +103,14 @@ APP_AUTH = (DEMO_USER, DEMO_PASS) if (DEMO_USER and DEMO_PASS) else None
 # =========================
 GARMENT_DIR = "garments"
 ALLOWED_EXTS = (".png", ".jpg", ".jpeg", ".webp")
-GARMENTS_DATASET = os.getenv("GARMENTS_DATASET", "").strip()
 HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
 def ensure_garments_downloaded() -> None:
     os.makedirs(GARMENT_DIR, exist_ok=True)
     if HF_TOKEN:
@@ -129,6 +138,9 @@ def ensure_garments_downloaded() -> None:
 def list_garments() -> List[str]:
     files: List[str] = []
     if not os.path.isdir(GARMENT_DIR):
         return files
@@ -162,7 +174,7 @@ def build_gallery_items(files: List[str]):
 # =========================
-# Helpers
 # =========================
 def clamp_int(x, lo, hi):
     try:
@@ -183,116 +195,8 @@ def allow_call(min_interval_sec: float = 2.5) -> Tuple[bool, str]:
     return True, ""
-def _quality_metrics(img: Image.Image) -> Tuple[int, int, float, float]:
-    """(w, h, brightness, sharpness)"""
-    img = img.convert("RGB")
-    w, h = img.size
-    gray = np.array(img.convert("L"))
-    brightness = float(gray.mean())
-    gy, gx = np.gradient(gray.astype(np.float32))
-    sharpness = float((gx * gx + gy * gy).mean())
-    return w, h, brightness, sharpness
-# =========================
-# Person photo evaluation (UX gate)
-# - главное: если НЕ похоже на фото человека -> предупреждение и блокируем try-on
-# - предупреждения по качеству показываем только при явной проблеме
-# =========================
-def _count_openpose_keypoints(keypoints) -> int:
-    """
-    Пытаемся универсально посчитать найденные ключевые точки (score > 0.2)
-    под разные форматы, которые могут возвращать разные реализации OpenPose.
-    """
-    try:
-        if isinstance(keypoints, dict):
-            cand = keypoints.get("candidate", None)
-            if cand is None:
-                # иногда внутри другой ключ
-                cand = keypoints.get("candidates", None)
-            if cand is not None:
-                cand = np.array(cand)
-                if cand.ndim >= 2 and cand.shape[-1] >= 3:
-                    return int((cand[:, 2] > 0.2).sum())
-            # иногда subset/candidate в другом виде — если не распознали, возвращаем 0
-            return 0
-        arr = np.array(keypoints)
-        if arr.ndim >= 2 and arr.shape[-1] >= 3:
-            return int((arr[..., 2] > 0.2).sum())
-    except Exception:
-        return 0
-    return 0
-def _detect_person_openpose_or_parsing(img: Image.Image) -> bool:
-    """
-    True если похоже на человека:
-    - OpenPose нашёл достаточно keypoints, ИЛИ
-    - Human Parsing дал заметную область "не фон"
-    """
-    try:
-        # EXIF-поворот (часто ломает детект на телефонных фотках)
-        try:
-            img = _apply_exif_orientation(img)
-        except Exception:
-            pass
-        small = img.convert("RGB").resize((384, 512))
-        # 1) OpenPose
-        keypoints = openpose_model(small)
-        kpt_count = _count_openpose_keypoints(keypoints)
-        if kpt_count >= 6:
-            return True
-        # 2) Parsing
-        model_parse, _ = parsing_model(small)
-        mp = np.array(model_parse) if not isinstance(model_parse, np.ndarray) else model_parse
-        # доля пикселей не фона
-        non_bg = float((mp > 0).mean())
-        if non_bg >= 0.03:
-            return True
-        return False
-    except Exception:
-        return False
-def evaluate_person_photo(img: Optional[Image.Image]) -> Tuple[bool, str]:
-    """
-    UX-логика:
-    1) Если НЕ похоже на фото человека (нет keypoints и нет parsing-области) -> ⚠️ и просим другое фото
-    2) Если похоже -> ✅ Фото подходит
-       ЛИБО ⚠️ (только при явной проблеме качества)
-    """
-    if img is None:
-        return False, ""
-    is_person = _detect_person_openpose_or_parsing(img)
-    if not is_person:
-        return False, "⚠️ Не похоже на фото человека. Загрузите фото человека (по пояс или в полный рост)."
-    w, h, brightness, sharpness = _quality_metrics(img)
-    issues = []
-    # только явные проблемы
-    if min(w, h) < 520:
-        issues.append("низкое разрешение")
-    if brightness < 50:
-        issues.append("слишком темно")
-    if sharpness < 8:
-        issues.append("сильно размыто")
-    if issues:
-        return True, "⚠️ Фото может плохо подойти для примерки (" + ", ".join(issues) + "). Лучше загрузить другое."
-    return True, "✅ Фото подходит для примерки."
 # =========================
-# Model init
 # =========================
 base_path = "yisol/IDM-VTON"
@@ -300,8 +204,11 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
 print("DEVICE:", DEVICE, "DTYPE:", DTYPE, flush=True)
-tensor_transfrom = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])
 unet = UNet2DConditionModel.from_pretrained(base_path, subfolder="unet", torch_dtype=DTYPE)
 unet.requires_grad_(False)
@@ -319,6 +226,7 @@ vae = AutoencoderKL.from_pretrained(base_path, subfolder="vae", torch_dtype=DTYP
 UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(base_path, subfolder="unet_encoder", torch_dtype=DTYPE)
 UNet_Encoder.requires_grad_(False)
 parsing_model = Parsing(0)
 openpose_model = OpenPose(0)
@@ -341,6 +249,9 @@ pipe = TryonPipeline.from_pretrained(
 pipe.unet_encoder = UNet_Encoder
 @spaces.GPU
 def start_tryon(
     human_pil: Image.Image,
@@ -349,11 +260,12 @@ def start_tryon(
     crop_center: bool = True,
     denoise_steps: int = 25,
     seed: int = 42,
-    guidance_scale: float = 2.0,
 ) -> Image.Image:
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float16 if device == "cuda" else torch.float32
     if device == "cuda":
         openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
@@ -362,6 +274,7 @@ def start_tryon(
     garm_img = garm_img.convert("RGB").resize((768, 1024))
     human_img_orig = human_pil.convert("RGB")
     if crop_center:
         width, height = human_img_orig.size
         target_width = int(min(width, height * (3 / 4)))
@@ -375,9 +288,8 @@ def start_tryon(
         human_img = cropped_img.resize((768, 1024))
     else:
         human_img = human_img_orig.resize((768, 1024))
-        crop_size = None
-        left = top = None
     if auto_mask:
         keypoints = openpose_model(human_img.resize((384, 512)))
         model_parse, _ = parsing_model(human_img.resize((384, 512)))
@@ -386,6 +298,7 @@ def start_tryon(
     else:
         mask = Image.new("L", (768, 1024), 0)
     human_img_arg = _apply_exif_orientation(human_img.resize((384, 512)))
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
@@ -403,6 +316,7 @@ def start_tryon(
     pose_img = pose_img[:, :, ::-1]
     pose_img = Image.fromarray(pose_img).resize((768, 1024))
     garment_des = "a garment"
     prompt_main = "model is wearing " + garment_des
     prompt_cloth = "a photo of " + garment_des
@@ -410,7 +324,6 @@ def start_tryon(
     denoise_steps = clamp_int(denoise_steps, 20, 40)
     seed = clamp_int(seed, 0, 999999)
-    guidance_scale = float(max(0.1, min(10.0, guidance_scale)))
     with torch.no_grad():
         if device == "cuda":
@@ -467,11 +380,11 @@ def start_tryon(
                 height=1024,
                 width=768,
                 ip_adapter_image=garm_img.resize((768, 1024)),
-                guidance_scale=guidance_scale,
             )[0]
     out_img = images[0]
-    if crop_center and crop_size is not None:
         out_img_rs = out_img.resize(crop_size)
         human_img_orig.paste(out_img_rs, (int(left), int(top)))
         return human_img_orig
@@ -479,7 +392,7 @@ def start_tryon(
 # =========================
-# UI
 # =========================
 CUSTOM_CSS = """
 footer {display:none !important;}
@@ -502,86 +415,26 @@ def on_gallery_select(files_list: List[str], evt: gr.SelectData):
     idx = max(0, min(idx, len(files_list) - 1))
     return files_list[idx], f"👕 Выбрано: {files_list[idx]}"
-def on_person_change(person_pil):
-    # Показываем либо:
-    # - ⚠️ (если не похоже на человека или явное плохое качество)
-    # - ✅ (если подходит)
-    # - "" (если нет фото)
-    _, msg = evaluate_person_photo(person_pil)
-    return msg or ""
-def tryon_ui_imageslider(person_pil, selected_filename):
-    yield (None, None), "⏳ Проверяем ввод..."
-    ok, msg = allow_call(2.5)
-    if not ok:
-        yield (None, None), msg
-        return
-    if person_pil is None:
-        yield (None, None), "❌ Загрузите фото человека"
-        return
-    is_person, verdict = evaluate_person_photo(person_pil)
-    if not is_person:
-        yield (None, None), verdict
-        return
-    if not selected_filename:
-        yield (None, None), "❌ Выберите одежду (клик по превью)"
-        return
-    garm = load_garment_pil(selected_filename)
-    if garm is None:
-        yield (None, None), "❌ Не удалось загрузить выбранную одежду"
-        return
-    yield (None, None), "🧠 Анализируем силуэт..."
-    time.sleep(0.05)
-    yield (None, None), "✨ Примеряем..."
-    try:
-        out = start_tryon(
-            human_pil=person_pil,
-            garm_img=garm,
-            auto_mask=True,
-            crop_center=True,
-            denoise_steps=25,
-            seed=42,
-            guidance_scale=2.0,
-        )
-        yield (person_pil, out), "✅ Готово"
-    except Exception as e:
-        yield (None, None), f"❌ Ошибка: {type(e).__name__}: {str(e)[:220]}"
-def tryon_ui_pair(person_pil, selected_filename):
-    yield None, None, "⏳ Проверяем ввод..."
     ok, msg = allow_call(2.5)
     if not ok:
-        yield None, None, msg
         return
     if person_pil is None:
-        yield None, None, "❌ Загрузите фото человека"
-        return
-    is_person, verdict = evaluate_person_photo(person_pil)
-    if not is_person:
-        yield None, None, verdict
         return
     if not selected_filename:
-        yield None, None, "❌ Выберите одежду (клик по превью)"
         return
     garm = load_garment_pil(selected_filename)
     if garm is None:
-        yield None, None, "❌ Не удалось загрузить выбранную одежду"
         return
-    yield None, None, "🧠 Анализируем силуэт..."
-    time.sleep(0.05)
-    yield None, None, "✨ Примеряем..."
     try:
         out = start_tryon(
             human_pil=person_pil,
@@ -590,11 +443,10 @@ def tryon_ui_pair(person_pil, selected_filename):
             crop_center=True,
             denoise_steps=25,
             seed=42,
-            guidance_scale=2.0,
         )
-        yield person_pil, out, "✅ Готово"
     except Exception as e:
-        yield None, None, f"❌ Ошибка: {type(e).__name__}: {str(e)[:220]}"
 # Preload garments
@@ -612,10 +464,6 @@ with gr.Blocks(title="Virtual Try-On Rendez-vous", css=CUSTOM_CSS) as demo:
         with gr.Column():
             person = gr.Image(label="Фото человека", type="pil", height=420)
-            # Оценка/предупреждение по фото (✅/⚠️/пусто)
-            warning = gr.Markdown("")
-            person.change(fn=on_person_change, inputs=[person], outputs=[warning])
             with gr.Row():
                 refresh_btn = gr.Button("🔄 Обновить каталог одежды", variant="secondary")
                 selected_label = gr.Markdown("👕 Выберите одежду ниже")
@@ -632,25 +480,7 @@ with gr.Blocks(title="Virtual Try-On Rendez-vous", css=CUSTOM_CSS) as demo:
             status = gr.Textbox(value="Ожидание...", interactive=False)
         with gr.Column():
-            gr.Markdown("### Результат (До / После)")
-            if hasattr(gr, "ImageSlider"):
-                compare = gr.ImageSlider(label="До / После")
-                run.click(
-                    fn=tryon_ui_imageslider,
-                    inputs=[person, selected_garment_state],
-                    outputs=[compare, status],
-                    concurrency_limit=1,
-                )
-            else:
-                with gr.Row():
-                    before_img = gr.Image(label="До", type="pil", height=360)
-                    after_img = gr.Image(label="После", type="pil", height=360)
-                run.click(
-                    fn=tryon_ui_pair,
-                    inputs=[person, selected_garment_state],
-                    outputs=[before_img, after_img, status],
-                    concurrency_limit=1,
-                )
     garment_gallery.select(
         fn=on_gallery_select,
@@ -664,6 +494,13 @@ with gr.Blocks(title="Virtual Try-On Rendez-vous", css=CUSTOM_CSS) as demo:
         outputs=[garment_gallery, garment_files_state, selected_garment_state, status],
     )
 demo.queue(max_size=20)
 if __name__ == "__main__":
@@ -674,5 +511,5 @@ if __name__ == "__main__":
         auth=APP_AUTH,
         max_threads=4,
         show_error=True,
-        show_api=False,
     )

 # =========================
 # FIX: gradio 4.24 / gradio_client crashes on boolean JSON Schemas in /api_info
+# - works across gradio_client versions (get_desc may not exist)
 # =========================
 def _patch_gradio_client_bool_schema():
     try:
         import gradio_client.utils as gcu
         patched_any = False
+        # 1) Patch get_type if exists
         if hasattr(gcu, "get_type"):
             _orig_get_type = gcu.get_type
             gcu.get_type = _get_type_patched
             patched_any = True
+        # 2) Patch get_desc if exists (some versions don't have it)
         if hasattr(gcu, "get_desc"):
             _orig_get_desc = gcu.get_desc
             gcu.get_desc = _get_desc_patched
             patched_any = True
+        # 3) Patch internal json-schema conversion (this is the key crash site)
         if hasattr(gcu, "_json_schema_to_python_type"):
             _orig_json2py = gcu._json_schema_to_python_type
             def _json_schema_to_python_type_patched(schema, defs=None):
+                # JSON Schema allows boolean schemas (True/False). Treat as "any".
                 if isinstance(schema, bool):
                     return "any"
                 return _orig_json2py(schema, defs)
 _patch_gradio_client_bool_schema()
 import torch
 import numpy as np
 from torchvision import transforms
 # =========================
 GARMENT_DIR = "garments"
 ALLOWED_EXTS = (".png", ".jpg", ".jpeg", ".webp")
+GARMENTS_DATASET = os.getenv("GARMENTS_DATASET", "").strip()  # e.g. "ArmanRV/armanrv-garments"
 HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
 def ensure_garments_downloaded() -> None:
+    """
+    Downloads garments from HF Dataset into ./garments to avoid Space repo 1GB limit.
+    """
     os.makedirs(GARMENT_DIR, exist_ok=True)
     if HF_TOKEN:
 def list_garments() -> List[str]:
+    """
+    Recursively list images inside ./garments (handles dataset subfolders).
+    """
     files: List[str] = []
     if not os.path.isdir(GARMENT_DIR):
         return files
 # =========================
+# Small helpers
 # =========================
 def clamp_int(x, lo, hi):
     try:
     return True, ""
 # =========================
+# Model init (local IDM-VTON)
 # =========================
 base_path = "yisol/IDM-VTON"
 DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
 print("DEVICE:", DEVICE, "DTYPE:", DTYPE, flush=True)
+tensor_transfrom = transforms.Compose(
+    [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
+)
+# Components
 unet = UNet2DConditionModel.from_pretrained(base_path, subfolder="unet", torch_dtype=DTYPE)
 unet.requires_grad_(False)
 UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(base_path, subfolder="unet_encoder", torch_dtype=DTYPE)
 UNet_Encoder.requires_grad_(False)
+# Preprocessors
 parsing_model = Parsing(0)
 openpose_model = OpenPose(0)
 pipe.unet_encoder = UNet_Encoder
+# =========================
+# Inference (returns ONLY final image)
+# =========================
 @spaces.GPU
 def start_tryon(
     human_pil: Image.Image,
     crop_center: bool = True,
     denoise_steps: int = 25,
     seed: int = 42,
 ) -> Image.Image:
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float16 if device == "cuda" else torch.float32
+    # Move models
     if device == "cuda":
         openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
     garm_img = garm_img.convert("RGB").resize((768, 1024))
     human_img_orig = human_pil.convert("RGB")
+    # Crop
     if crop_center:
         width, height = human_img_orig.size
         target_width = int(min(width, height * (3 / 4)))
         human_img = cropped_img.resize((768, 1024))
     else:
         human_img = human_img_orig.resize((768, 1024))
+    # Mask
     if auto_mask:
         keypoints = openpose_model(human_img.resize((384, 512)))
         model_parse, _ = parsing_model(human_img.resize((384, 512)))
     else:
         mask = Image.new("L", (768, 1024), 0)
+    # DensePose
     human_img_arg = _apply_exif_orientation(human_img.resize((384, 512)))
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
     pose_img = pose_img[:, :, ::-1]
     pose_img = Image.fromarray(pose_img).resize((768, 1024))
+    # Fixed prompts
     garment_des = "a garment"
     prompt_main = "model is wearing " + garment_des
     prompt_cloth = "a photo of " + garment_des
     denoise_steps = clamp_int(denoise_steps, 20, 40)
     seed = clamp_int(seed, 0, 999999)
     with torch.no_grad():
         if device == "cuda":
                 height=1024,
                 width=768,
                 ip_adapter_image=garm_img.resize((768, 1024)),
+                guidance_scale=2.0,
             )[0]
     out_img = images[0]
+    if crop_center:
         out_img_rs = out_img.resize(crop_size)
         human_img_orig.paste(out_img_rs, (int(left), int(top)))
         return human_img_orig
 # =========================
+# UI (API-like)
 # =========================
 CUSTOM_CSS = """
 footer {display:none !important;}
     idx = max(0, min(idx, len(files_list) - 1))
     return files_list[idx], f"👕 Выбрано: {files_list[idx]}"
+def tryon_ui(person_pil, selected_filename):
+    yield None, "⏳ Обработка... (первый запуск может быть дольше)"
     ok, msg = allow_call(2.5)
     if not ok:
+        yield None, msg
         return
     if person_pil is None:
+        yield None, "❌ Загрузите фото человека"
         return
     if not selected_filename:
+        yield None, "❌ Выберите одежду (клик по превью)"
         return
     garm = load_garment_pil(selected_filename)
     if garm is None:
+        yield None, "❌ Не удалось загрузить выбранную одежду"
         return
     try:
         out = start_tryon(
             human_pil=person_pil,
             crop_center=True,
             denoise_steps=25,
             seed=42,
         )
+        yield out, "✅ Готово"
     except Exception as e:
+        yield None, f"❌ Ошибка: {type(e).__name__}: {str(e)[:220]}"
 # Preload garments
         with gr.Column():
             person = gr.Image(label="Фото человека", type="pil", height=420)
             with gr.Row():
                 refresh_btn = gr.Button("🔄 Обновить каталог одежды", variant="secondary")
                 selected_label = gr.Markdown("👕 Выберите одежду ниже")
             status = gr.Textbox(value="Ожидание...", interactive=False)
         with gr.Column():
+            out = gr.Image(label="Результат", type="pil", height=760)
     garment_gallery.select(
         fn=on_gallery_select,
         outputs=[garment_gallery, garment_files_state, selected_garment_state, status],
     )
+    run.click(
+        fn=tryon_ui,
+        inputs=[person, selected_garment_state],
+        outputs=[out, status],
+        concurrency_limit=1,
+    )
 demo.queue(max_size=20)
 if __name__ == "__main__":
         auth=APP_AUTH,
         max_threads=4,
         show_error=True,
+        show_api=False,  # важно: не показываем API, но /api_info могут дергать — патч это чинит
     )