Spaces:

JustForWorld
/

simba-watermark-cleaner

Running on T4

App Files Files Community

JustForWorld committed on Oct 8, 2025

Commit

98f1dc6

1 Parent(s): fd2ee05

feat: implement lazy loading for YOLO and Stable Diffusion models to speed up startup time

Browse files

Files changed (1) hide show

logic.py +113 -65

logic.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from ultralytics import YOLO
 import cv2
-import os
 import numpy as np
 from PIL import Image, ImageDraw
 import torch
@@ -8,41 +7,52 @@ from loguru import logger
 import time
 from diffusers import AutoPipelineForInpainting
-# ===================================================================
-# Класс WatermarkRemover
-# ===================================================================
 class WatermarkRemover:
     def __init__(self, device=None):
-        # 👇 Автоматический выбор GPU, если доступен
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
-        logger.info(f"Используемое устройство: {self.device}")
-        # ---------------------------------------------------------------
-        # Загрузка кастомной модели YOLOv8
-        # ---------------------------------------------------------------
-        logger.info("Загрузка кастомной модели YOLOv8 ('best.pt')...")
-        self.detector = YOLO("best.pt")
-        self.detector.to(self.device)
-        self.detector.fuse()  # 👈 ускоряет инференс YOLO
-        logger.info("Кастомная модель YOLOv8 успешно загружена.")
-        # ---------------------------------------------------------------
-        # Загрузка модели Stable Diffusion 2 Inpainting
-        # ---------------------------------------------------------------
-        logger.info("Загрузка модели Stable Diffusion 2 Inpainting...")
-        self.inpainting_pipe = AutoPipelineForInpainting.from_pretrained(
-            "stabilityai/stable-diffusion-2-inpainting",
-            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,  # 👈 экономия VRAM
-            safety_checker=None,  # 👈 не нужен для локального inpainting
-        )
-        self.inpainting_pipe = self.inpainting_pipe.to(self.device)
-        self.inpainting_pipe.enable_attention_slicing()  # 👈 снижает пиковое использование VRAM
-        logger.info("Модель Stable Diffusion 2 Inpainting успешно загружена.")
-    # ===================================================================
-    # Генерация маски с помощью YOLO
-    # ===================================================================
     def _get_mask_yolo(self, image: Image.Image) -> Image.Image:
         img_np = np.array(image.convert("RGB"))
         results = self.detector.predict(img_np, conf=0.25, imgsz=864, device=self.device)
         mask = Image.new("L", image.size, 0)
@@ -50,17 +60,19 @@ class WatermarkRemover:
         if results and len(results[0].boxes) > 0:
             draw = ImageDraw.Draw(mask)
             boxes = results[0].boxes.xyxy.cpu().numpy()
-            logger.info(f"Кастомная модель YOLO нашла {len(boxes)} bbox.")
             for bbox in boxes:
                 draw.rectangle(list(bbox), fill=255)
         else:
-            logger.warning("Кастомная модель YOLO не нашла watermark.")
         return mask
-    # ===================================================================
-    # Инпейнтинг изображения с помощью diffusers
-    # ===================================================================
-    def _inpaint_image(self, image: Image.Image, mask: Image.Image) -> np.ndarray:
         prompt = (
             "ultra realistic photo of interior or exterior architecture, "
             "natural lighting, clean surface, consistent material texture, realistic color balance"
@@ -70,58 +82,94 @@ class WatermarkRemover:
             "painting, mirror artifact, blurry, distorted, deformed, low quality, noise, grain"
         )
-        logger.info("Запуск Stable Diffusion Inpainting с 30 шагами...")
-        # --- 🔹 Сохраняем оригинальный размер
-        orig_size = image.size  # (width, height)
-        # --- 🔹 Resize до кратного 8 (иначе модель может ругаться)
-        new_w = (orig_size[0] // 8) * 8
-        new_h = (orig_size[1] // 8) * 8
-        resized_image = image.resize((new_w, new_h), Image.LANCZOS)
-        resized_mask = mask.resize((new_w, new_h), Image.LANCZOS)
-        # --- 🔹 Инференс
         with torch.inference_mode():
             result = self.inpainting_pipe(
                 prompt=prompt,
                 negative_prompt=negative_prompt,
-                image=resized_image,
                 mask_image=resized_mask,
-                num_inference_steps=30,
-                guidance_scale=7.5,
             ).images[0]
-        # --- 🔹 Возвращаем к оригинальному размеру
-        result = result.resize(orig_size, Image.LANCZOS)
-        return np.array(result)
-    # ===================================================================
-    # Основной процесс
-    # ===================================================================
     def run(self, image: Image.Image) -> Image.Image:
         start_time = time.time()
-        logger.info("Начало процесса удаления вотермарок (YOLOv8 + Stable Diffusion)...")
         mask_image = self._get_mask_yolo(image)
         mask_np = np.array(mask_image)
         if not np.any(mask_np):
-            logger.info("Вотермарки не найдены. Возвращаем оригинальное изображение.")
             return image
-        logger.info("Постобработка маски...")
         kernel = np.ones((15, 15), np.uint8)
         closed_mask = cv2.morphologyEx(mask_np, cv2.MORPH_CLOSE, kernel)
         final_kernel = np.ones((7, 7), np.uint8)
         processed_mask_np = cv2.dilate(closed_mask, final_kernel, iterations=1)
         processed_mask_pil = Image.fromarray(processed_mask_np)
-        logger.success("Маска обработана.")
-        logger.info("Закрашивание области с помощью Stable Diffusion...")
-        result_np_rgb = self._inpaint_image(image, processed_mask_pil)
         end_time = time.time()
-        logger.success(f"Удаление watermark завершено за {end_time - start_time:.2f} сек.")
-        return Image.fromarray(result_np_rgb)

 from ultralytics import YOLO
 import cv2
 import numpy as np
 from PIL import Image, ImageDraw
 import torch
 import time
 from diffusers import AutoPipelineForInpainting
 class WatermarkRemover:
     def __init__(self, device=None):
         # Set up the remover without loading any model weights.
         # `device`: optional device string; when it is falsy, "cuda" is chosen
         # if torch.cuda.is_available() returns True, otherwise "cpu".
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        logger.info(f"Using device: {self.device}")
+        # Both models start as None; _load_detector() and _load_inpainting_model() fill them in on first use.
+        self.detector = None
+        self.inpainting_pipe = None
+    # ======================================================
+    # Lazy-load YOLO
+    # ======================================================
+    def _load_detector(self):
         # Create the YOLO detector from "best.pt" and move it to self.device.
         # Does nothing when self.detector is already set, so it is safe to
         # call before every prediction.
+        if self.detector is None:
+            logger.info("Loading YOLOv8 custom model ('best.pt')...")
+            self.detector = YOLO("best.pt")
+            self.detector.to(self.device)
             # fuse() is best-effort: any Exception it raises is discarded and
             # loading continues.
             # NOTE(review): the failure is not logged anywhere — consider
             # emitting a warning instead of a bare `pass`.
+            try:
+                self.detector.fuse()
+            except Exception:
+                pass
+            logger.success("YOLOv8 model loaded successfully.")
+    # ======================================================
+    # Lazy-load Stable Diffusion
+    # ======================================================
+    def _load_inpainting_model(self):
         # Build the inpainting pipeline and move it to self.device.
         # Does nothing when self.inpainting_pipe is already set.
+        if self.inpainting_pipe is None:
+            logger.info("Loading Stable Diffusion 2 Inpainting...")
             # float16 is requested only when self.device equals "cuda" exactly;
             # every other value gets float32.
             # NOTE(review): a device string such as "cuda:0" compares unequal
             # and would load in float32 — confirm callers only pass "cuda"/"cpu".
+            self.inpainting_pipe = AutoPipelineForInpainting.from_pretrained(
+                "stabilityai/stable-diffusion-2-inpainting",
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                safety_checker=None,
+            ).to(self.device)
             # enable_attention_slicing() is best-effort: any Exception is
             # discarded without logging and loading continues.
+            try:
+                self.inpainting_pipe.enable_attention_slicing()
+            except Exception:
+                pass
+            logger.success("Stable Diffusion 2 Inpainting model loaded successfully.")
+    # ======================================================
+    # Mask generation via YOLO
+    # ======================================================
     def _get_mask_yolo(self, image: Image.Image) -> Image.Image:
         # Return an "L"-mode mask with the same size as `image`: 255 inside
         # every box the detector reports, 0 everywhere else. The mask is all
         # zeros when no box is reported.
+        self._load_detector()  # ensure YOLO loaded
         # The detector is fed an RGB numpy array regardless of the input mode.
         img_np = np.array(image.convert("RGB"))
         results = self.detector.predict(img_np, conf=0.25, imgsz=864, device=self.device)
         mask = Image.new("L", image.size, 0)
         if results and len(results[0].boxes) > 0:
             draw = ImageDraw.Draw(mask)
             # Only results[0] is consulted; its `xyxy` boxes are copied to CPU
             # and converted to numpy before drawing.
             boxes = results[0].boxes.xyxy.cpu().numpy()
+            logger.info(f"YOLO found {len(boxes)} watermark box(es).")
             for bbox in boxes:
                 draw.rectangle(list(bbox), fill=255)
         else:
+            logger.warning("No watermark detected.")
         return mask
+    # ======================================================
+    # Partial inpainting
+    # ======================================================
+    def _inpaint_image(self, image: Image.Image, mask: Image.Image) -> Image.Image:
+        self._load_inpainting_model()  # ensure pipeline loaded
         prompt = (
             "ultra realistic photo of interior or exterior architecture, "
             "natural lighting, clean surface, consistent material texture, realistic color balance"
             "painting, mirror artifact, blurry, distorted, deformed, low quality, noise, grain"
         )
+        logger.info("Running partial Stable Diffusion inpainting...")
+        orig_w, orig_h = image.size
+        mask_np = np.array(mask)
+        ys, xs = np.where(mask_np > 0)
+        if xs.size == 0 or ys.size == 0:
+            logger.info("Mask empty — skipping inpainting.")
+            return image
+        pad = max(48, int(min(orig_w, orig_h) * 0.03))
+        x_min = max(int(xs.min()) - pad, 0)
+        x_max = min(int(xs.max()) + pad, orig_w)
+        y_min = max(int(ys.min()) - pad, 0)
+        y_max = min(int(ys.max()) + pad, orig_h)
+        crop_box = (x_min, y_min, x_max, y_max)
+        crop_img = image.crop(crop_box)
+        crop_mask = mask.crop(crop_box)
+        crop_w, crop_h = crop_img.size
+        max_side = 1024
+        scale = 1.0
+        if max(crop_w, crop_h) > max_side:
+            scale = max_side / max(crop_w, crop_h)
+        new_w = int(np.ceil((crop_w * scale) / 8) * 8)
+        new_h = int(np.ceil((crop_h * scale) / 8) * 8)
+        if (new_w, new_h) != (crop_w, crop_h):
+            resized_img = crop_img.resize((new_w, new_h), resample=Image.LANCZOS)
+            resized_mask = crop_mask.resize((new_w, new_h), resample=Image.LANCZOS)
+        else:
+            resized_img, resized_mask = crop_img, crop_mask
+        resized_mask = resized_mask.convert("L")
+        mask_thr = np.array(resized_mask)
+        mask_thr = (mask_thr > 127).astype(np.uint8) * 255
+        resized_mask = Image.fromarray(mask_thr, mode="L")
         with torch.inference_mode():
             result = self.inpainting_pipe(
                 prompt=prompt,
                 negative_prompt=negative_prompt,
+                image=resized_img,
                 mask_image=resized_mask,
+                num_inference_steps=35,
+                guidance_scale=8.0,
             ).images[0]
+        if result.size != crop_img.size:
+            result_resized = result.resize(crop_img.size, resample=Image.LANCZOS)
+        else:
+            result_resized = result
+        base = image.copy()
+        paste_mask = crop_mask.convert("L")
+        paste_mask = Image.fromarray((np.array(paste_mask) > 127).astype(np.uint8) * 255, mode="L")
+        base.paste(result_resized, (x_min, y_min), mask=paste_mask)
+        if self.device == "cuda":
+            torch.cuda.empty_cache()
+        return base
+    # ======================================================
+    # Main process
+    # ======================================================
     def run(self, image: Image.Image) -> Image.Image:
         # Entry point: build the detection mask, enlarge it with morphology,
         # then hand image and mask to _inpaint_image() and return its result.
         # When the detection mask has no non-zero pixel, the input `image`
         # object itself is returned and the inpainting step is never reached.
         start_time = time.time()
+        logger.info("Starting watermark removal...")
         mask_image = self._get_mask_yolo(image)
         mask_np = np.array(mask_image)
         if not np.any(mask_np):
+            logger.info("No watermark found. Returning original image.")
             return image
+        logger.info("Post-processing mask (morphology)...")
         # Morphological close with a 15x15 all-ones kernel, followed by a
         # single dilation pass with a 7x7 all-ones kernel.
         kernel = np.ones((15, 15), np.uint8)
         closed_mask = cv2.morphologyEx(mask_np, cv2.MORPH_CLOSE, kernel)
         final_kernel = np.ones((7, 7), np.uint8)
         processed_mask_np = cv2.dilate(closed_mask, final_kernel, iterations=1)
         processed_mask_pil = Image.fromarray(processed_mask_np)
+        logger.success("Mask processed.")
+        result_img = self._inpaint_image(image, processed_mask_pil)
         # Elapsed time is only logged on this path, not on the early return above.
         end_time = time.time()
+        logger.success(f"Watermark removal completed in {end_time - start_time:.2f}s.")
+        return result_img