Spaces:
Paused
Paused
| """ | |
| Watermark auto-detection module. | |
| Two strategies: | |
| 1. Text detection via EasyOCR (optional install). | |
| 2. Contrast-anomaly detection for semi-transparent logos/patterns (always available). | |
| """ | |
| import cv2 | |
| import numpy as np | |
| from typing import List, Dict | |
# Cache slot for the lazily created OCR reader used by _get_reader().
# It is bound unconditionally so the name exists even when easyocr is not
# installed; otherwise a stray _get_reader() call would fail with a
# confusing NameError instead of the underlying ImportError.
_reader_instance = None

# EasyOCR is an optional dependency: record whether it can be imported so
# callers can skip text detection when it is absent.
try:
    import easyocr
    EASYOCR_AVAILABLE = True
except ImportError:
    EASYOCR_AVAILABLE = False
def _get_reader():
    """
    Return the module-wide EasyOCR reader, creating it on first use.

    The reader is built for English (["en"]) with gpu=False and stored in
    the module-level _reader_instance so later calls reuse the same object.
    """
    global _reader_instance
    if _reader_instance is not None:
        return _reader_instance

    # Imported here so the reader is only constructed when actually needed.
    import easyocr
    _reader_instance = easyocr.Reader(["en"], gpu=False)
    return _reader_instance
def detect_watermarks(image_path: str) -> List[Dict]:
    """
    Run every available watermark detector on the image at image_path.

    Text detection is used only when EASYOCR_AVAILABLE is true; the
    contrast-anomaly detector always runs. The combined candidates are
    passed through _merge_overlapping before being returned.

    Returns a list of region dicts:
    {x, y, w, h, confidence, type}
    All coordinates are in original image pixel space.
    An empty list is returned when cv2.imread cannot load the image.
    """
    image = cv2.imread(image_path)
    if image is None:
        return []

    # Text candidates come first, followed by logo/pattern candidates.
    text_regions: List[Dict] = _detect_text(image) if EASYOCR_AVAILABLE else []
    candidates = text_regions + _detect_transparent(image)
    return _merge_overlapping(candidates)
| # --------------------------------------------------------------------------- | |
| # Text detection | |
| # --------------------------------------------------------------------------- | |
def _detect_text(img: np.ndarray) -> List[Dict]:
    """
    Find text regions in img using the reader returned by _get_reader().

    Each readtext result is unpacked as (bbox, text, confidence), where bbox
    is treated as a sequence of (x, y) points. Results with confidence below
    0.3 are skipped. The axis-aligned bounds of the remaining boxes are
    padded by 5 pixels on every side and clamped to the image.

    Returns a list of dicts with keys x, y, w, h, confidence, type ("text")
    and label (the first 20 characters of the recognised text).
    """
    padding = 5
    min_confidence = 0.3

    ocr = _get_reader()
    img_h, img_w = img.shape[:2]
    detections = ocr.readtext(img, paragraph=False, min_size=10)

    found = []
    for corners, label, score in detections:
        if score < min_confidence:
            continue

        col_values = [point[0] for point in corners]
        row_values = [point[1] for point in corners]

        # Pad the box, then clamp so it never leaves the image.
        left = max(0, int(min(col_values)) - padding)
        top = max(0, int(min(row_values)) - padding)
        right = min(img_w, int(max(col_values)) + padding)
        bottom = min(img_h, int(max(row_values)) + padding)

        region = {
            "x": left,
            "y": top,
            "w": right - left,
            "h": bottom - top,
            "confidence": float(score),
            "type": "text",
            "label": label[:20],
        }
        found.append(region)
    return found
| # --------------------------------------------------------------------------- | |
| # Semi-transparent / logo detection | |
| # --------------------------------------------------------------------------- | |
def _detect_transparent(img: np.ndarray) -> List[Dict]:
    """
    Locate logo/pattern candidates as blobs that stand out from a heavily
    blurred copy of the image.

    Steps: convert to grayscale, estimate the background with a 51x51
    Gaussian blur, threshold the absolute residual at 15, close gaps with a
    5x5 rectangular kernel (3 iterations), then take connected components.
    A component is kept when its area lies between 0.5% and 40% of the image
    and its bounding-box width/height ratio lies between 0.1 and 10.

    Returns a list of dicts with keys x, y, w, h, confidence (fixed 0.55)
    and type ("logo").
    """
    img_h, img_w = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)

    # Residual = how far each pixel sits from the smoothed background.
    background = cv2.GaussianBlur(gray, (51, 51), 0)
    residual = np.abs(gray - background).astype(np.uint8)

    _, mask = cv2.threshold(residual, 15, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)

    n_labels, _, stats, _ = cv2.connectedComponentsWithStats(mask)

    total_pixels = img_h * img_w
    smallest = total_pixels * 0.005
    largest = total_pixels * 0.40

    found = []
    # The first stats row (label 0) is skipped, as in a range(1, n_labels) loop.
    for row in stats[1:n_labels]:
        area = row[cv2.CC_STAT_AREA]
        if area < smallest or area > largest:
            continue

        box_w = row[cv2.CC_STAT_WIDTH]
        box_h = row[cv2.CC_STAT_HEIGHT]
        # A zero-height box has no usable aspect ratio; treat it as rejected.
        if box_h <= 0:
            continue
        ratio = box_w / box_h
        if ratio > 10 or ratio < 0.1:
            continue

        found.append({
            "x": int(row[cv2.CC_STAT_LEFT]),
            "y": int(row[cv2.CC_STAT_TOP]),
            "w": int(box_w),
            "h": int(box_h),
            "confidence": 0.55,
            "type": "logo",
        })
    return found
| # --------------------------------------------------------------------------- | |
| # Merge overlapping boxes | |
| # --------------------------------------------------------------------------- | |
def _merge_overlapping(regions: List[Dict]) -> List[Dict]:
    """
    Fuse regions whose bounding boxes overlap into single enclosing boxes.

    Boxes that only touch along an edge are not treated as overlapping.
    Merging is repeated until no two remaining boxes overlap, so chains of
    overlapping regions collapse into one box.

    Args:
        regions: Region dicts carrying at least "x", "y", "w" and "h".

    Returns:
        A list of region dicts. A region that overlaps nothing is returned
        unchanged, keeping its own "type", "confidence" and any extra keys
        such as "label". Each group of two or more overlapping regions is
        replaced by one dict covering their union, with "type" set to
        "merged" and "confidence" set to 0.7.
    """
    if len(regions) <= 1:
        return regions

    # Each entry is (x1, y1, x2, y2, indices of the source regions covered).
    # Tracking the indices lets untouched regions keep their metadata.
    boxes = [
        (r["x"], r["y"], r["x"] + r["w"], r["y"] + r["h"], [idx])
        for idx, r in enumerate(regions)
    ]

    changed = True
    while changed:
        changed = False
        out: List = []
        used = [False] * len(boxes)
        for i, (x1, y1, x2, y2, members) in enumerate(boxes):
            if used[i]:
                continue
            members = list(members)
            for j in range(i + 1, len(boxes)):
                if used[j]:
                    continue
                bx1, by1, bx2, by2, other_members = boxes[j]
                # Strict inequalities: shared edges do not count as overlap.
                if x1 < bx2 and x2 > bx1 and y1 < by2 and y2 > by1:
                    x1, y1 = min(x1, bx1), min(y1, by1)
                    x2, y2 = max(x2, bx2), max(y2, by2)
                    members.extend(other_members)
                    used[j] = True
                    # A grown box may now overlap boxes already passed over,
                    # so another sweep is required.
                    changed = True
            out.append((x1, y1, x2, y2, members))
            used[i] = True
        boxes = out

    result: List[Dict] = []
    for x1, y1, x2, y2, members in boxes:
        if len(members) == 1:
            # Nothing was merged into this box: keep the original region.
            result.append(regions[members[0]])
        else:
            result.append({
                "x": x1, "y": y1, "w": x2 - x1, "h": y2 - y1,
                "confidence": 0.7, "type": "merged",
            })
    return result