"""
Page-level preprocessing for sheet-music images.

Steps: resolution cap, illumination normalization, LAB CLAHE, optional
perspective rectification, light edge-preserving smoothing plus unsharp mask,
and small-angle deskew.

Detection and recognition run on the preprocessed image; response bounding
boxes are mapped back to pixel coordinates of the uploaded original.
"""

from __future__ import annotations

import os
from dataclasses import dataclass, field
from typing import Any, Dict, List, Tuple

import cv2
import numpy as np


def _env_flag(name: str) -> bool:
    """Return True when env var *name* is "1", "true" or "yes" (case-insensitive)."""
    return os.environ.get(name, "").lower() in ("1", "true", "yes")


_MAX_SIDE_DEFAULT = int(os.environ.get("STELLA_PREPROCESS_MAX_SIDE", "4096"))
_SKIP_DESKEW = _env_flag("STELLA_SKIP_DESKEW")
_SKIP_PERSPECTIVE = _env_flag("STELLA_SKIP_PERSPECTIVE")
# Perspective rectification can ruin image quality on a false positive when the
# input is already a tight crop of the score, so it is off by default. Enable
# it only for document-style photographs.
_ENABLE_PERSPECTIVE = _env_flag("STELLA_ENABLE_PERSPECTIVE")
_SKIP_ILLUMINATION = _env_flag("STELLA_SKIP_ILLUMINATION")
_SKIP_ENHANCE = _env_flag("STELLA_SKIP_ENHANCE")

_DESKEW_MIN_DEG = 0.35
_DESKEW_MAX_DEG = 6.0

# Perspective: too lenient has no effect, too aggressive distorts the score.
_PERSP_MIN_AREA_RATIO = 0.22
_PERSP_FULL_FRAME_RATIO = 0.92
# NOTE: despite the name, this is used as the MINIMUM allowed length of the
# shortest quad edge, as a fraction of min(height, width).
_PERSP_MAX_EDGE_RATIO = 0.035


@dataclass
class PageGeometry:
    """Maps final work-image pixels back to uploaded-original (decoded) pixels.

    Internally the mapping is a single 3x3 homogeneous transform.
    """

    orig_w: int
    orig_h: int
    work_w: int
    work_h: int
    # Resize factors of the initial downscale step (resized / original).
    # Only used as a fallback when the homogeneous divide degenerates.
    scale_x: float
    scale_y: float
    # Angle in degrees passed to cv2.getRotationMatrix2D (0.0 if no deskew).
    deskew_deg_applied: float
    # 3x3 matrix taking homogeneous work coordinates to original coordinates.
    _work_to_orig: np.ndarray = field(repr=False)

    def work_point_to_original(self, x: float, y: float) -> Tuple[int, int]:
        """Map one work-image point to rounded original-image pixel coordinates."""
        v = self._work_to_orig @ np.array([float(x), float(y), 1.0], dtype=np.float64)
        wv = float(v[2])
        if abs(wv) < 1e-12:
            # Degenerate homogeneous coordinate: fall back to the plain
            # resize scale instead of dividing by ~0.
            xo = float(v[0]) / self.scale_x
            yo = float(v[1]) / self.scale_y
        else:
            xo = float(v[0]) / wv
            yo = float(v[1]) / wv
        return int(round(xo)), int(round(yo))

    def work_rect_to_original_aabb(self, x0: int, y0: int, w: int, h: int) -> List[int]:
        """Map a work-image rect to ``[x, y, w, h]`` in original coordinates.

        All four corners are mapped and their axis-aligned bounding box is
        returned; width and height are clamped to at least 1.
        """
        corners = [
            (x0, y0),
            (x0 + w, y0),
            (x0, y0 + h),
            (x0 + w, y0 + h),
        ]
        mapped = [self.work_point_to_original(px, py) for px, py in corners]
        xs = [p[0] for p in mapped]
        ys = [p[1] for p in mapped]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        return [x_min, y_min, max(1, x_max - x_min), max(1, y_max - y_min)]


def _clahe_lab_bgr(image_bgr: np.ndarray, clip_limit: float = 2.0, grid: int = 8) -> np.ndarray:
    """Apply CLAHE to the L channel in LAB space and return a BGR image."""
    lab = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB)
    l, a, b_ch = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(grid, grid))
    l2 = clahe.apply(l)
    merged = cv2.merge((l2, a, b_ch))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)


def _normalize_illumination_lab(image_bgr: np.ndarray) -> np.ndarray:
    """Divide the L channel by its low-frequency component to reduce shadows and vignetting."""
    lab = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB)
    l, a, b_ch = cv2.split(lab)
    h, w = l.shape[:2]
    # Blur kernel is ~6% of the short side, forced odd and at least 31.
    k = int(round(min(h, w) * 0.06))
    k = max(31, k | 1)
    bg = cv2.GaussianBlur(l, (k, k), 0)
    # Floor the background estimate so the division stays bounded.
    bg_f = np.maximum(bg.astype(np.float32), 8.0)
    l_f = l.astype(np.float32)
    l2 = np.clip((l_f / bg_f) * 96.0 + 48.0, 0, 255).astype(np.uint8)
    merged = cv2.merge((l2, a, b_ch))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)


def _mild_unsharp_lab_bgr(image_bgr: np.ndarray, sigma: float = 1.0, amount: float = 0.35) -> np.ndarray:
    """Unsharp-mask the L channel: ``l * (1 + amount) - blur * amount``."""
    lab = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB)
    l, a, b_ch = cv2.split(lab)
    blur = cv2.GaussianBlur(l, (0, 0), sigma)
    l2 = cv2.addWeighted(l, 1.0 + amount, blur, -amount, 0)
    merged = cv2.merge((l2, a, b_ch))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)


def _bilateral_or_light_blur_bgr(image_bgr: np.ndarray) -> Tuple[np.ndarray, str]:
    """Smooth the image with a filter chosen by its long side.

    Returns ``(smoothed_bgr, label)``. Images up to 2000 px get a d=5
    bilateral filter, up to 3200 px a d=3 bilateral filter, and larger images
    only a 3x3 Gaussian blur on the L channel.
    """
    h, w = image_bgr.shape[:2]
    m = max(h, w)
    if m <= 2000:
        out = cv2.bilateralFilter(image_bgr, d=5, sigmaColor=42, sigmaSpace=42)
        return out, "bilateral_d5"
    if m <= 3200:
        out = cv2.bilateralFilter(image_bgr, d=3, sigmaColor=38, sigmaSpace=38)
        return out, "bilateral_d3"
    lab = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB)
    l, a, b_ch = cv2.split(lab)
    l2 = cv2.GaussianBlur(l, (3, 3), 0)
    merged = cv2.merge((l2, a, b_ch))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR), "gaussian_l3_skip_heavy_bilateral"


def _order_quad_points(pts: np.ndarray) -> np.ndarray:
    """Order a (4, 2) float array of corners as ``[tl, tr, br, bl]``.

    Top-left / bottom-right are the points with the smallest / largest
    ``x + y``; top-right / bottom-left have the smallest / largest ``y - x``.
    """
    rect = np.zeros((4, 2), dtype=np.float32)
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    diff = np.diff(pts, axis=1).flatten()
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect


def _quad_corners_near_frame(ordered: np.ndarray, w: int, h: int, frac: float = 0.16) -> bool:
    """Return True if every corner lies within ``frac * min(w, h)`` of a frame edge.

    Real document/page corners are usually close to the frame; this filters
    out noise quadrilaterals from the interior of the image.
    """
    lim = frac * float(min(w, h))
    for x, y in ordered:
        d = min(float(x), float(y), float(w - 1) - float(x), float(h - 1) - float(y))
        if d > lim:
            return False
    return True


def _try_perspective_rectify(image_bgr: np.ndarray) -> Tuple[np.ndarray, np.ndarray, str]:
    """Rectify perspective if a document-like quadrilateral is found.

    Returns ``(warped_bgr, M_src_to_dst, reason)``. ``M_src_to_dst`` follows
    the ``cv2.getPerspectiveTransform`` convention: ``dst_hom ∝ M @ src_hom``.
    When nothing is applied the input image and an identity matrix are
    returned together with the reason.
    """
    h, w = image_bgr.shape[:2]
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blur, 35, 110)
    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    img_area = float(h * w)
    min_area = _PERSP_MIN_AREA_RATIO * img_area

    # Only the eight largest contours are considered, largest first.
    for cnt in sorted(contours, key=cv2.contourArea, reverse=True)[:8]:
        area = cv2.contourArea(cnt)
        if area < min_area:
            # Sorted descending, so every remaining contour is too small too.
            break
        peri = cv2.arcLength(cnt, True)
        if peri < 1e-6:
            continue
        approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
        if len(approx) != 4:
            continue
        if not cv2.isContourConvex(approx):
            continue
        if area > _PERSP_FULL_FRAME_RATIO * img_area:
            # The quad already fills the frame; warping would gain nothing.
            return image_bgr, np.eye(3, dtype=np.float64), "skipped_near_full_frame"

        pts = approx.reshape(4, 2).astype(np.float32)
        ordered = _order_quad_points(pts)
        width_top = np.linalg.norm(ordered[0] - ordered[1])
        height_r = np.linalg.norm(ordered[1] - ordered[2])
        width_bot = np.linalg.norm(ordered[2] - ordered[3])
        height_l = np.linalg.norm(ordered[3] - ordered[0])
        shortest = min(width_top, height_r, width_bot, height_l)
        if shortest < _PERSP_MAX_EDGE_RATIO * float(min(h, w)):
            continue
        if not _quad_corners_near_frame(ordered, w, h):
            continue

        # Output size is the longer of each opposing edge pair, at least 32 px.
        max_w = max(int(max(width_top, width_bot)), 32)
        max_h = max(int(max(height_r, height_l)), 32)
        dst = np.array(
            [[0, 0], [max_w - 1, 0], [max_w - 1, max_h - 1], [0, max_h - 1]],
            dtype=np.float32,
        )
        m_src_to_dst = cv2.getPerspectiveTransform(ordered, dst)
        warped = cv2.warpPerspective(
            image_bgr,
            m_src_to_dst,
            (max_w, max_h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_REPLICATE,
        )
        return warped, m_src_to_dst.astype(np.float64), "applied"

    return image_bgr, np.eye(3, dtype=np.float64), "no_quadrilateral_found"


def estimate_deskew_angle_deg(gray: np.ndarray) -> float:
    """Estimate the angle (degrees) of near-horizontal staff-line segments.

    A positive value corresponds to a tilt where the right side is lower
    (image coordinates, y pointing down). Returns 0.0 when no usable line
    segments are found.
    """
    h, w = gray.shape[:2]
    max_side = max(h, w)
    scale = 800.0 / max_side if max_side > 800 else 1.0
    if scale < 1.0:
        # Clamp to 1 px so extreme aspect ratios cannot produce a zero-sized
        # target, which cv2.resize rejects.
        small_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        small = cv2.resize(gray, small_size, interpolation=cv2.INTER_AREA)
    else:
        small = gray
    blur = cv2.GaussianBlur(small, (3, 3), 0)
    _, bw = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Opening with a wide 1-px-tall kernel keeps only long horizontal strokes.
    kernel_w = max(15, small.shape[1] // 20)
    h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_w, 1))
    horiz = cv2.morphologyEx(bw, cv2.MORPH_OPEN, h_kernel, iterations=1)
    min_len = max(30, int(small.shape[1] * 0.18))
    lines = cv2.HoughLinesP(
        horiz,
        1,
        np.pi / 180,
        threshold=40,
        minLineLength=min_len,
        maxLineGap=25,
    )
    if lines is None:
        return 0.0

    angles: List[float] = []
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        dx, dy = float(x2 - x1), float(y2 - y1)
        if abs(dx) < 2.0:
            continue
        ang = float(np.degrees(np.arctan2(dy, dx)))
        # Fold the direction so segments drawn right-to-left give the same angle.
        if ang > 45.0:
            ang -= 180.0
        if ang < -45.0:
            ang += 180.0
        if abs(ang) < 14.0:
            angles.append(ang)
    if not angles:
        return 0.0
    return float(np.median(angles))


def preprocess_page_bgr(image_bgr: np.ndarray) -> Tuple[np.ndarray, PageGeometry, Dict[str, Any]]:
    """Preprocess one page image and return ``(work_bgr, geometry, meta)``.

    Pipeline:
      1) Downscale so the long side does not exceed the configured cap.
      2) LAB-based illumination normalization (optional).
      3) LAB CLAHE.
      4) Perspective rectification (optional, when a document quad is found).
      5) Bilateral filter or light L-channel blur (optional).
      6) LAB unsharp mask (optional).
      7) Deskew when |angle| lies within [_DESKEW_MIN_DEG, _DESKEW_MAX_DEG].

    Args:
        image_bgr: Decoded BGR image.

    Returns:
        The preprocessed image, a ``PageGeometry`` mapping its pixels back to
        ``image_bgr`` pixels, and a dict describing which steps ran.

    Raises:
        ValueError: If ``image_bgr`` is None or has no pixels.
    """
    if image_bgr is None or image_bgr.size == 0:
        raise ValueError("image_bgr must be a non-empty image array")

    meta: Dict[str, Any] = {
        "illumination_normalized": False,
        "perspective": "off",
        "perspective_detail": None,
        "enhance": None,
        "unsharp": False,
    }
    orig_h, orig_w = image_bgr.shape[:2]
    max_side = max(orig_h, orig_w)
    cap = max(512, _MAX_SIDE_DEFAULT)
    scale_f = min(1.0, float(cap) / float(max_side))

    if scale_f < 1.0:
        # Clamp to 1 px so a very thin image cannot round down to zero.
        new_w = max(1, int(round(orig_w * scale_f)))
        new_h = max(1, int(round(orig_h * scale_f)))
        work = cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA)
    else:
        work = image_bgr.copy()

    wh, ww = work.shape[:2]
    sx = ww / float(orig_w)
    sy = wh / float(orig_h)
    t_scale = np.diag([sx, sy, 1.0]).astype(np.float64)

    if not _SKIP_ILLUMINATION:
        work = _normalize_illumination_lab(work)
        meta["illumination_normalized"] = True

    work = _clahe_lab_bgr(work)

    m_src_to_dst = np.eye(3, dtype=np.float64)
    if _SKIP_PERSPECTIVE:
        meta["perspective"] = "skipped"
        meta["perspective_detail"] = "disabled_by_env"
    elif not _ENABLE_PERSPECTIVE:
        meta["perspective"] = "skipped"
        meta["perspective_detail"] = "disabled_by_default_set_STELLA_ENABLE_PERSPECTIVE"
    else:
        work, m_src_to_dst, persp_reason = _try_perspective_rectify(work)
        meta["perspective"] = "applied" if persp_reason == "applied" else "skipped"
        meta["perspective_detail"] = persp_reason

    # The perspective warp may have changed the canvas size.
    wh, ww = work.shape[:2]

    if not _SKIP_ENHANCE:
        work, enh_label = _bilateral_or_light_blur_bgr(work)
        meta["enhance"] = enh_label
        work = _mild_unsharp_lab_bgr(work, sigma=1.0, amount=0.32)
        meta["unsharp"] = True
    else:
        meta["enhance"] = "disabled_by_env"
        meta["unsharp"] = False

    deskew_applied = 0.0
    a_desk = np.eye(3, dtype=np.float64)
    if not _SKIP_DESKEW:
        gray = cv2.cvtColor(work, cv2.COLOR_BGR2GRAY)
        skew = estimate_deskew_angle_deg(gray)
        if _DESKEW_MIN_DEG <= abs(skew) <= _DESKEW_MAX_DEG:
            # A positive skew means the right side is lower. getRotationMatrix2D
            # treats positive angles as counter-clockwise, and warpAffine
            # applies the matrix as a forward (src -> dst) map, so rotating by
            # +skew is what levels the staff lines.
            correction = skew
            deskew_applied = correction
            m23 = cv2.getRotationMatrix2D((ww / 2.0, wh / 2.0), correction, 1.0)
            work = cv2.warpAffine(
                work,
                m23,
                (ww, wh),
                flags=cv2.INTER_LINEAR,
                borderMode=cv2.BORDER_REPLICATE,
            )
            a_desk = np.vstack(
                [m23.astype(np.float64), np.array([[0.0, 0.0, 1.0]], dtype=np.float64)]
            )

    # Forward chain:  orig -> final = A_desk @ M_src_to_dst @ T_scale
    # Inverse chain:  final -> orig = inv(T_scale) @ inv(M_src_to_dst) @ inv(A_desk)
    work_to_orig = (
        np.linalg.inv(t_scale) @ np.linalg.inv(m_src_to_dst) @ np.linalg.inv(a_desk)
    )

    geom = PageGeometry(
        orig_w=orig_w,
        orig_h=orig_h,
        work_w=int(work.shape[1]),
        work_h=int(work.shape[0]),
        scale_x=sx,
        scale_y=sy,
        deskew_deg_applied=deskew_applied,
        _work_to_orig=work_to_orig,
    )
    return work, geom, meta