Spaces:
Sleeping
Sleeping
"""
Page-level preprocessing: resolution cap, illumination normalization, LAB-CLAHE,
optional perspective correction, light edge-preserving smoothing and unsharp masking,
and small-angle deskew.

Detection and recognition run on the preprocessed image; response bboxes are mapped
back to pixel coordinates of the uploaded original image.
"""
| from __future__ import annotations | |
| import os | |
| from dataclasses import dataclass, field | |
| from typing import Any, Dict, List, Tuple | |
| import cv2 | |
| import numpy as np | |
def _env_flag(name: str) -> bool:
    """Return True when environment variable *name* is "1", "true" or "yes" (any case)."""
    return os.environ.get(name, "").lower() in ("1", "true", "yes")


# Cap (in pixels) for the longer image side; overridable through the environment.
_MAX_SIDE_DEFAULT = int(os.environ.get("STELLA_PREPROCESS_MAX_SIDE", "4096"))

# Opt-out switches for individual pipeline stages.
_SKIP_DESKEW = _env_flag("STELLA_SKIP_DESKEW")
_SKIP_PERSPECTIVE = _env_flag("STELLA_SKIP_PERSPECTIVE")
# Perspective correction is opt-in: per the original author's note it can degrade
# already-cropped page images, so it should only be enabled for document-style photos.
_ENABLE_PERSPECTIVE = _env_flag("STELLA_ENABLE_PERSPECTIVE")
_SKIP_ILLUMINATION = _env_flag("STELLA_SKIP_ILLUMINATION")
_SKIP_ENHANCE = _env_flag("STELLA_SKIP_ENHANCE")

# Deskew is applied only when the estimated |angle| (degrees) lies in this range.
_DESKEW_MIN_DEG = 0.35
_DESKEW_MAX_DEG = 6.0

# Perspective thresholds: too lenient has no effect, too aggressive distorts the page.
_PERSP_MIN_AREA_RATIO = 0.22
_PERSP_FULL_FRAME_RATIO = 0.92
_PERSP_MAX_EDGE_RATIO = 0.035
@dataclass
class PageGeometry:
    """Map pixels of the final work image back to the uploaded (decoded) original.

    Internally the mapping is a single 3x3 homogeneous transform. The class must be
    a dataclass: it declares a ``field(...)`` attribute and is constructed with
    keyword arguments, neither of which works on a plain class.
    """

    orig_w: int
    orig_h: int
    work_w: int
    work_h: int
    scale_x: float  # work_w / orig_w (used by the inverse mapping fallback)
    scale_y: float  # work_h / orig_h
    deskew_deg_applied: float
    _work_to_orig: np.ndarray = field(repr=False)  # 3x3 matrix, hidden from repr

    def work_point_to_original(self, x: float, y: float) -> Tuple[int, int]:
        """Map one work-image point to original-image pixel coordinates.

        Returns:
            ``(x, y)`` in the original image, rounded to the nearest integer.
        """
        v = self._work_to_orig @ np.array([float(x), float(y), 1.0], dtype=np.float64)
        wv = float(v[2])
        if abs(wv) < 1e-12:
            # Degenerate homogeneous coordinate: avoid dividing by ~0 and fall back
            # to undoing only the resize scale.
            xo = float(v[0]) / self.scale_x
            yo = float(v[1]) / self.scale_y
        else:
            xo = float(v[0]) / wv
            yo = float(v[1]) / wv
        return int(round(xo)), int(round(yo))

    def work_rect_to_original_aabb(self, x0: int, y0: int, w: int, h: int) -> List[int]:
        """Map a work-image rectangle to an axis-aligned box in the original image.

        All four corners are transformed and their bounding box is returned as
        ``[x, y, width, height]``; width and height are at least 1.
        """
        corners = [
            (x0, y0),
            (x0 + w, y0),
            (x0, y0 + h),
            (x0 + w, y0 + h),
        ]
        mapped = [self.work_point_to_original(px, py) for px, py in corners]
        xs = [p[0] for p in mapped]
        ys = [p[1] for p in mapped]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        return [x_min, y_min, max(1, x_max - x_min), max(1, y_max - y_min)]
def _clahe_lab_bgr(image_bgr: np.ndarray, clip_limit: float = 2.0, grid: int = 8) -> np.ndarray:
    """Apply CLAHE to the L channel of the LAB conversion and return a BGR image.

    The a/b channels are passed through unchanged.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(grid, grid))
    lab_out = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(lab_out, cv2.COLOR_LAB2BGR)
def _normalize_illumination_lab(image_bgr: np.ndarray) -> np.ndarray:
    """Divide the L channel by its low-frequency component to soften shadows/vignetting.

    The background estimate is a Gaussian blur whose kernel is about 6% of the
    shorter side (odd, at least 31). The ratio is rescaled with gain 96 and
    offset 48, then clipped to the 8-bit range.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB))
    rows, cols = lightness.shape[:2]

    # `| 1` forces an odd kernel size, as GaussianBlur requires.
    kernel = max(31, int(round(min(rows, cols) * 0.06)) | 1)
    background = cv2.GaussianBlur(lightness, (kernel, kernel), 0)

    # Floor the divisor so very dark background regions do not blow up the ratio.
    background_f = np.maximum(background.astype(np.float32), 8.0)
    lightness_f = lightness.astype(np.float32)
    flattened = np.clip((lightness_f / background_f) * 96.0 + 48.0, 0, 255).astype(np.uint8)

    lab_out = cv2.merge((flattened, chan_a, chan_b))
    return cv2.cvtColor(lab_out, cv2.COLOR_LAB2BGR)
def _mild_unsharp_lab_bgr(image_bgr: np.ndarray, sigma: float = 1.0, amount: float = 0.35) -> np.ndarray:
    """Unsharp-mask the L channel only: ``L * (1 + amount) - blur(L) * amount``.

    The a/b channels are passed through unchanged; the result is returned as BGR.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB))
    # Kernel size (0, 0) lets OpenCV derive it from sigma.
    softened = cv2.GaussianBlur(lightness, (0, 0), sigma)
    sharpened = cv2.addWeighted(lightness, 1.0 + amount, softened, -amount, 0)
    lab_out = cv2.merge((sharpened, chan_a, chan_b))
    return cv2.cvtColor(lab_out, cv2.COLOR_LAB2BGR)
def _bilateral_or_light_blur_bgr(image_bgr: np.ndarray) -> Tuple[np.ndarray, str]:
    """Smooth the image with a filter chosen by its longer side.

    Returns:
        ``(smoothed_bgr, label)`` where the label names the filter that was used:
        a d=5 bilateral up to 2000 px, a d=3 bilateral up to 3200 px, and above
        that a 3x3 Gaussian blur on the L channel only.
    """
    longest = max(image_bgr.shape[:2])

    if longest <= 2000:
        smoothed = cv2.bilateralFilter(image_bgr, d=5, sigmaColor=42, sigmaSpace=42)
        return smoothed, "bilateral_d5"

    if longest <= 3200:
        smoothed = cv2.bilateralFilter(image_bgr, d=3, sigmaColor=38, sigmaSpace=38)
        return smoothed, "bilateral_d3"

    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2LAB))
    lab_out = cv2.merge((cv2.GaussianBlur(lightness, (3, 3), 0), chan_a, chan_b))
    return cv2.cvtColor(lab_out, cv2.COLOR_LAB2BGR), "gaussian_l3_skip_heavy_bilateral"
def _order_quad_points(pts: np.ndarray) -> np.ndarray:
    """Order four (x, y) points as [top-left, top-right, bottom-right, bottom-left].

    Top-left/bottom-right are the points with the smallest/largest ``x + y``;
    top-right/bottom-left are those with the smallest/largest ``y - x``.
    The result is a (4, 2) float32 array.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = pts[:, 1] - pts[:, 0]

    top_left = pts[int(np.argmin(coord_sum))]
    bottom_right = pts[int(np.argmax(coord_sum))]
    top_right = pts[int(np.argmin(coord_diff))]
    bottom_left = pts[int(np.argmax(coord_diff))]

    return np.array([top_left, top_right, bottom_right, bottom_left], dtype=np.float32)
def _quad_corners_near_frame(ordered: np.ndarray, w: int, h: int, frac: float = 0.16) -> bool:
    """Return True when every corner lies within ``frac * min(w, h)`` of an image edge.

    Real page/document corners usually sit close to the frame, so this rejects
    quadrilaterals detected in the interior of the image.
    """
    limit = frac * float(min(w, h))
    right_edge = float(w - 1)
    bottom_edge = float(h - 1)

    def edge_distance(px: float, py: float) -> float:
        # Distance from the point to the nearest of the four frame edges.
        return min(px, py, right_edge - px, bottom_edge - py)

    return not any(edge_distance(float(cx), float(cy)) > limit for cx, cy in ordered)
def _try_perspective_rectify(image_bgr: np.ndarray) -> Tuple[np.ndarray, np.ndarray, str]:
    """Rectify the perspective when a document-like quadrilateral is found.

    Returns:
        ``(warped_bgr, M_src_to_dst, reason)``. ``M_src_to_dst`` follows the
        ``getPerspectiveTransform`` convention (dst_hom ∝ M @ src_hom). When
        nothing is applied the input image and a 3x3 identity are returned, with
        ``reason`` set to "skipped_near_full_frame" or "no_quadrilateral_found".
    """
    rows, cols = image_bgr.shape[:2]

    # Edge map: grayscale -> blur -> Canny -> close small gaps in the outline.
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(cv2.GaussianBlur(gray, (5, 5), 0), 35, 110)
    edge_map = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
    contours, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    frame_area = float(rows * cols)
    area_floor = _PERSP_MIN_AREA_RATIO * frame_area
    shortest_allowed_edge = _PERSP_MAX_EDGE_RATIO * float(min(rows, cols))

    # Only the eight largest contours are examined, biggest first.
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:8]
    for contour in candidates:
        area = cv2.contourArea(contour)
        if area < area_floor:
            # Sorted by area, so every remaining contour is too small as well.
            break

        perimeter = cv2.arcLength(contour, True)
        if perimeter < 1e-6:
            continue

        polygon = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(polygon) != 4 or not cv2.isContourConvex(polygon):
            continue

        if area > _PERSP_FULL_FRAME_RATIO * frame_area:
            # The quad already fills almost the whole frame: nothing to rectify.
            return image_bgr, np.eye(3, dtype=np.float64), "skipped_near_full_frame"

        corners = _order_quad_points(polygon.reshape(4, 2).astype(np.float32))
        top_len = np.linalg.norm(corners[0] - corners[1])
        right_len = np.linalg.norm(corners[1] - corners[2])
        bottom_len = np.linalg.norm(corners[2] - corners[3])
        left_len = np.linalg.norm(corners[3] - corners[0])

        if min(top_len, right_len, bottom_len, left_len) < shortest_allowed_edge:
            continue
        if not _quad_corners_near_frame(corners, cols, rows):
            continue

        # Output size: the longer of each pair of opposite sides, at least 32 px.
        out_w = max(int(max(top_len, bottom_len)), 32)
        out_h = max(int(max(right_len, left_len)), 32)
        target = np.array(
            [[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]],
            dtype=np.float32,
        )
        m_src_to_dst = cv2.getPerspectiveTransform(corners, target)
        rectified = cv2.warpPerspective(
            image_bgr,
            m_src_to_dst,
            (out_w, out_h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_REPLICATE,
        )
        return rectified, m_src_to_dst.astype(np.float64), "applied"

    return image_bgr, np.eye(3, dtype=np.float64), "no_quadrilateral_found"
def estimate_deskew_angle_deg(gray: np.ndarray) -> float:
    """Estimate page skew in degrees from near-horizontal line segments.

    A positive value corresponds to a slope whose right side goes down (image
    coordinates, y pointing down). Returns 0.0 when no usable segment is found.
    """
    rows, cols = gray.shape[:2]
    longest = max(rows, cols)

    # Work on a copy whose longer side is at most 800 px.
    if longest > 800:
        factor = 800.0 / longest
        small = cv2.resize(gray, (int(cols * factor), int(rows * factor)), interpolation=cv2.INTER_AREA)
    else:
        small = gray
    small_width = small.shape[1]

    # Binarise (dark content -> white), then keep only long horizontal runs.
    smoothed = cv2.GaussianBlur(small, (3, 3), 0)
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    run_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(15, small_width // 20), 1))
    horizontal = cv2.morphologyEx(binary, cv2.MORPH_OPEN, run_kernel, iterations=1)

    segments = cv2.HoughLinesP(
        horizontal,
        1,
        np.pi / 180,
        threshold=40,
        minLineLength=max(30, int(small_width * 0.18)),
        maxLineGap=25,
    )
    if segments is None:
        return 0.0

    candidate_angles: List[float] = []
    for xa, ya, xb, yb in segments.reshape(-1, 4):
        run, rise = float(xb - xa), float(yb - ya)
        if abs(run) < 2.0:
            # Practically vertical segment: carries no information about horizontal skew.
            continue
        angle = float(np.degrees(np.arctan2(rise, run)))
        # Fold direction-reversed segments back towards the horizontal axis.
        if angle > 45.0:
            angle -= 180.0
        if angle < -45.0:
            angle += 180.0
        if abs(angle) < 14.0:
            candidate_angles.append(angle)

    if not candidate_angles:
        return 0.0
    return float(np.median(candidate_angles))
def preprocess_page_bgr(image_bgr: np.ndarray) -> Tuple[np.ndarray, PageGeometry, Dict[str, Any]]:
    """Run the page preprocessing pipeline and record how to map results back.

    Steps:
        1) Downscale so the longer side does not exceed the max-side cap.
        2) LAB-based illumination normalization (optional).
        3) LAB CLAHE.
        4) Perspective correction (optional, only when a document quad is detected).
        5) Bilateral filter or light L-channel blur (optional).
        6) LAB unsharp mask (optional).
        7) Deskew when |angle| is within [_DESKEW_MIN_DEG, _DESKEW_MAX_DEG].

    Returns:
        ``(work_bgr, geometry, meta)``: the processed image, a ``PageGeometry``
        that maps work-image pixels back to the input image, and a dict that
        describes which optional stages ran.
    """
    meta: Dict[str, Any] = {
        "illumination_normalized": False,
        "perspective": "off",
        "perspective_detail": None,
        "enhance": None,
        "unsharp": False,
    }
    orig_h, orig_w = image_bgr.shape[:2]
    max_side = max(orig_h, orig_w)
    cap = max(512, _MAX_SIDE_DEFAULT)
    scale_f = min(1.0, float(cap) / float(max_side))
    if scale_f < 1.0:
        # Clamp to >= 1 px: with an extreme aspect ratio the short side could
        # round to 0, which cv2.resize rejects.
        target_w = max(1, int(round(orig_w * scale_f)))
        target_h = max(1, int(round(orig_h * scale_f)))
        work = cv2.resize(image_bgr, (target_w, target_h), interpolation=cv2.INTER_AREA)
    else:
        work = image_bgr.copy()

    # Per-axis scale is taken from the actual resized shape (rounding included).
    wh, ww = work.shape[:2]
    sx = ww / float(orig_w)
    sy = wh / float(orig_h)
    t_scale = np.diag([sx, sy, 1.0]).astype(np.float64)

    if not _SKIP_ILLUMINATION:
        work = _normalize_illumination_lab(work)
        meta["illumination_normalized"] = True
    work = _clahe_lab_bgr(work)

    m_src_to_dst = np.eye(3, dtype=np.float64)
    if _SKIP_PERSPECTIVE:
        meta["perspective"] = "skipped"
        meta["perspective_detail"] = "disabled_by_env"
    elif not _ENABLE_PERSPECTIVE:
        meta["perspective"] = "skipped"
        meta["perspective_detail"] = "disabled_by_default_set_STELLA_ENABLE_PERSPECTIVE"
    else:
        work, m_src_to_dst, persp_reason = _try_perspective_rectify(work)
        meta["perspective"] = "applied" if persp_reason == "applied" else "skipped"
        meta["perspective_detail"] = persp_reason
    # Perspective rectification may have changed the canvas size.
    wh, ww = work.shape[:2]

    if not _SKIP_ENHANCE:
        work, enh_label = _bilateral_or_light_blur_bgr(work)
        meta["enhance"] = enh_label
        work = _mild_unsharp_lab_bgr(work, sigma=1.0, amount=0.32)
        meta["unsharp"] = True
    else:
        meta["enhance"] = "disabled_by_env"
        meta["unsharp"] = False

    deskew_applied = 0.0
    a_desk = np.eye(3, dtype=np.float64)
    if not _SKIP_DESKEW:
        gray = cv2.cvtColor(work, cv2.COLOR_BGR2GRAY)
        skew = estimate_deskew_angle_deg(gray)
        if _DESKEW_MIN_DEG <= abs(skew) <= _DESKEW_MAX_DEG:
            correction = -skew
            deskew_applied = correction
            m23 = cv2.getRotationMatrix2D((ww / 2.0, wh / 2.0), correction, 1.0)
            # m23 is used as the deskewed(dst) -> pre-deskew(src) map, which is
            # what the geometry below assumes. WARP_INVERSE_MAP makes warpAffine
            # interpret it that way; without the flag OpenCV treats the matrix as
            # src -> dst, which rotates the page the wrong way and makes the
            # returned geometry disagree with the image.
            work = cv2.warpAffine(
                work,
                m23,
                (ww, wh),
                flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                borderMode=cv2.BORDER_REPLICATE,
            )
            a_desk = np.vstack([m23.astype(np.float64), np.array([[0.0, 0.0, 1.0]], dtype=np.float64)])

    # final work -> original: inv(T_scale) @ inv(M_src_to_dst) @ A_desk
    # (original -> final:     inv(A_desk) @ M_src_to_dst @ T_scale)
    m_inv = np.linalg.inv(m_src_to_dst)
    work_to_orig = np.linalg.inv(t_scale) @ m_inv @ a_desk
    geom = PageGeometry(
        orig_w=orig_w,
        orig_h=orig_h,
        work_w=int(work.shape[1]),
        work_h=int(work.shape[0]),
        scale_x=sx,
        scale_y=sy,
        deskew_deg_applied=deskew_applied,
        _work_to_orig=work_to_orig,
    )
    return work, geom, meta