# Source: EpiText-Hanja-OCR / ai_modules / preprocessor_unified.py
# Author: donghyun — commit 8672bad ("Add OCR code, modules, and weights")
# Epitext_Back/ai_modules/preprocessor_unified.py
# -*- coding: utf-8 -*-
"""
================================================================================
Unified Image Preprocessing Module for Epitext AI Project
================================================================================
๋ชจ๋“ˆ๋ช…: preprocessor_unified.py (v1.0.0 - Production Ready)
์ž‘์„ฑ์ผ: 2025-12-02
๋ชฉ์ : ํ•œ์ž ์ด๋ฏธ์ง€๋ฅผ Swin Gray์™€ OCR์šฉ์œผ๋กœ ๋™์‹œ์— ์ „์ฒ˜๋ฆฌ
์ƒํƒœ: Production Ready
ํ•ต์‹ฌ ๊ธฐ๋Šฅ:
ํ•œ ๋ฒˆ์— ๋‘ ๊ฐ€์ง€ ์ „์ฒ˜๋ฆฌ ์™„๋ฃŒ:
1. Swin Gray: ๊ทธ๋ ˆ์ด ๋น„์ด์ง„ํ™” -> 3์ฑ„๋„ (์ •๋ณด ์†์‹ค ์ตœ์†Œ)
2. OCR: ์ด์ง„ํ™” -> 1์ฑ„๋„ (๋ช…ํ™•ํ•œ ํ‘๋ฐฑ)
์ž๋™ ๋ฐฐ๊ฒฝ ๋ณด์žฅ:
- Swin: ๋ฐ์€๋ฐฐ๊ฒฝ (>=127)
- OCR: ํฐ๋ฐฐ๊ฒฝ + ๊ฒ€์ •๊ธ€์ž (255/0)
ํƒ๋ณธ ์ž๋™ ๊ฒ€์ถœ: ํฐ ์–ด๋‘์šด ์˜์—ญ ์‹๋ณ„
์˜์—ญ ๊ฒ€์ถœ 1ํšŒ: ํšจ์œจ์„ฑ
์„ค์ • ํŒŒ์ผ ์ง€์›: JSON ๊ธฐ๋ฐ˜ ์ปค์Šคํ„ฐ๋งˆ์ด์ง•
๋กœ๊น… ์ง€์›: DEBUG, INFO, WARNING, ERROR
์˜์กด์„ฑ:
- opencv-python >= 4.8.0
- numpy >= 1.24.0
๋‹จ์ผ ํ•จ์ˆ˜:
preprocess_image_unified(input_path, output_swin_path, output_ocr_path, ...)
์‚ฌ์šฉ ์˜ˆ์‹œ:
>>> from ai_modules.preprocessor_unified import preprocess_image_unified
>>> result = preprocess_image_unified(
... "input.jpg",
... "swin.jpg",
... "ocr.png"
... )
================================================================================
"""
import cv2
import numpy as np
from pathlib import Path
import json
import logging
from typing import Dict, Optional, Tuple
# ================================================================================
# Logging Configuration
# ================================================================================
# NOTE(review): logging.basicConfig() at import time configures the root logger
# for the entire application; for a library module it is usually preferable to
# configure only this module's logger (or leave configuration to the app) —
# confirm before changing, as callers may rely on this side effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - [%(levelname)s] %(message)s'
)
# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)
# ================================================================================
# Constants
# ================================================================================
# Default settings (each overridable via the JSON config file; see _load_config).
DEFAULT_MARGIN = 10                         # crop margin in pixels
DEFAULT_BRIGHTNESS_THRESHOLD = 127          # mean-brightness cutoff used for inversion decisions
DEFAULT_RUBBING_MIN_AREA_RATIO = 0.1        # min rubbing-blob area as a fraction of the image
DEFAULT_TEXT_MIN_AREA = 16                  # absolute floor for a text contour's bounding area (px)
DEFAULT_TEXT_AREA_RATIO = 0.00005           # relative floor for a text contour's bounding area
DEFAULT_MORPHOLOGY_KERNEL_SIZE = (2, 2)     # kernel for text-region close/open
DEFAULT_MORPHOLOGY_CLOSE_ITERATIONS = 3     # close: merge nearby strokes
DEFAULT_MORPHOLOGY_OPEN_ITERATIONS = 2      # open: remove speckle noise
DEFAULT_RUBBING_KERNEL_SIZE = (5, 5)        # kernel for rubbing-region close/open
DEFAULT_RUBBING_CLOSE_ITERATIONS = 10       # close: fill gaps inside the dark rubbing area
DEFAULT_RUBBING_OPEN_ITERATIONS = 5         # open: drop small dark artifacts
# ================================================================================
# Main Preprocessing Class
# ================================================================================
class UnifiedImagePreprocessor:
    """Unified image preprocessor (Swin + OCR).

    One pass over a Hanja source image produces both model inputs:

    * **Swin**: cropped grayscale with a bright background guaranteed,
      exported as a 3-channel image (non-binarized, 256 gray levels).
    * **OCR**: the same crop, Otsu-binarized with a white background
      guaranteed, exported as a 1-channel image.

    Region detection (rubbing or text) runs only once and the resulting
    crop is shared by both outputs.

    Attributes:
        config (dict): Preprocessing parameters (margin, thresholds,
            morphology kernel sizes and iteration counts).

    Example:
        >>> prep = UnifiedImagePreprocessor()
        >>> result = prep.preprocess_unified("input.jpg", "swin.jpg", "ocr.png")
    """

    def __init__(self, config_path: Optional[str] = None) -> None:
        """Initialize the preprocessor.

        Args:
            config_path (str, optional): Path to a JSON settings file.
                When ``None``, ``config/preprocess_config.json`` next to
                this module is used if it exists; otherwise the built-in
                defaults apply.
        """
        self.config = self._load_config(config_path)
        logger.info("[INIT] UnifiedImagePreprocessor v1.0.0 ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")

    def _load_config(self, config_path: Optional[str]) -> Dict:
        """Build the settings dict, overlaying a JSON file on the defaults.

        Keys beginning with ``_`` (e.g. ``_description``) are treated as
        human-readable annotations inside the JSON file and are not merged.
        A missing or malformed file falls back to the defaults.
        """
        default_config: Dict = {
            "margin": DEFAULT_MARGIN,
            "brightness_threshold": DEFAULT_BRIGHTNESS_THRESHOLD,
            "rubbing_min_area_ratio": DEFAULT_RUBBING_MIN_AREA_RATIO,
            "text_min_area": DEFAULT_TEXT_MIN_AREA,
            "text_area_ratio": DEFAULT_TEXT_AREA_RATIO,
            "morphology_kernel_size": DEFAULT_MORPHOLOGY_KERNEL_SIZE,
            "morphology_close_iterations": DEFAULT_MORPHOLOGY_CLOSE_ITERATIONS,
            "morphology_open_iterations": DEFAULT_MORPHOLOGY_OPEN_ITERATIONS,
            "rubbing_kernel_size": DEFAULT_RUBBING_KERNEL_SIZE,
            "rubbing_close_iterations": DEFAULT_RUBBING_CLOSE_ITERATIONS,
            "rubbing_open_iterations": DEFAULT_RUBBING_OPEN_ITERATIONS,
        }
        # Fall back to the packaged config file when no explicit path is given.
        if config_path is None:
            default_config_path = Path(__file__).parent / "config" / "preprocess_config.json"
            if default_config_path.exists():
                config_path = str(default_config_path)
        if config_path and Path(config_path).exists():
            try:
                with open(config_path, 'r', encoding='utf-8') as f:
                    user_config = json.load(f)
                # Merge user settings, skipping "_"-prefixed annotation keys.
                default_config.update(
                    {k: v for k, v in user_config.items() if not k.startswith('_')}
                )
                logger.info(f"[CONFIG] ์„ค์ • ํŒŒ์ผ ๋กœ๋“œ: {config_path}")
            except Exception as e:
                # Best-effort: a broken config file must not break preprocessing.
                logger.warning(f"[CONFIG] ์„ค์ • ํŒŒ์ผ ๋กœ๋“œ ์‹คํŒจ: {e} - ๊ธฐ๋ณธ ์„ค์ • ์‚ฌ์šฉ")
        return default_config

    def _find_rubbing_bbox(self, gray_image: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
        """Detect the rubbing (takbon) region: the largest dark blob.

        Args:
            gray_image (np.ndarray): Grayscale source image.

        Returns:
            tuple: Bounding box ``(x, y, w, h)`` of the largest dark region,
            or ``None`` when no sufficiently large region exists.
        """
        H_img, W_img = gray_image.shape
        # Step 1: extract dark pixels.
        # FIX: use the configured threshold instead of a hard-coded 127 so a
        # custom "brightness_threshold" also affects rubbing detection
        # (the default value is unchanged).
        _, dark_mask = cv2.threshold(
            gray_image, self.config["brightness_threshold"], 255, cv2.THRESH_BINARY_INV
        )
        # Step 2: morphology — close gaps inside the blob, then drop speckles.
        kernel_rub = np.ones(self.config["rubbing_kernel_size"], np.uint8)
        dark_mask = cv2.morphologyEx(
            dark_mask, cv2.MORPH_CLOSE, kernel_rub,
            iterations=self.config["rubbing_close_iterations"]
        )
        dark_mask = cv2.morphologyEx(
            dark_mask, cv2.MORPH_OPEN, kernel_rub,
            iterations=self.config["rubbing_open_iterations"]
        )
        # Step 3: external contours only.
        contours, _ = cv2.findContours(dark_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return None
        # Steps 4-5: the largest contour must cover a minimum image fraction.
        largest = max(contours, key=cv2.contourArea)
        min_area = (H_img * W_img) * self.config["rubbing_min_area_ratio"]
        if cv2.contourArea(largest) < min_area:
            return None
        return cv2.boundingRect(largest)

    def _find_text_bbox(self, gray_image: np.ndarray) -> Tuple[int, int, int, int]:
        """Detect the union bounding box of all text-like regions.

        Args:
            gray_image (np.ndarray): Grayscale source image.

        Returns:
            tuple: ``(x, y, w, h)``; the full image when nothing qualifies.
        """
        H_img, W_img = gray_image.shape
        # Step 1: Otsu binarization (dark strokes become white foreground).
        _, binary = cv2.threshold(
            gray_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )
        # Step 2: morphology — merge strokes, then remove noise.
        kernel_morph = np.ones(self.config["morphology_kernel_size"], np.uint8)
        binary = cv2.morphologyEx(
            binary, cv2.MORPH_CLOSE, kernel_morph,
            iterations=self.config["morphology_close_iterations"]
        )
        binary = cv2.morphologyEx(
            binary, cv2.MORPH_OPEN, kernel_morph,
            iterations=self.config["morphology_open_iterations"]
        )
        # Step 3: external contours.
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Step 4: minimum area — absolute floor or fraction of the image.
        min_area = max(
            self.config["text_min_area"],
            int((H_img * W_img) * self.config["text_area_ratio"])
        )
        # Step 5: keep contours whose bounding rectangle is large enough.
        # BUG FIX: the original passed cv2.boundingRect(cnt) — an (x, y, w, h)
        # tuple — into cv2.contourArea, which expects a point array and raises,
        # so preprocessing failed whenever any contour was found. The intent
        # (bounding-rect area) is computed directly as w * h.
        valid_contours = []
        for cnt in contours:
            _, _, w_c, h_c = cv2.boundingRect(cnt)
            if w_c * h_c >= min_area:
                valid_contours.append(cnt)
        # Step 6: union bounding box over all valid contours, or the whole
        # image as a safe fallback.
        if valid_contours:
            return cv2.boundingRect(np.vstack(valid_contours))
        return (0, 0, W_img, H_img)

    def _apply_margin(
        self,
        bbox: Tuple[int, int, int, int],
        gray_image: np.ndarray,
        margin_val: int
    ) -> Tuple[int, int, int, int]:
        """Expand ``bbox`` by ``margin_val`` pixels on every side, clamped
        to the image bounds."""
        x, y, w, h = bbox
        H_img, W_img = gray_image.shape
        x_new = max(0, x - margin_val)
        y_new = max(0, y - margin_val)
        # Width/height grow by up to 2*margin but never spill past the image.
        w_new = min(W_img - x_new, w + 2 * margin_val)
        h_new = min(H_img - y_new, h + 2 * margin_val)
        return (x_new, y_new, w_new, h_new)

    def _ensure_bright_background(
        self,
        gray_cropped: np.ndarray
    ) -> Tuple[np.ndarray, Dict]:
        """Guarantee a bright background for the Swin input.

        Inverts the crop when its mean brightness is below the configured
        threshold, so dark-background sources (e.g. rubbings) end up with a
        light background and dark strokes.

        Returns:
            tuple: ``(grayscale image, info dict)`` where the info dict holds
            before/after mean brightness and inversion flags.
        """
        mean_brightness = np.mean(gray_cropped)
        is_inverted = False
        if mean_brightness < self.config["brightness_threshold"]:
            gray_bright = cv2.bitwise_not(gray_cropped)
            is_inverted = True
        else:
            gray_bright = gray_cropped.copy()
        # Safety re-check. With the default threshold (127) a single inversion
        # always lands above it, so this branch can only trigger for custom
        # thresholds above 127.5.
        final_brightness = np.mean(gray_bright)
        if final_brightness < self.config["brightness_threshold"]:
            gray_bright = cv2.bitwise_not(gray_bright)
            is_inverted = not is_inverted
            final_brightness = np.mean(gray_bright)
        return gray_bright, {
            "mean_brightness_before": float(mean_brightness),
            "mean_brightness_after": float(final_brightness),
            "is_inverted": is_inverted,
            "is_bright_bg": final_brightness >= self.config["brightness_threshold"]
        }

    def _ensure_white_background(
        self,
        gray_cropped: np.ndarray
    ) -> Tuple[np.ndarray, Dict]:
        """Guarantee white background / black glyphs for the OCR input.

        Otsu-binarizes the crop, then inverts if the majority of pixels came
        out dark (i.e. the background was classified as foreground).

        Returns:
            tuple: ``(binary image, info dict)``.
        """
        # Step 1: Otsu binarization.
        _, binary = cv2.threshold(
            gray_cropped, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        # Step 2: polarity check via the mean of the binary image.
        mean_brightness = np.mean(binary)
        # Step 3: invert when black dominates.
        if mean_brightness < self.config["brightness_threshold"]:
            binary_final = cv2.bitwise_not(binary)
            polarity = "inverted"
        else:
            binary_final = binary
            polarity = "normal"
        final_brightness = np.mean(binary_final)
        return binary_final, {
            "mean_brightness_before": float(mean_brightness),
            "mean_brightness_after": float(final_brightness),
            "polarity": polarity,
            # NOTE: strict ">" here (vs ">=" on the Swin side) is kept as-is
            # for backward compatibility of the reported flag.
            "is_white_bg": final_brightness > self.config["brightness_threshold"]
        }

    def preprocess_unified(
        self,
        input_image_path: str,
        output_swin_path: str,
        output_ocr_path: str,
        margin: Optional[int] = None,
        use_rubbing: bool = False
    ) -> Dict:
        """Run the unified pipeline: produce Swin and OCR images in one pass.

        Region detection (rubbing or text) is performed exactly once; the
        resulting crop feeds both outputs.

        Args:
            input_image_path (str): Input image path.
            output_swin_path (str): Output path for the Swin image (JPG).
            output_ocr_path (str): Output path for the OCR image (PNG).
            margin (int, optional): Crop margin in pixels; falls back to the
                configured default only when ``None`` (an explicit 0 is
                respected).
            use_rubbing (bool): Detect the rubbing region first instead of
                text regions (default: False).

        Returns:
            dict: On success::

                {
                    "success": True,
                    "original_shape": (H, W, C),
                    "bbox": (x, y, w, h),
                    "region_type": "text" or "rubbing",
                    "region_detected": bool,
                    "swin": {"output_path", "output_shape", "is_bright_bg", ...},
                    "ocr": {"output_path", "output_shape", "is_white_bg", ...},
                }

            On failure: ``{"success": False, "message": str}``.

        Processing steps:
            1. Load image  2. Grayscale  3. Region detection (once)
            4. Crop + margin  5. Swin (bright background, 3-channel)
            6. OCR (binarize, white background)  7. Save both.
        """
        # BUG FIX: the original used `margin or default`, which silently
        # treated an explicit margin=0 as "unset"; compare against None.
        margin_val = self.config["margin"] if margin is None else margin
        try:
            # Step 1: load image (BGR).
            img_bgr = cv2.imread(str(input_image_path), cv2.IMREAD_COLOR)
            if img_bgr is None:
                raise ValueError(f"์ด๋ฏธ์ง€ ๋กœ๋“œ ์‹คํŒจ: {input_image_path}")
            original_shape = img_bgr.shape
            logger.info(f"[LOAD] ์ด๋ฏธ์ง€ ๋กœ๋“œ: {input_image_path} {original_shape}")
            # Step 2: grayscale conversion.
            gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
            # Step 3: region detection (rubbing or text) — performed once.
            if use_rubbing:
                detected_bbox = self._find_rubbing_bbox(gray)
                region_type = "rubbing"
                logger.info("[DETECT] ํƒ๋ณธ ์˜์—ญ ๊ฒ€์ถœ ๋ชจ๋“œ")
            else:
                detected_bbox = None
                region_type = "text"
                logger.info("[DETECT] ํ…์ŠคํŠธ ์˜์—ญ ๊ฒ€์ถœ ๋ชจ๋“œ")
            H_img, W_img = gray.shape
            # Step 4: crop with margin.
            if detected_bbox is not None:
                bbox_final = self._apply_margin(detected_bbox, gray, margin_val)
                logger.info(f"[DETECT] {region_type} ์˜์—ญ ๊ฒ€์ถœ: {bbox_final}")
            elif use_rubbing:
                # Rubbing requested but not found: fall back to the full image.
                bbox_final = (0, 0, W_img, H_img)
                logger.warning("[DETECT] ํƒ๋ณธ ๋ฏธ๊ฒ€์ถœ - ์ „์ฒด ์ด๋ฏธ์ง€ ์‚ฌ์šฉ")
            else:
                bbox_text = self._find_text_bbox(gray)
                bbox_final = self._apply_margin(bbox_text, gray, margin_val)
                logger.info(f"[DETECT] ํ…์ŠคํŠธ ์˜์—ญ ๊ฒ€์ถœ: {bbox_final}")
            x, y, w, h = bbox_final
            gray_cropped = gray[y:y+h, x:x+w]
            logger.info(f"[CROP] ํฌ๋กญ ์™„๋ฃŒ: {gray_cropped.shape}")
            # Step 5: Swin — bright background, then expand to 3 channels.
            gray_bright, info_swin = self._ensure_bright_background(gray_cropped)
            swin_output_3ch = cv2.cvtColor(gray_bright, cv2.COLOR_GRAY2BGR)
            # Step 6: OCR — binarize with a white background.
            binary_final, info_ocr = self._ensure_white_background(gray_cropped)
            # Step 7: save both outputs (create parent dirs as needed).
            output_swin_path_obj = Path(output_swin_path)
            output_swin_path_obj.parent.mkdir(parents=True, exist_ok=True)
            swin_success = cv2.imwrite(str(output_swin_path_obj), swin_output_3ch)
            output_ocr_path_obj = Path(output_ocr_path)
            output_ocr_path_obj.parent.mkdir(parents=True, exist_ok=True)
            ocr_success = cv2.imwrite(str(output_ocr_path_obj), binary_final)
            if not swin_success or not ocr_success:
                raise ValueError("์ด๋ฏธ์ง€ ์ €์žฅ ์‹คํŒจ")
            logger.info(f"[SAVE] Swin ์ €์žฅ: {output_swin_path_obj}")
            logger.info(f"[SAVE] OCR ์ €์žฅ: {output_ocr_path_obj}")
            # Assemble the result payload.
            return {
                "success": True,
                "version": "Unified Swin Gray + OCR (v1.0.0)",
                "original_shape": original_shape,
                "bbox": bbox_final,
                "region_type": region_type,
                "region_detected": detected_bbox is not None,
                # Swin section
                "swin": {
                    "output_path": str(output_swin_path_obj).replace("\\", "/"),
                    "output_shape": swin_output_3ch.shape,
                    "color_type": "Grayscale 3์ฑ„๋„ (B=G=R, ๋น„์ด์ง„ํ™” 256๋‹จ๊ณ„)",
                    "is_inverted": info_swin["is_inverted"],
                    "mean_brightness_before": info_swin["mean_brightness_before"],
                    "mean_brightness_after": info_swin["mean_brightness_after"],
                    "is_bright_bg": info_swin["is_bright_bg"]
                },
                # OCR section
                "ocr": {
                    "output_path": str(output_ocr_path_obj).replace("\\", "/"),
                    "output_shape": binary_final.shape,
                    "polarity": info_ocr["polarity"],
                    "mean_brightness_before": info_ocr["mean_brightness_before"],
                    "mean_brightness_after": info_ocr["mean_brightness_after"],
                    "is_white_bg": info_ocr["is_white_bg"]
                },
                "message": "[DONE] ํ†ตํ•ฉ ์ „์ฒ˜๋ฆฌ ์™„๋ฃŒ (Swin + OCR)"
            }
        except Exception as e:
            # Any failure is reported as a result dict rather than raised,
            # so callers get a uniform {"success": False, ...} payload.
            logger.error(f"[ERROR] ํ†ตํ•ฉ ์ „์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
            return {
                "success": False,
                "message": str(e)
            }
# ================================================================================
# Global Instance & Convenience Functions
# ================================================================================
# Lazily-created module-wide singleton; see get_preprocessor().
_global_preprocessor: Optional["UnifiedImagePreprocessor"] = None
def get_preprocessor(config_path: Optional[str] = None) -> UnifiedImagePreprocessor:
    """Return the module-wide preprocessor singleton, creating it on first use.

    Note:
        ``config_path`` is honored only by the call that first creates the
        instance; subsequent calls return the existing singleton unchanged.
    """
    global _global_preprocessor
    if _global_preprocessor is not None:
        return _global_preprocessor
    _global_preprocessor = UnifiedImagePreprocessor(config_path)
    return _global_preprocessor
def preprocess_image_unified(
    input_path: str,
    output_swin_path: str,
    output_ocr_path: str,
    margin: Optional[int] = None,
    use_rubbing: bool = False
) -> Dict:
    """Convenience wrapper: run the unified pipeline on the shared singleton.

    Args:
        input_path (str): Input image path.
        output_swin_path (str): Output path for the Swin image.
        output_ocr_path (str): Output path for the OCR image.
        margin (int, optional): Crop margin in pixels.
        use_rubbing (bool): Enable rubbing-region detection.

    Returns:
        dict: Result dict from ``UnifiedImagePreprocessor.preprocess_unified``.
    """
    return get_preprocessor().preprocess_unified(
        input_path,
        output_swin_path,
        output_ocr_path,
        margin=margin,
        use_rubbing=use_rubbing,
    )
# ================================================================================
# Usage Example
# ================================================================================
if __name__ == "__main__":
    # Smoke test: run the unified pipeline on a sample image and log results.
    logger.info("=" * 80)
    logger.info("[TEST] Unified Image Preprocessor v1.0.0 - ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
    logger.info("=" * 80)
    try:
        result = UnifiedImagePreprocessor().preprocess_unified(
            "test_input.jpg",
            "test_swin.jpg",
            "test_ocr.png"
        )
        if not result["success"]:
            logger.error(f"[TEST] ์‹คํŒจ: {result['message']}")
        else:
            logger.info("[TEST] ํ†ตํ•ฉ ์ „์ฒ˜๋ฆฌ ์„ฑ๊ณต!")
            logger.info(f"[TEST] Swin: {result['swin']['output_path']}")
            logger.info(f"[TEST] OCR: {result['ocr']['output_path']}")
            logger.info(f"[TEST] Swin ๋ฐ์€๋ฐฐ๊ฒฝ: {'Yes' if result['swin']['is_bright_bg'] else 'No'}")
            logger.info(f"[TEST] OCR ํฐ๋ฐฐ๊ฒฝ: {'Yes' if result['ocr']['is_white_bg'] else 'No'}")
    except Exception as e:
        logger.error(f"[TEST] ์˜ˆ์™ธ: {e}")
    logger.info("=" * 80)