Score_To_MML / core /preprocess.py
Coconuttttt's picture
Initial deployment: Score to MML converter
daa0bdd
"""
core/preprocess.py
์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ๊ณ„์ธต.
์—ญํ• :
- ํŒŒ์ผ ์œ ํšจ์„ฑ ๊ฒ€์‚ฌ (๊ฒฝ๋กœ, ํ™•์žฅ์ž, ์ฝ๊ธฐ ๊ฐ€๋Šฅ ์—ฌ๋ถ€)
- OpenCV ๊ธฐ๋ฐ˜ ์ด๋ฏธ์ง€ ํ’ˆ์งˆ ๊ฐœ์„  (Audiveris OMR ์ •ํ™•๋„ ํ–ฅ์ƒ ๋ชฉ์ )
์ „์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ (apply_preprocessing):
1. ์ด๋ฏธ์ง€ ๋กœ๋“œ ๋ฐ ๊ฒ€์ฆ (cv2.imread)
2. Grayscale ๋ณ€ํ™˜
3. ๊ฐ€๋ฒผ์šด ๋…ธ์ด์ฆˆ ์ œ๊ฑฐ (GaussianBlur 3x3)
4. ์ด์ง„ํ™” (Otsu ๋˜๋Š” Adaptive Threshold)
5. [์„ ํƒ์ ] Deskew โ€” ๊ธฐ์šธ๊ธฐ ๋ณด์ • (์‹คํ—˜์ , deskew_enabled=True ์‹œ)
์ฃผ์˜:
- apply_preprocessing์€ opencv-python ํ•„์š” (pip install opencv-python)
- opencv ๋ฏธ์„ค์น˜ ์‹œ PreprocessError ๋ฐœ์ƒ โ†’ pipeline์—์„œ fallback ์ฒ˜๋ฆฌ
- mock ๋ชจ๋“œ์—์„œ๋Š” pipeline์ด ์ด ํ•จ์ˆ˜๋ฅผ ํ˜ธ์ถœํ•˜์ง€ ์•Š์Œ
ํ•œ๊ณ„:
- deskew๋Š” ยฑ10๋„ ๋ฒ”์œ„์—์„œ๋งŒ ๋™์ž‘. ๊ณผ๋„ํ•˜๊ฒŒ ๊ธฐ์šธ์–ด์ง„ ์ด๋ฏธ์ง€๋Š” ๋ณด์ • ๋ถˆ๊ฐ€
- ์†๊ธ€์”จ, ๊ทธ๋ฆผ์ž, ์ €ํ•ด์ƒ๋„ ์ด๋ฏธ์ง€๋Š” ์ด์ง„ํ™” ํ›„ ์˜คํžˆ๋ ค ํ’ˆ์งˆ ์ €ํ•˜ ๊ฐ€๋Šฅ
- ์—ฐํ•„ ํ•„๊ธฐ ์•…๋ณด๋Š” Adaptive Threshold๊ฐ€ ๋” ์ ํ•ฉํ•  ์ˆ˜ ์žˆ์Œ
"""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .models import ConvertOptions
# File extensions accepted by validate_image_path(); compared against the
# lower-cased suffix of the input path.
# NOTE(review): ".pdf" passes validation here, but apply_preprocessing() loads
# files with cv2.imread -- presumably PDFs are handled elsewhere; confirm.
SUPPORTED_EXTENSIONS = {".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".pdf"}
class PreprocessError(Exception):
    """Error raised by the preprocessing stage."""
# ---------------------------------------------------------------------------
# File validation (no OpenCV required; always available)
# ---------------------------------------------------------------------------
def validate_image_path(input_path: str) -> Path:
    """
    Validate the input file path and return it as a Path object.

    The checks run in order: the path must exist, must be a regular file,
    and its lower-cased suffix must be in SUPPORTED_EXTENSIONS.

    Args:
        input_path: Path of the file to validate.

    Returns:
        Path: ``Path(input_path)`` (not resolved).

    Raises:
        PreprocessError: If the path does not exist, is not a file, or has
            an unsupported extension.
    """
    path = Path(input_path)
    if not path.exists():
        # "File not found"
        raise PreprocessError(f"파일을 찾을 수 없습니다: {input_path}")
    if not path.is_file():
        # "Not a file"
        raise PreprocessError(f"파일이 아닙니다: {input_path}")
    ext = path.suffix.lower()
    if ext not in SUPPORTED_EXTENSIONS:
        # "Unsupported file format: <ext>. Supported formats: <list>"
        raise PreprocessError(
            f"지원하지 않는 파일 형식: {ext}. "
            f"지원 형식: {', '.join(sorted(SUPPORTED_EXTENSIONS))}"
        )
    return path
def get_file_info(path: Path) -> dict:
    """
    Return basic information about a file.

    Returns:
        dict with the keys ``filename``, ``extension`` (lower-cased suffix),
        ``size_bytes`` and ``absolute_path`` (resolved path as a string).
    """
    size = path.stat().st_size
    resolved = path.resolve()
    return dict(
        filename=path.name,
        extension=path.suffix.lower(),
        size_bytes=size,
        absolute_path=str(resolved),
    )
def preprocess_image(input_path: str) -> dict:
    """
    Validate the file only; no image processing is performed.

    The returned dict is the output of get_file_info() extended with
    ``preprocessed_path`` (the resolved original path) and an empty
    ``preprocessing_applied`` list.

    Per the original author's notes, the pipeline calls this first to
    validate the file and then additionally calls apply_preprocessing()
    when running in Audiveris mode.
    """
    checked = validate_image_path(input_path)
    result = get_file_info(checked)
    result.update(
        preprocessed_path=str(checked.resolve()),
        preprocessing_applied=[],
    )
    return result
# ---------------------------------------------------------------------------
# OpenCV preprocessing (requires opencv-python)
# ---------------------------------------------------------------------------
def apply_preprocessing(
input_path: str,
output_path: str,
options: ConvertOptions,
debug_dir: str = "",
) -> dict:
"""
OpenCV ๊ธฐ๋ฐ˜ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ๋ฅผ ์ ์šฉํ•˜๊ณ  ๊ฒฐ๊ณผ๋ฅผ output_path์— ์ €์žฅ.
Args:
input_path: ์›๋ณธ ์ด๋ฏธ์ง€ ๊ฒฝ๋กœ
output_path: ์ „์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ ์ €์žฅ ๊ฒฝ๋กœ (.png ๊ถŒ์žฅ)
options: ConvertOptions (binarize_method, deskew_enabled ์‚ฌ์šฉ)
Returns:
dict: ์ „์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ ์ •๋ณด
- applied: True
- steps: ์ ์šฉ๋œ ๋‹จ๊ณ„ ๋ชฉ๋ก
- input_size: (w, h)
- output_path: ๊ฒฐ๊ณผ ํŒŒ์ผ ๊ฒฝ๋กœ
Raises:
PreprocessError: opencv ๋ฏธ์„ค์น˜, ์ด๋ฏธ์ง€ ์ฝ๊ธฐ ์‹คํŒจ, ์ €์žฅ ์‹คํŒจ ์‹œ
"""
try:
import cv2
import numpy as np
except ImportError:
raise PreprocessError(
"opencv-python์ด ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.\n"
"์„ค์น˜ ๋ช…๋ น: pip install opencv-python"
)
img = cv2.imread(input_path, cv2.IMREAD_COLOR)
if img is None:
raise PreprocessError(
f"์ด๋ฏธ์ง€๋ฅผ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {input_path}\n"
f"ํŒŒ์ผ์ด ์†์ƒ๋˜์—ˆ๊ฑฐ๋‚˜ ์ง€์›ํ•˜์ง€ ์•Š๋Š” ํ˜•์‹์ผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
)
h, w = img.shape[:2]
applied: list[str] = []
def _save_step(name: str, img_data) -> None:
if debug_dir:
cv2.imwrite(str(Path(debug_dir) / name), img_data)
# 1. Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
applied.append("grayscale")
_save_step("step_01_grayscale.png", gray)
# 2. ๋…ธ์ด์ฆˆ ์ œ๊ฑฐ (๊ฐ€๋ฒผ์šด Gaussian blur, ์„ ํƒ์ )
if options.blur_enabled:
denoised = cv2.GaussianBlur(gray, (3, 3), 0)
applied.append("gaussian_blur_3x3")
_save_step("step_02_gaussian_blur.png", denoised)
else:
denoised = gray
# 3. ์ด์ง„ํ™” (์„ ํƒ์  โ€” ๊ธฐ๋ณธ off, Audiveris ์ž์ฒด ์ด์ง„ํ™” ์‹ ๋ขฐ)
processed = denoised
if options.binarize_enabled:
processed = _binarize(denoised, options.binarize_method)
applied.append(f"binarize:{options.binarize_method}")
_save_step(f"step_03_binarized_{options.binarize_method}.png", processed)
# 4. Deskew (์„ ํƒ์ )
if options.deskew_enabled:
processed, angle = _deskew(processed)
if abs(angle) > 0.01:
applied.append(f"deskew:{angle:.2f}deg")
_save_step(f"step_04_deskew_{angle:.2f}deg.png", processed)
else:
applied.append("deskew:skipped(angle<0.5)")
# ์ €์žฅ
ok = cv2.imwrite(output_path, processed)
if not ok:
raise PreprocessError(
f"์ „์ฒ˜๋ฆฌ ์ด๋ฏธ์ง€ ์ €์žฅ ์‹คํŒจ: {output_path}\n"
f"์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ๊ฐ€ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”."
)
return {
"applied": True,
"steps": applied,
"input_path": input_path,
"output_path": output_path,
"input_size": (w, h),
}
def _binarize(gray_img, method: str):
"""
Grayscale ์ด๋ฏธ์ง€๋ฅผ ์ด์ง„ํ™”.
method:
"otsu" : ์ „์—ญ Otsu ์ž„๊ณ„๊ฐ’. ๋ช…์•” ๋Œ€๋น„๊ฐ€ ๋ถ„๋ช…ํ•œ ์Šค์บ” ์•…๋ณด์— ์ ํ•ฉ.
"adaptive" : ์ง€์—ญ Adaptive Threshold. ์กฐ๋ช… ๋ถˆ๊ท ์ผ / ์—ฐํ•„ ํ•„๊ธฐ ์•…๋ณด์— ์ ํ•ฉ.
"""
try:
import cv2
except ImportError:
raise PreprocessError("opencv-python์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.")
if method == "adaptive":
return cv2.adaptiveThreshold(
gray_img, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
blockSize=15, C=8,
)
# default: otsu
_, binary = cv2.threshold(
gray_img, 0, 255,
cv2.THRESH_BINARY + cv2.THRESH_OTSU,
)
return binary
def _deskew(binary_img):
"""
์ด์ง„ํ™”๋œ ์ด๋ฏธ์ง€์˜ ๊ธฐ์šธ๊ธฐ๋ฅผ ๋ณด์ •.
Returns:
(corrected_img, angle_degrees)
ํ•œ๊ณ„:
- ยฑ10๋„ ์ด๋‚ด์˜ ๊ธฐ์šธ๊ธฐ๋งŒ ๋ณด์ •. ๋ฒ”์œ„ ์ดˆ๊ณผ ์‹œ ์›๋ณธ ๋ฐ˜ํ™˜.
- ์•…๋ณด ์ „์ฒด๊ฐ€ ๊ธฐ์šธ์–ด์ง„ ๊ฒฝ์šฐ์—๋งŒ ์œ ํšจ. ๊ฐœ๋ณ„ ๋ณดํ‘œ ๊ธฐ์šธ๊ธฐ๋Š” ๋ฏธ์ฒ˜๋ฆฌ.
- ์–ด๋‘์šด ํ”ฝ์…€์ด ๋„ˆ๋ฌด ์ ์œผ๋ฉด ๊ฐ๋„ ์ถ”์ • ๋ถˆ๊ฐ€ โ†’ ์›๋ณธ ๋ฐ˜ํ™˜.
"""
try:
import cv2
import numpy as np
except ImportError:
return binary_img, 0.0
# ์–ด๋‘์šด ํ”ฝ์…€ ์ขŒํ‘œ ์ถ”์ถœ (์ด์ง„ํ™”๋œ ์ด๋ฏธ์ง€: ์•…๋ณด ์„ /์Œํ‘œ = 0)
dark_coords = np.column_stack(np.where(binary_img < 128))
if len(dark_coords) < 200:
return binary_img, 0.0
# (row, col) โ†’ (x=col, y=row) ๋ณ€ํ™˜ ํ›„ minAreaRect
points = dark_coords[:, ::-1].astype(np.float32)
rect = cv2.minAreaRect(points)
angle = rect[2] # range: (-90, 0]
# (-90, -45] โ†’ ์„ธ๋กœ ๋ฐฉํ–ฅ ๋ฐ•์Šค โ†’ +90 ๋ณด์ •
if angle < -45:
angle = 90.0 + angle
# ์ด์ œ angle โˆˆ (-45, 45)
# ๋„ˆ๋ฌด ํฌ๋ฉด ๋ณด์ • ๋ถˆ๊ฐ€ (์˜†์œผ๋กœ ์ฐํžŒ ์ด๋ฏธ์ง€ ๋“ฑ)
if abs(angle) > 10.0:
return binary_img, 0.0
# ๋„ˆ๋ฌด ์ž‘์œผ๋ฉด ์˜๋ฏธ ์—†์Œ
if abs(angle) < 0.5:
return binary_img, 0.0
h, w = binary_img.shape
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
corrected = cv2.warpAffine(
binary_img, M, (w, h),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE,
)
return corrected, angle