File size: 3,049 Bytes
b12284c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""Image preprocessing for OCR accuracy.



Applies grayscale conversion, deskewing, denoising, and adaptive
thresholding to scanned page images before feeding them to Tesseract.
"""

from __future__ import annotations

import logging

import cv2
import numpy as np
from PIL import Image

logger = logging.getLogger(__name__)


def pil_to_cv(image: Image.Image) -> np.ndarray:
    """Turn a PIL image into an OpenCV-style BGR pixel array.

    The image is first converted to RGB mode, then its channels are
    reordered from RGB to BGR.
    """
    return cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)


def cv_to_pil(img: np.ndarray) -> Image.Image:
    """Turn an OpenCV BGR pixel array into a PIL image.

    The channels are reordered from BGR to RGB before the array is
    wrapped in a PIL image.
    """
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))


def to_grayscale(img: np.ndarray) -> np.ndarray:
    """Return a single-channel (2-D) version of ``img``.

    Args:
        img: A 2-D array (already grayscale), a 3-D array with exactly one
            channel, or a 3-D multi-channel array that is treated as BGR.

    Returns:
        A 2-D array. 2-D inputs are returned unchanged, ``(h, w, 1)``
        inputs are returned as a 2-D view of their only channel, and any
        other 3-D input is converted with ``cv2.COLOR_BGR2GRAY``.
    """
    if img.ndim == 3:
        if img.shape[2] == 1:
            # Already grayscale, just carrying a trailing channel axis.
            # BGR2GRAY does not accept one-channel input, so drop the axis
            # instead of calling cvtColor.
            return img[:, :, 0]
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img


def deskew(gray: np.ndarray, max_angle: float = 10.0) -> np.ndarray:
    """Correct slight page rotation using the bounding rectangle of the ink.

    Pixels darker than 128 are treated as ink. The angle of the
    minimum-area rectangle enclosing them is taken as the skew, and the
    image is rotated about its centre by that angle.

    Args:
        gray: Image to straighten; assumed to be a 2-D grayscale array with
            dark ink on a light background.
        max_angle: Largest skew, in degrees, that will be corrected. Larger
            estimates are ignored to avoid false positives on non-skewed
            images.

    Returns:
        The rotated image (same width and height as the input), or ``gray``
        itself when fewer than 100 ink pixels are found, when the estimated
        angle exceeds ``max_angle``, or when it is below 0.2 degrees.
    """
    ink = np.column_stack(np.where(gray < 128))
    if ink.shape[0] < 100:
        return gray  # not enough ink to detect angle

    # np.where yields (row, col) pairs, but minAreaRect expects (x, y)
    # points. Passing the transposed coordinates mirrors the measured angle,
    # which rotates the page the wrong way and doubles the skew.
    points = np.ascontiguousarray(ink[:, ::-1], dtype=np.int32)

    angle = cv2.minAreaRect(points)[-1]
    # Depending on the OpenCV version the angle is reported in [-90, 0) or
    # (0, 90]; fold either range into [-45, 45] around the nearest axis.
    if angle < -45:
        angle = 90 + angle
    elif angle > 45:
        angle = angle - 90

    if abs(angle) > max_angle or abs(angle) < 0.2:
        return gray  # skip if angle too large or negligible

    h, w = gray.shape[:2]
    center = (w // 2, h // 2)
    mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(
        gray, mat, (w, h),
        flags=cv2.INTER_CUBIC,
        # Replicate edge pixels so the corners exposed by the rotation do
        # not turn into black wedges.
        borderMode=cv2.BORDER_REPLICATE,
    )
    logger.debug("Deskewed by %.2f°", angle)
    return rotated


def adaptive_threshold(
    gray: np.ndarray,
    block_size: int = 31,
    c: float = 11,
) -> np.ndarray:
    """Apply adaptive Gaussian thresholding for mixed lighting.

    Args:
        gray: Single-channel image to binarise.
        block_size: Side length of the pixel neighbourhood used to compute
            each local threshold. Must be odd and greater than 1.
        c: Constant subtracted from the Gaussian-weighted neighbourhood
            mean to obtain the threshold.

    Returns:
        The image returned by ``cv2.adaptiveThreshold`` with
        ``THRESH_BINARY`` and a maximum value of 255.

    Raises:
        ValueError: If ``block_size`` is even or not greater than 1.
    """
    # OpenCV only accepts odd neighbourhood sizes above 1; fail early with a
    # readable message instead of an opaque cv2 assertion.
    if block_size <= 1 or block_size % 2 == 0:
        raise ValueError(
            f"block_size must be an odd integer greater than 1, got {block_size!r}"
        )
    return cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        blockSize=block_size,
        C=c,
    )


def denoise(gray: np.ndarray) -> np.ndarray:
    """Denoise the image with OpenCV's non-local-means filter (``h=10``).

    Intended as a light clean-up pass that keeps text edges intact.
    """
    cleaned = cv2.fastNlMeansDenoising(gray, h=10)
    return cleaned


def preprocess_for_ocr(image: Image.Image) -> Image.Image:
    """Run the whole clean-up chain: grayscale → deskew → denoise → threshold.

    The PIL image is converted to an OpenCV array, reduced to grayscale,
    passed through each stage in order, and wrapped back into a PIL image
    ready for pytesseract.
    """
    page = to_grayscale(pil_to_cv(image))
    # Each stage takes the previous stage's output; the order matters.
    for stage in (deskew, denoise, adaptive_threshold):
        page = stage(page)
    return Image.fromarray(page)


def is_mostly_blank(image: Image.Image, threshold: float = 0.98) -> bool:
    """Return True if the image is almost entirely white (blank page).

    Args:
        image: Page image; it is converted to 8-bit grayscale (mode ``"L"``)
            before counting.
        threshold: Fraction of pixels that must be brighter than 240 for the
            page to count as blank. The comparison is strict, so a ratio
            exactly equal to ``threshold`` is not blank.

    Returns:
        A plain Python ``bool``. An image with no pixels at all is reported
        as blank.
    """
    gray = np.array(image.convert("L"))
    if gray.size == 0:
        # No pixels means no ink; this also avoids dividing by zero below.
        return True
    white_ratio = np.count_nonzero(gray > 240) / gray.size
    # Coerce numpy.bool_ to a real bool so identity checks and JSON
    # serialisation behave as the annotation promises.
    return bool(white_ratio > threshold)