File size: 5,515 Bytes
d5841ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
"""
Preprocessing functions to improve OCR accuracy
Includes various image enhancement techniques
"""
import cv2
import numpy as np


def convert_to_grayscale(img):
    """
    Convert image to grayscale

    Handles images that are already single-channel (2-D, or 3-D with a
    trailing axis of length 1) as well as 3-channel and 4-channel images.

    Args:
        img: Image array. A 3-D array is treated as BGR (3 channels) or
            BGRA (4 channels), which is the order the conversion codes
            used here expect.

    Returns:
        A 2-D grayscale image. An input that is not 3-D is returned as is
        (the same object, not a copy).
    """
    if len(img.shape) != 3:
        return img

    channels = img.shape[2]
    if channels == 1:
        # Already one channel; BGR2GRAY would reject it, so just drop the
        # trailing axis instead of calling OpenCV.
        return img[:, :, 0]
    if channels == 4:
        # Images carrying an alpha channel need the BGRA conversion code;
        # BGR2GRAY only accepts 3-channel input.
        return cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


def apply_thresholding(img, method='otsu'):
    """
    Binarize an image with the selected thresholding strategy

    The input is converted to grayscale before any threshold is applied.

    Methods:
        - 'otsu': Otsu's automatic threshold selection
        - 'adaptive': Gaussian adaptive threshold (block size 11, constant 2)
        - 'binary': Fixed global threshold at 127

    Any other method name returns the grayscale image without thresholding.
    """
    grayscale = convert_to_grayscale(img)

    if method == 'binary':
        # Fixed cut-off; cv2.threshold returns (threshold_used, image)
        return cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)[1]

    if method == 'adaptive':
        # Per-neighbourhood threshold - copes with uneven lighting
        return cv2.adaptiveThreshold(
            grayscale, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

    if method == 'otsu':
        # The 0 threshold is a placeholder; the OTSU flag picks the real one
        otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
        return cv2.threshold(grayscale, 0, 255, otsu_flags)[1]

    # Unrecognised method: hand back the grayscale image untouched
    return grayscale


def remove_noise(img, method='median'):
    """
    Smooth an image with the selected denoising filter

    Methods:
        - 'median': 3x3 median blur (good for salt-and-pepper noise)
        - 'gaussian': 5x5 Gaussian blur (general smoothing)
        - 'bilateral': Bilateral filter, diameter 9, both sigmas 75
          (smooths while preserving edges)

    Any other method name returns the input image object unchanged.
    """
    if method == 'bilateral':
        cleaned = cv2.bilateralFilter(img, 9, 75, 75)
    elif method == 'gaussian':
        # Sigma of 0 lets OpenCV derive it from the kernel size
        cleaned = cv2.GaussianBlur(img, (5, 5), 0)
    elif method == 'median':
        cleaned = cv2.medianBlur(img, 3)
    else:
        # Unknown method: no filtering at all
        cleaned = img

    return cleaned


def dilate_text(img, kernel_size=(1, 1)):
    """
    Apply one pass of morphological dilation with an all-ones kernel

    kernel_size is the shape of the structuring element handed to
    cv2.dilate.

    NOTE(review): dilation grows bright regions, so this thickens light
    text on a dark background; on dark-on-light images it would thin the
    strokes instead - confirm which polarity callers pass in.
    """
    structuring_element = np.ones(kernel_size, dtype=np.uint8)
    thickened = cv2.dilate(img, structuring_element, iterations=1)
    return thickened


def erode_text(img, kernel_size=(1, 1)):
    """
    Apply one pass of morphological erosion with an all-ones kernel

    kernel_size is the shape of the structuring element handed to
    cv2.erode.

    NOTE(review): erosion shrinks bright regions, so this thins light
    text on a dark background; on dark-on-light images it would thicken
    the strokes instead - confirm which polarity callers pass in.
    """
    structuring_element = np.ones(kernel_size, dtype=np.uint8)
    thinned = cv2.erode(img, structuring_element, iterations=1)
    return thinned


def invert_image(img):
    """
    Return the bitwise complement of the image

    Swaps light and dark, which helps when the text is white on black.
    """
    inverted = cv2.bitwise_not(img)
    return inverted


def enhance_contrast(img):
    """
    Boost local contrast with CLAHE

    The image is converted to grayscale first, then equalized with a
    clip limit of 2.0 over an 8x8 tile grid.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equalizer.apply(convert_to_grayscale(img))


def resize_image(img, scale=2.0):
    """
    Resize image for better OCR
    Larger images often work better with Tesseract

    Args:
        img: Image array whose first two axes are (height, width).
        scale: Positive factor applied to both width and height.

    Returns:
        The image resized with bicubic interpolation.

    Raises:
        ValueError: If scale is not greater than zero.
    """
    # "not scale > 0" also rejects NaN, which compares False to everything.
    if not scale > 0:
        raise ValueError("scale must be a positive number, got %r" % (scale,))

    height, width = img.shape[:2]
    # int() truncates, so a small scale could yield a 0-pixel dimension,
    # which cv2.resize rejects; keep at least one pixel per axis.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)


def add_border(img, border_size=10, color=255):
    """
    Add a constant-color border (white by default) around image

    Args:
        img: Image array to pad.
        border_size: Number of pixels added on each of the four sides.
        color: Border value. A single number is applied to every channel
            of a multi-channel image; a sequence is passed to OpenCV as is.

    Returns:
        The padded image.
    """
    if np.isscalar(color) and len(img.shape) == 3:
        # OpenCV expands a bare number to Scalar(color, 0, 0, 0), which
        # would fill only the first channel (a blue border on BGR input
        # instead of white). Repeat it per channel; a Scalar holds at
        # most four components.
        color = (color,) * min(img.shape[2], 4)

    return cv2.copyMakeBorder(
        img, border_size, border_size, border_size, border_size,
        cv2.BORDER_CONSTANT, value=color
    )


def preprocess_pipeline(img, config='default'):
    """
    Run one of the predefined preprocessing recipes

    Configs:
        - 'default': grayscale, median denoise, Otsu threshold, 10px border
        - 'aggressive': grayscale, CLAHE, bilateral denoise, adaptive
          threshold, 2x2 dilation, 15px border
        - 'light': grayscale and Otsu threshold only
        - 'upscale': 3x resize, then grayscale, median denoise, Otsu
          threshold, 20px border

    Any other config name returns the input image unchanged.
    """
    if config == 'light':
        # Minimal recipe: binarize and nothing else
        return apply_thresholding(convert_to_grayscale(img), 'otsu')

    if config == 'default':
        gray = convert_to_grayscale(img)
        smooth = remove_noise(gray, 'median')
        binary = apply_thresholding(smooth, 'otsu')
        return add_border(binary, 10)

    if config == 'upscale':
        # Enlarge first so the later steps work on the bigger image
        enlarged = resize_image(img, scale=3.0)
        gray = convert_to_grayscale(enlarged)
        smooth = remove_noise(gray, 'median')
        binary = apply_thresholding(smooth, 'otsu')
        return add_border(binary, 20)

    if config == 'aggressive':
        gray = convert_to_grayscale(img)
        contrasted = enhance_contrast(gray)
        smooth = remove_noise(contrasted, 'bilateral')
        binary = apply_thresholding(smooth, 'adaptive')
        thick = dilate_text(binary, (2, 2))
        return add_border(thick, 15)

    # Unknown config: no preprocessing
    return img


def preprocess_for_ocr(img, show_steps=False):
    """
    Fixed preprocessing chain intended for OCR input

    Stages, in order: grayscale, 2.5x upscale, bilateral denoise, Otsu
    threshold, 20px border.

    Returns the final image. When show_steps is true, returns a tuple
    (final_image, steps) where steps maps '1_grayscale', '2_upscaled',
    '3_denoised', '4_threshold' and '5_bordered' to a copy of each
    stage's output.
    """
    snapshots = {}

    def _record(label, frame):
        # Keep a copy of the stage output only when the caller asked for it
        if show_steps:
            snapshots[label] = frame.copy()
        return frame

    stage = _record('1_grayscale', convert_to_grayscale(img))
    # Upscaling comes before denoising and thresholding
    stage = _record('2_upscaled', resize_image(stage, scale=2.5))
    stage = _record('3_denoised', remove_noise(stage, 'bilateral'))
    stage = _record('4_threshold', apply_thresholding(stage, 'otsu'))
    stage = _record('5_bordered', add_border(stage, 20))

    if show_steps:
        return stage, snapshots
    return stage