# text_detection/preprocessing.py
# Commit d5841ad ("Simple clean UI version"), uploaded by AlBaraa63
"""
Preprocessing functions to improve OCR accuracy
Includes various image enhancement techniques
"""
import cv2
import numpy as np
def convert_to_grayscale(img):
    """Convert an image to single-channel grayscale.

    Args:
        img: Image array. Arrays that are not 3-D are treated as already
            grayscale. 3-D arrays with more than one channel are converted
            with OpenCV's BGR-to-gray conversion.

    Returns:
        The grayscale image. Input that is not 3-D is returned as-is
        (same object, no copy); a (H, W, 1) input is returned as a 2-D
        view of the same data.
    """
    if len(img.shape) != 3:
        return img
    if img.shape[2] == 1:
        # A trailing singleton channel axis is already grayscale. The
        # BGR2GRAY conversion expects a multi-channel colour image and
        # rejects single-channel input, so just drop the axis.
        return img[:, :, 0]
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
def apply_thresholding(img, method='otsu'):
    """
    Binarize an image with the selected thresholding strategy

    The input is converted to grayscale first.

    Methods:
    - 'otsu': threshold chosen automatically by Otsu's method
    - 'adaptive': Gaussian adaptive threshold (block size 11, constant 2)
    - 'binary': fixed global threshold at 127
    Any other value returns the grayscale image without thresholding.
    """
    grayscale = convert_to_grayscale(img)

    if method == 'otsu':
        # The threshold argument (0) is ignored; Otsu picks it from the data.
        otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
        return cv2.threshold(grayscale, 0, 255, otsu_flags)[1]

    if method == 'adaptive':
        # Per-neighbourhood threshold, useful under uneven lighting.
        return cv2.adaptiveThreshold(
            grayscale,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )

    if method == 'binary':
        return cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY)[1]

    # Unknown method: fall back to the plain grayscale image.
    return grayscale
def remove_noise(img, method='median'):
    """
    Smooth an image to suppress noise

    Methods:
    - 'median': 3x3 median blur (good for salt-and-pepper noise)
    - 'gaussian': 5x5 Gaussian blur (general smoothing)
    - 'bilateral': bilateral filter, d=9, sigmas 75/75 (preserves edges)
    Any other value returns the input image unchanged.
    """
    smoothed = img
    if method == 'median':
        smoothed = cv2.medianBlur(img, 3)
    elif method == 'gaussian':
        smoothed = cv2.GaussianBlur(img, (5, 5), 0)
    elif method == 'bilateral':
        smoothed = cv2.bilateralFilter(img, 9, 75, 75)
    return smoothed
def dilate_text(img, kernel_size=(1, 1)):
    """
    Apply one pass of morphological dilation with an all-ones kernel

    kernel_size is the (rows, cols) shape of the structuring element.
    NOTE(review): dilation grows bright regions, so strokes only get
    thicker when text is light on a dark background - confirm against
    callers. The default 1x1 kernel should leave the image unchanged.
    """
    structuring_element = np.ones(kernel_size, dtype=np.uint8)
    thickened = cv2.dilate(img, structuring_element, iterations=1)
    return thickened
def erode_text(img, kernel_size=(1, 1)):
    """
    Apply one pass of morphological erosion with an all-ones kernel

    kernel_size is the (rows, cols) shape of the structuring element.
    NOTE(review): erosion shrinks bright regions, so strokes only get
    thinner when text is light on a dark background - confirm against
    callers. The default 1x1 kernel should leave the image unchanged.
    """
    structuring_element = np.ones(kernel_size, dtype=np.uint8)
    thinned = cv2.erode(img, structuring_element, iterations=1)
    return thinned
def invert_image(img):
    """
    Return the bitwise complement of an image

    Handy when the text is white on a black background.
    """
    inverted = cv2.bitwise_not(img)
    return inverted
def enhance_contrast(img):
    """
    Boost local contrast with CLAHE

    The image is converted to grayscale, then equalized with a clip
    limit of 2.0 on an 8x8 tile grid. Returns the equalized grayscale
    image.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equalizer.apply(convert_to_grayscale(img))
def resize_image(img, scale=2.0):
    """
    Resize an image by a uniform scale factor using bicubic interpolation

    Larger images often work better with Tesseract.

    Args:
        img: Image array; the first two axes are taken as height and width.
        scale: Positive multiplier applied to both dimensions.

    Returns:
        The resized image. Each target dimension is truncated to an
        integer and is never smaller than one pixel.

    Raises:
        ValueError: If scale is zero or negative.
    """
    if scale <= 0:
        raise ValueError("scale must be positive, got {!r}".format(scale))

    height, width = img.shape[:2]
    # Clamp to 1 px so that shrinking a small image cannot produce a
    # zero-sized target, which cv2.resize rejects.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
def add_border(img, border_size=10, color=255):
    """
    Pad an image with a constant-value border on all four sides

    Args:
        img: Image array (grayscale or multi-channel).
        border_size: Border thickness in pixels, applied to every side.
        color: Border value. A single number is applied to every channel
            (the default 255 gives a white border on 8-bit images); a
            sequence is passed through as a per-channel value.

    Returns:
        The padded image.
    """
    if np.isscalar(color) and len(img.shape) == 3:
        # OpenCV expands a bare number to Scalar(color, 0, 0, 0), which
        # would fill only the first channel (a blue border on BGR input).
        # Replicate it per channel; a Scalar holds at most four values.
        color = (color,) * min(img.shape[2], 4)
    return cv2.copyMakeBorder(
        img, border_size, border_size, border_size, border_size,
        cv2.BORDER_CONSTANT, value=color
    )
def preprocess_pipeline(img, config='default'):
    """
    Run one of the predefined preprocessing recipes on an image

    Configs:
    - 'default': grayscale -> median denoise -> Otsu threshold -> 10 px border
    - 'aggressive': grayscale -> CLAHE contrast -> bilateral denoise ->
      adaptive threshold -> 2x2 dilation -> 15 px border
    - 'light': grayscale -> Otsu threshold
    - 'upscale': 3x resize -> grayscale -> median denoise -> Otsu threshold
      -> 20 px border
    Any other value returns the input image untouched.
    """
    if config == 'default':
        stage = remove_noise(convert_to_grayscale(img), 'median')
        stage = apply_thresholding(stage, 'otsu')
        return add_border(stage, 10)

    if config == 'aggressive':
        stage = enhance_contrast(convert_to_grayscale(img))
        stage = remove_noise(stage, 'bilateral')
        stage = apply_thresholding(stage, 'adaptive')
        stage = dilate_text(stage, (2, 2))
        return add_border(stage, 15)

    if config == 'light':
        return apply_thresholding(convert_to_grayscale(img), 'otsu')

    if config == 'upscale':
        stage = convert_to_grayscale(resize_image(img, scale=3.0))
        stage = remove_noise(stage, 'median')
        stage = apply_thresholding(stage, 'otsu')
        return add_border(stage, 20)

    # Unrecognised config: hand the image back without any processing.
    return img
def preprocess_for_ocr(img, show_steps=False):
    """
    Fixed preprocessing chain intended to feed Tesseract

    Stages, in order: grayscale -> 2.5x upscale -> bilateral denoise ->
    Otsu threshold -> 20 px border.

    Returns the final image. When show_steps is truthy, returns a tuple
    (final_image, steps) where steps maps a stage label to a copy of that
    stage's output.
    """
    grayscale = convert_to_grayscale(img)
    # Tesseract tends to do better on larger glyphs, so enlarge early.
    enlarged = resize_image(grayscale, scale=2.5)
    smoothed = remove_noise(enlarged, 'bilateral')
    binarized = apply_thresholding(smoothed, 'otsu')
    framed = add_border(binarized, 20)

    if not show_steps:
        return framed

    labelled_stages = (
        ('1_grayscale', grayscale),
        ('2_upscaled', enlarged),
        ('3_denoised', smoothed),
        ('4_threshold', binarized),
        ('5_bordered', framed),
    )
    # Snapshot every stage so callers can inspect them independently.
    steps = {label: stage.copy() for label, stage in labelled_stages}
    return framed, steps