Spaces:

triumphh77
/

Handwritten-Text-Recognition

Sleeping

App Files Files Community

Handwritten-Text-Recognition / src /utils /preprocessing.py

triumphh77

Upload 13 files

f9a156f verified 28 days ago

raw

history blame contribute delete

2.71 kB

	import cv2
	import numpy as np

	def preprocess_image(image_path_or_array, target_size=(1024, 32)):
	    """
	    Preprocess an image for handwritten text recognition.

	    Steps:
	    1. Load the image as grayscale (from a path) or convert the given array.
	    2. Enhance contrast with CLAHE.
	    3. Resize while keeping the aspect ratio, then paste the result onto a
	       white canvas (vertically centred, left-aligned).

	    Args:
	        image_path_or_array: Path to an image file (``str`` or
	            ``os.PathLike``), or an image array. 2-D arrays are used as-is,
	            ``(H, W, 1)`` arrays have the channel axis dropped, and any other
	            3-D array is converted with ``cv2.COLOR_BGR2GRAY``.
	            NOTE(review): the canvas is ``uint8``, so array inputs are
	            presumably expected to be ``uint8`` as well - confirm with callers.
	        target_size: ``(width, height)`` of the returned canvas.

	    Returns:
	        A ``uint8`` array of shape ``(height, width)`` with a white (255)
	        background. An all-white canvas is returned when the input array has
	        no pixels or when the image would shrink to zero width/height.

	    Raises:
	        FileNotFoundError: If the path cannot be read as an image.
	    """
	    import os  # local import: only needed to recognise path-like inputs

	    target_w, target_h = target_size

	    if isinstance(image_path_or_array, (str, os.PathLike)):
	        img = cv2.imread(os.fspath(image_path_or_array), cv2.IMREAD_GRAYSCALE)
	        if img is None:
	            raise FileNotFoundError(f"Could not read image at {image_path_or_array}")
	    else:
	        # An array without pixels cannot be equalised or resized. Treat it
	        # like the degenerate-size case below and hand back a blank canvas.
	        if image_path_or_array.size == 0:
	            return np.full((target_h, target_w), 255, dtype=np.uint8)

	        if len(image_path_or_array.shape) == 3:
	            if image_path_or_array.shape[2] == 1:
	                # Already single-channel: drop the trailing axis instead of
	                # asking cvtColor for a BGR conversion it cannot perform.
	                img = image_path_or_array[:, :, 0].copy()
	            else:
	                img = cv2.cvtColor(image_path_or_array, cv2.COLOR_BGR2GRAY)
	        else:
	            img = image_path_or_array.copy()

	    # Enhance contrast (CLAHE - Contrast Limited Adaptive Histogram Equalization).
	    # This helps smooth grayscale images but may amplify noise on inputs that
	    # are already aggressively thresholded/binarized.
	    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
	    img = clahe.apply(img)

	    # Scale by the tighter of the two ratios so the image fits inside the canvas.
	    h, w = img.shape
	    ratio = min(target_w / w, target_h / h)
	    new_w = int(w * ratio)
	    new_h = int(h * ratio)

	    # A dimension that rounds down to zero cannot be resized; return a blank canvas.
	    if new_w == 0 or new_h == 0:
	        return np.full((target_h, target_w), 255, dtype=np.uint8)

	    img_resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

	    # White canvas that receives the resized image.
	    target_img = np.full((target_h, target_w), 255, dtype=np.uint8)

	    # Centre vertically but align LEFT horizontally
	    # (left alignment is usually better for sequence models like CTC).
	    pad_y = (target_h - new_h) // 2
	    pad_x = 0

	    target_img[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = img_resized

	    # Returned without inversion to match training behavior (white background).
	    return target_img

	def deskew(img):
	    """
	    Deskew the image using image moments.

	    The slant is estimated as ``mu11 / mu02`` from ``cv2.moments`` and removed
	    with a horizontal shear about the image's vertical midpoint.

	    Args:
	        img: Image array passed to ``cv2.moments`` and ``cv2.warpAffine``.
	            NOTE(review): moments weight pixels by intensity, so this
	            presumably expects bright ink on a dark background - confirm
	            against callers, since preprocess_image returns a white background.

	    Returns:
	        A new image of the same width and height. When ``mu02`` is close to
	        zero the slant is undefined and an unmodified copy is returned.
	    """
	    m = cv2.moments(img)
	    if abs(m['mu02']) < 1e-2:
	        return img.copy()

	    height, width = img.shape[:2]
	    skew = m['mu11'] / m['mu02']

	    # M maps each OUTPUT pixel (x, y) to the source location
	    # (x + skew * (y - height / 2), y), i.e. it is a destination -> source map.
	    M = np.float32([[1, skew, -0.5 * height * skew], [0, 1, 0]])

	    # WARP_INVERSE_MAP tells warpAffine that M is already destination -> source.
	    # Without the flag OpenCV treats M as the forward (source -> output)
	    # transform, which shears the opposite way and increases the slant.
	    img_deskewed = cv2.warpAffine(
	        img,
	        M,
	        (width, height),
	        flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
	        borderMode=cv2.BORDER_REPLICATE,
	    )
	    return img_deskewed

	if __name__ == "__main__":
	    # Running the file directly only prints a short readiness banner.
	    _banner = "Preprocessing module ready."
	    print(_banner)