"""Tests for image preprocessing module.""" import numpy as np import pytest from modules.vision.image_preprocessor import ImagePreprocessor @pytest.fixture def sample_image() -> np.ndarray: """Create a synthetic test image (white background with black text). Returns: 200x400 BGR image with synthetic text-like content. """ image = np.ones((200, 400, 3), dtype=np.uint8) * 255 # Draw some "text" as dark rectangles image[50:60, 50:150] = 0 # Horizontal line of "text" image[80:90, 50:200] = 0 # Longer line image[110:120, 50:180] = 0 # Medium line image[140:150, 50:160] = 0 # Shorter line # Add some noise noise = np.random.randint(0, 10, (200, 400, 3), dtype=np.uint8) image = np.clip(image.astype(np.int16) - noise.astype(np.int16), 0, 255).astype(np.uint8) return image @pytest.fixture def noisy_image() -> np.ndarray: """Create a noisy test image. Returns: 200x400 BGR image with Gaussian noise. """ image = np.ones((200, 400, 3), dtype=np.uint8) * 255 image[70:130, 50:350] = 0 # Dark band # Add heavy noise noise = np.random.normal(0, 25, (200, 400, 3)).astype(np.int16) image = np.clip(image.astype(np.int16) + noise, 0, 255).astype(np.uint8) return image @pytest.fixture def preprocessor() -> ImagePreprocessor: """Create a preprocessor with all steps enabled.""" return ImagePreprocessor( apply_clahe=True, apply_denoise=True, apply_deskew=True, apply_binarize=True, denoise_strength=10, ) class TestPreprocessorInit: """Tests for preprocessor initialization.""" def test_default_config(self) -> None: """Test default preprocessor configuration.""" pp = ImagePreprocessor() assert pp.apply_clahe is True assert pp.apply_denoise is True assert pp.apply_deskew is True assert pp.apply_binarize is True assert pp.denoise_strength == 10 def test_custom_config(self) -> None: """Test custom preprocessor configuration.""" pp = ImagePreprocessor( apply_clahe=False, apply_denoise=False, clahe_clip_limit=3.0, denoise_strength=15, ) assert pp.apply_clahe is False assert pp.apply_denoise is False assert pp.clahe_clip_limit == 3.0 assert pp.denoise_strength == 15 def test_odd_window_sizes(self) -> None: """Test that window sizes are forced to odd values.""" pp = ImagePreprocessor( denoise_template_window=6, # Even number denoise_search_window=20, # Even number ) assert pp.denoise_template_window % 2 == 1 assert pp.denoise_search_window % 2 == 1 class TestGrayscaleConversion: """Tests for grayscale conversion.""" def test_bgr_to_grayscale(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test converting a BGR image to grayscale.""" gray = preprocessor._to_grayscale(sample_image) assert gray.ndim == 2 assert gray.shape == (200, 400) def test_already_grayscale(self, preprocessor: ImagePreprocessor) -> None: """Test that grayscale input is returned unchanged.""" gray = np.ones((100, 100), dtype=np.uint8) * 128 result = preprocessor._to_grayscale(gray) assert result.shape == (100, 100) class TestDenoising: """Tests for Gaussian blur denoising.""" def test_denoise_reduces_noise(self, preprocessor: ImagePreprocessor, noisy_image: np.ndarray) -> None: """Test that denoising reduces noise variance.""" gray = preprocessor._to_grayscale(noisy_image) denoised = preprocessor._apply_denoise(gray) # Denoised should have less variance in flat areas original_var = np.var(noisy_image[0:50, 0:50].astype(np.float64)) denoised_var = np.var(denoised[0:50, 0:50].astype(np.float64)) assert denoised_var <= original_var def test_denoise_preserves_shape(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test that denoising preserves image shape.""" gray = preprocessor._to_grayscale(sample_image) result = preprocessor._apply_denoise(gray) assert result.shape == gray.shape class TestBinarization: """Tests for Otsu threshold binarization.""" def test_binarize_output(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test that binarization produces binary output.""" gray = preprocessor._to_grayscale(sample_image) binary = preprocessor._apply_otsu(gray) # Check that output is single channel assert binary.ndim == 2 # Check that values are binary (0 or 255) unique_values = set(np.unique(binary)) assert unique_values.issubset({0, 255}) def test_binarize_preserves_shape(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test that binarization preserves shape.""" gray = preprocessor._to_grayscale(sample_image) binary = preprocessor._apply_otsu(gray) assert binary.shape == gray.shape class TestCLAHE: """Tests for CLAHE contrast enhancement.""" def test_clahe_output(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test that CLAHE produces valid output.""" gray = preprocessor._to_grayscale(sample_image) enhanced = preprocessor._apply_clahe(gray) assert enhanced.shape == gray.shape assert enhanced.dtype == np.uint8 class TestDeskew: """Tests for deskew detection and correction.""" def test_deskew_preserves_shape(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test that deskew preserves image shape.""" gray = preprocessor._to_grayscale(sample_image) result = preprocessor._apply_deskew(gray) assert result is not None assert result.shape == gray.shape class TestFullPipeline: """Tests for the full preprocessing pipeline.""" def test_process_all_enabled(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test full pipeline with all steps enabled.""" result = preprocessor.preprocess(sample_image) assert result is not None def test_process_no_steps(self, sample_image: np.ndarray) -> None: """Test pipeline with all steps disabled.""" pp = ImagePreprocessor( apply_clahe=False, apply_denoise=False, apply_deskew=False, apply_binarize=False, ) result = pp.preprocess(sample_image, return_numpy=True) np.testing.assert_array_equal(result, ImagePreprocessor._to_numpy(sample_image)) def test_process_return_numpy(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test pipeline returning numpy array.""" result = preprocessor.preprocess(sample_image, return_numpy=True) assert isinstance(result, np.ndarray) def test_process_return_pil(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test pipeline returning PIL image.""" result = preprocessor.preprocess(sample_image, return_numpy=False) assert hasattr(result, "mode") class TestSmartSegment: """Tests for word segmentation.""" def test_segment_returns_list(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test that segmentation returns a list.""" result = preprocessor.smart_segment(sample_image) assert isinstance(result, list) def test_get_bounding_boxes(self, preprocessor: ImagePreprocessor, sample_image: np.ndarray) -> None: """Test bounding box extraction.""" result = preprocessor.get_word_bounding_boxes(sample_image) assert isinstance(result, list)