Spaces:
Sleeping
Sleeping
File size: 4,490 Bytes
"""Tests for the pattern matching module."""
import numpy as np
import pytest
from unittest.mock import MagicMock, patch
@pytest.fixture
def sample_word_image() -> np.ndarray:
    """Build the reference synthetic word image.

    Returns:
        A 40x100 ``uint8`` grayscale array: white (255) background with two
        dark (30) horizontal bands standing in for lines of text.
    """
    canvas = np.full((40, 100), 255, dtype=np.uint8)
    # (row range, column range) of each dark band.
    bands = (
        (slice(5, 15), slice(5, 95)),
        (slice(20, 30), slice(10, 90)),
    )
    for rows, cols in bands:
        canvas[rows, cols] = 30
    return canvas
@pytest.fixture
def similar_word_image() -> np.ndarray:
    """Create a slightly perturbed variant of the sample word image.

    The band layout matches ``sample_word_image`` but the band intensities
    are shifted (35 and 25 instead of 30) and small integer noise is added
    to every pixel.

    Returns:
        A 40x100 ``uint8`` grayscale array. The result is identical on every
        call because the noise generator is seeded locally.
    """
    image = np.full((40, 100), 255, dtype=np.uint8)
    # Similar but slightly different pattern
    image[5:15, 5:95] = 35
    image[20:30, 10:90] = 25
    # Seed a private generator so the fixture is reproducible and does not
    # depend on (or disturb) the global NumPy random state.
    rng = np.random.default_rng(0)
    # Upper bound is exclusive, so the noise lies in [-5, 4].
    noise = rng.integers(-5, 5, size=image.shape, dtype=np.int16)
    # Add in int16 to avoid uint8 wrap-around, then clamp back to 0..255.
    noisy = np.clip(image.astype(np.int16) + noise, 0, 255)
    return noisy.astype(np.uint8)
@pytest.fixture
def different_word_image() -> np.ndarray:
    """Build a word image whose pattern is clearly unlike the sample image.

    Returns:
        A 40x100 ``uint8`` grayscale array: one large dark (20) block
        covering rows 5-34 and columns 5-94, surrounded by a white (255)
        border five pixels wide.
    """
    dark_block = np.full((30, 90), 20, dtype=np.uint8)
    # Padding 5 px on every side yields the 40x100 canvas with the block inside.
    return np.pad(dark_block, 5, mode="constant", constant_values=255)
class TestTextSimilarity:
    """Tests for text similarity computed with ``difflib.SequenceMatcher``."""

    @staticmethod
    def _ratio(first: str, second: str) -> float:
        """Return the ``SequenceMatcher`` similarity ratio of two strings.

        Args:
            first: First text to compare.
            second: Second text to compare.

        Returns:
            A float in ``[0.0, 1.0]``; 1.0 means the strings are identical.
        """
        from difflib import SequenceMatcher

        return SequenceMatcher(None, first, second).ratio()

    def test_identical_text(self) -> None:
        """Identical strings have a ratio of exactly 1.0."""
        assert self._ratio("مرحبا", "مرحبا") == 1.0

    def test_similar_text(self) -> None:
        """A string and itself plus a trailing space are close but not equal."""
        sim = self._ratio("مرحبا", "مرحبا ")
        assert 0.8 < sim < 1.0

    def test_different_text(self) -> None:
        """Unrelated words score low, not merely below a perfect match."""
        sim = self._ratio("مرحبا", "عالم")
        # Only one character can be aligned, giving 2 * 1 / (5 + 4) ~= 0.22.
        assert sim < 0.5

    def test_empty_text(self) -> None:
        """An empty string has zero similarity to a non-empty one."""
        assert self._ratio("", "مرحبا") == 0.0
        assert self._ratio("مرحبا", "") == 0.0
class TestSSIMComputation:
    """Tests for a simple SSIM-like score.

    The score used here is ``1 - mean(|a - b|) / 255``, i.e. one minus the
    normalised mean absolute pixel difference.
    """

    def test_identical_images(self) -> None:
        """An image compared with itself scores (approximately) 1.0."""
        gray = np.full((50, 50), 128, dtype=np.uint8)
        # Compare in float to avoid uint8 wrap-around on subtraction.
        mean_abs_diff = np.abs(gray.astype(float) - gray.astype(float)).mean()
        similarity = 1.0 - mean_abs_diff / 255.0
        assert abs(similarity - 1.0) < 0.01

    def test_different_images(self) -> None:
        """An all-black image against an all-white one scores below 0.5."""
        black = np.zeros((50, 50), dtype=np.uint8)
        white = np.full((50, 50), 255, dtype=np.uint8)
        mean_abs_diff = np.abs(black.astype(float) - white.astype(float)).mean()
        similarity = 1.0 - mean_abs_diff / 255.0
        assert similarity < 0.5
class TestPatternMatching:
    """Sanity checks on the synthetic word-image fixtures.

    These tests only inspect the fixture arrays (shape, variance and
    inequality); they do not call any matching code.
    """

    def test_word_image_size(self, sample_word_image: np.ndarray) -> None:
        """The sample word image is 40 rows by 100 columns."""
        expected_shape = (40, 100)
        assert sample_word_image.shape == expected_shape

    def test_similar_word_dimensions(
        self,
        sample_word_image: np.ndarray,
        similar_word_image: np.ndarray,
    ) -> None:
        """The similar image has the same shape as the sample image."""
        reference_shape = sample_word_image.shape
        assert reference_shape == similar_word_image.shape

    def test_different_word_dimensions(
        self,
        sample_word_image: np.ndarray,
        different_word_image: np.ndarray,
    ) -> None:
        """The different image has the same shape as the sample image."""
        reference_shape = sample_word_image.shape
        assert reference_shape == different_word_image.shape

    def test_image_variance(self, sample_word_image: np.ndarray) -> None:
        """The sample image is not a single uniform intensity."""
        pixel_variance = sample_word_image.var()
        assert pixel_variance > 0  # Not uniform

    def test_noise_addition(
        self,
        sample_word_image: np.ndarray,
        similar_word_image: np.ndarray,
    ) -> None:
        """The similar (noisy) image is not pixel-identical to the sample."""
        identical = np.array_equal(sample_word_image, similar_word_image)
        assert not identical
|