File size: 4,490 Bytes
900df0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""Tests for the pattern matching module."""

import numpy as np
import pytest
from unittest.mock import MagicMock, patch


@pytest.fixture
def sample_word_image() -> np.ndarray:
    """Build the reference synthetic word image used by the tests.

    Returns:
        A 40x100 uint8 grayscale array: white (255) background with two
        dark (30) horizontal bands standing in for lines of text.
    """
    canvas = np.full((40, 100), 255, dtype=np.uint8)
    # Each entry is (row_start, row_stop, col_start, col_stop) of a dark band.
    bands = ((5, 15, 5, 95), (20, 30, 10, 90))
    for top, bottom, left, right in bands:
        canvas[top:bottom, left:right] = 30
    return canvas


@pytest.fixture
def similar_word_image() -> np.ndarray:
    """Create a slightly different, reproducible version of the word image.

    The band intensities are shifted a little (35 and 25 instead of 30) and
    small integer noise in [-5, 5) is added to every pixel. The noise is
    drawn from a locally seeded generator so that every test run sees the
    exact same image; relying on the global, unseeded NumPy random state
    would make failures impossible to reproduce.

    Returns:
        40x100 uint8 grayscale image similar to sample_word_image.
    """
    # Work in int16 so adding negative noise cannot wrap around before the
    # values are clipped back into the valid 0..255 range.
    image = np.full((40, 100), 255, dtype=np.int16)
    # Similar but slightly different pattern
    image[5:15, 5:95] = 35
    image[20:30, 10:90] = 25
    # Fixed seed: deterministic noise, independent of global random state.
    rng = np.random.default_rng(42)
    noise = rng.integers(-5, 5, size=image.shape, dtype=np.int16)
    return np.clip(image + noise, 0, 255).astype(np.uint8)


@pytest.fixture
def different_word_image() -> np.ndarray:
    """Build a word image that is clearly unlike the banded sample.

    Returns:
        A 40x100 uint8 grayscale array: white (255) background with one
        large dark (20) rectangle covering rows 5-34 and columns 5-94.
    """
    canvas = np.full((40, 100), 255, dtype=np.uint8)
    block_rows = slice(5, 35)
    block_cols = slice(5, 95)
    # A single solid block instead of separate bands.
    canvas[block_rows, block_cols] = 20
    return canvas


class TestTextSimilarity:
    """Tests for text similarity computation.

    Similarity is measured with ``difflib.SequenceMatcher.ratio()``, which
    returns ``2 * matches / total_characters`` as a float in ``[0.0, 1.0]``.
    """

    @staticmethod
    def _ratio(first: str, second: str) -> float:
        """Return the SequenceMatcher similarity ratio of two strings.

        Args:
            first: Left-hand text.
            second: Right-hand text.

        Returns:
            Similarity in the range 0.0 (nothing shared) to 1.0 (identical).
        """
        # Imported locally so the helper does not depend on module imports.
        from difflib import SequenceMatcher

        return SequenceMatcher(None, first, second).ratio()

    def test_identical_text(self) -> None:
        """Test similarity of identical texts."""
        assert self._ratio("مرحبا", "مرحبا") == 1.0

    def test_similar_text(self) -> None:
        """Test similarity of similar texts."""
        # Only a trailing space differs: 2 * 5 / 11 is roughly 0.91.
        assert self._ratio("مرحبا", "مرحبا ") > 0.8

    def test_different_text(self) -> None:
        """Test similarity of different texts."""
        # The words share only isolated letters in opposite order, giving a
        # ratio of 2/9. A bound of 1.0 would also accept near-identical
        # strings, so require the score to be clearly low.
        assert self._ratio("مرحبا", "عالم") < 0.5

    def test_empty_text(self) -> None:
        """Test similarity with empty text."""
        # An empty string shares nothing with a non-empty one, either way.
        assert self._ratio("", "مرحبا") == 0.0
        assert self._ratio("مرحبا", "") == 0.0


class TestSSIMComputation:
    """Tests for SSIM-like computation.

    The score used here is a simple pixel-level measure: one minus the mean
    absolute difference of the two images, normalised by 255.
    """

    @staticmethod
    def _pixel_similarity(first: np.ndarray, second: np.ndarray) -> float:
        """Return 1.0 minus the normalised mean absolute pixel difference.

        Args:
            first: First image array.
            second: Second image array, same shape as ``first``.

        Returns:
            1.0 for identical images, 0.0 for all-black versus all-white.
        """
        # Convert to float first so uint8 subtraction cannot wrap around.
        gap = np.abs(first.astype(float) - second.astype(float))
        return 1.0 - (np.mean(gap) / 255.0)

    def test_identical_images(self) -> None:
        """An image compared with itself scores (almost exactly) 1.0."""
        gray = np.full((50, 50), 128, dtype=np.uint8)
        similarity = self._pixel_similarity(gray, gray)
        assert abs(similarity - 1.0) < 0.01

    def test_different_images(self) -> None:
        """All-black versus all-white images score below 0.5."""
        black = np.zeros((50, 50), dtype=np.uint8)
        white = np.full((50, 50), 255, dtype=np.uint8)
        similarity = self._pixel_similarity(black, white)
        assert similarity < 0.5


class TestPatternMatching:
    """Tests for OCR pattern matching and correction.

    These checks validate the synthetic image fixtures themselves: their
    dimensions, that they are not blank, and that the noisy variant really
    differs from the reference image.
    """

    def test_word_image_size(self, sample_word_image: np.ndarray) -> None:
        """The reference word image is 40 rows by 100 columns."""
        expected_shape = (40, 100)
        assert sample_word_image.shape == expected_shape

    def test_similar_word_dimensions(
        self,
        sample_word_image: np.ndarray,
        similar_word_image: np.ndarray,
    ) -> None:
        """The similar image has the same shape as the reference image."""
        reference_shape = sample_word_image.shape
        assert reference_shape == similar_word_image.shape

    def test_different_word_dimensions(
        self,
        sample_word_image: np.ndarray,
        different_word_image: np.ndarray,
    ) -> None:
        """The different image has the same shape as the reference image."""
        reference_shape = sample_word_image.shape
        assert reference_shape == different_word_image.shape

    def test_image_variance(self, sample_word_image: np.ndarray) -> None:
        """The reference image is not a uniform field of one value."""
        # A strictly positive variance means at least two distinct pixels.
        assert np.var(sample_word_image) > 0

    def test_noise_addition(
        self,
        sample_word_image: np.ndarray,
        similar_word_image: np.ndarray,
    ) -> None:
        """The noisy variant is not pixel-for-pixel equal to the reference."""
        identical = np.array_equal(sample_word_image, similar_word_image)
        assert not identical