Spaces:
Sleeping
Sleeping
| """Unit tests for TextContent value object.""" | |
| import pytest | |
| from src.domain.models.text_content import TextContent | |
class TestTextContent:
    """Test cases for the TextContent value object.

    Covers construction and validation of ``text``, ``language`` and
    ``encoding``, the derived properties (``word_count``,
    ``character_count``, ``is_empty``), ``truncate`` and immutability.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_valid_text_content_creation(self):
        """Test creating a valid TextContent instance."""
        text = TextContent(
            text="Hello, world!",
            language="en",
            encoding="utf-8"
        )

        assert text.text == "Hello, world!"
        assert text.language == "en"
        assert text.encoding == "utf-8"
        assert text.word_count == 2
        assert text.character_count == 13
        assert text.is_empty is False

    def test_text_content_with_default_encoding(self):
        """Test that encoding defaults to utf-8 when it is omitted."""
        text = TextContent(
            text="Hello, world!",
            language="en"
        )

        assert text.encoding == "utf-8"

    # ------------------------------------------------------------------
    # Text validation
    # ------------------------------------------------------------------

    def test_non_string_text_raises_error(self):
        """Test that non-string text raises TypeError."""
        with pytest.raises(TypeError, match="Text must be a string"):
            TextContent(
                text=123,  # type: ignore
                language="en"
            )

    def test_empty_text_raises_error(self):
        """Test that empty text raises ValueError."""
        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
            TextContent(
                text="",
                language="en"
            )

    def test_whitespace_only_text_raises_error(self):
        """Test that whitespace-only text raises ValueError."""
        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
            TextContent(
                text=" \n\t ",
                language="en"
            )

    def test_text_too_long_raises_error(self):
        """Test that text over 50,000 characters raises ValueError."""
        # One character past the limit exercised by test_text_at_max_length.
        long_text = "a" * 50001

        with pytest.raises(ValueError, match="Text content too long"):
            TextContent(
                text=long_text,
                language="en"
            )

    def test_text_at_max_length(self):
        """Test that text of exactly 50,000 characters is accepted."""
        max_text = "a" * 50000

        text = TextContent(
            text=max_text,
            language="en"
        )

        assert len(text.text) == 50000

    # ------------------------------------------------------------------
    # Language validation
    # ------------------------------------------------------------------

    def test_non_string_language_raises_error(self):
        """Test that non-string language raises TypeError."""
        with pytest.raises(TypeError, match="Language must be a string"):
            TextContent(
                text="Hello",
                language=123  # type: ignore
            )

    def test_empty_language_raises_error(self):
        """Test that empty language raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language=""
            )

    def test_whitespace_language_raises_error(self):
        """Test that whitespace-only language raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language=" "
            )

    def test_invalid_language_code_format_raises_error(self):
        """Test that malformed language codes raise ValueError."""
        invalid_codes = ["e", "ENG", "en-us", "en-USA", "123", "en_US"]

        for code in invalid_codes:
            with pytest.raises(ValueError, match="Invalid language code format"):
                TextContent(
                    text="Hello",
                    language=code
                )

    def test_valid_language_codes(self):
        """Test that well-formed language codes are accepted unchanged."""
        valid_codes = ["en", "fr", "de", "es", "zh", "ja", "en-US", "fr-FR", "zh-CN"]

        for code in valid_codes:
            text = TextContent(
                text="Hello",
                language=code
            )
            assert text.language == code

    # ------------------------------------------------------------------
    # Encoding validation
    # ------------------------------------------------------------------

    def test_non_string_encoding_raises_error(self):
        """Test that non-string encoding raises TypeError."""
        with pytest.raises(TypeError, match="Encoding must be a string"):
            TextContent(
                text="Hello",
                language="en",
                encoding=123  # type: ignore
            )

    def test_unsupported_encoding_raises_error(self):
        """Test that unsupported encoding raises ValueError."""
        with pytest.raises(ValueError, match="Unsupported encoding: xyz"):
            TextContent(
                text="Hello",
                language="en",
                encoding="xyz"
            )

    def test_supported_encodings(self):
        """Test that each supported encoding is accepted unchanged."""
        supported_encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1']

        for encoding in supported_encodings:
            text = TextContent(
                text="Hello",
                language="en",
                encoding=encoding
            )
            assert text.encoding == encoding

    def test_text_encoding_compatibility(self):
        """Test that text must be encodable with the specified encoding."""
        # ASCII-only text with ASCII encoding should work
        text = TextContent(
            text="Hello",
            language="en",
            encoding="ascii"
        )
        assert text.encoding == "ascii"

        # Non-ASCII text with ASCII encoding should fail
        with pytest.raises(ValueError, match="Text cannot be encoded with ascii encoding"):
            TextContent(
                text="Héllo",  # Contains non-ASCII character
                language="en",
                encoding="ascii"
            )

    # ------------------------------------------------------------------
    # Derived properties
    # ------------------------------------------------------------------

    def test_word_count_property(self):
        """Test word_count property calculation."""
        # Empty input is deliberately absent: the constructor rejects it
        # (see test_empty_text_raises_error), so word_count can never be
        # observed on an empty string.
        test_cases = [
            ("Hello world", 2),
            ("Hello", 1),
            ("Hello world test", 3),
            ("Hello, world! Test.", 3),  # Punctuation stays attached to its word
        ]

        for text_str, expected_count in test_cases:
            text = TextContent(text=text_str, language="en")
            assert text.word_count == expected_count

    def test_character_count_property(self):
        """Test that character_count equals the length of the text."""
        text_str = "Hello, world!"
        text = TextContent(text=text_str, language="en")

        assert text.character_count == len(text_str)

    def test_is_empty_property(self):
        """Test is_empty property on content that passes validation."""
        # Non-empty text
        text = TextContent(text="Hello", language="en")
        assert text.is_empty is False

        # Text padded with surrounding whitespace still has content
        text2 = TextContent(text=" Hello ", language="en")
        assert text2.is_empty is False

    # ------------------------------------------------------------------
    # truncate()
    # ------------------------------------------------------------------

    def test_truncate_method(self):
        """Test that truncate shortens the text and keeps the metadata."""
        text = TextContent(text="Hello, world! This is a test.", language="en")

        # Truncate to shorter length
        truncated = text.truncate(10)
        assert len(truncated.text) <= 10
        assert truncated.language == text.language
        assert truncated.encoding == text.encoding
        assert isinstance(truncated, TextContent)

        # Truncate to longer length (should return same text)
        not_truncated = text.truncate(100)
        assert not_truncated.text == text.text

    def test_truncate_with_invalid_length(self):
        """Test that truncate rejects zero and negative max_length."""
        text = TextContent(text="Hello", language="en")

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(0)

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(-1)

    def test_truncate_preserves_word_boundaries(self):
        """Test that truncate drops trailing whitespace left at the cut."""
        text = TextContent(text="Hello world test", language="en")

        # The first 12 characters are "Hello world " (cut lands right after
        # the space), so the result is expected to lose that trailing space.
        truncated = text.truncate(12)
        assert not truncated.text.endswith(" ")
        assert truncated.text == "Hello world"

    # ------------------------------------------------------------------
    # Immutability
    # ------------------------------------------------------------------

    def test_text_content_is_immutable(self):
        """Test that TextContent is immutable (frozen dataclass)."""
        text = TextContent(text="Hello", language="en")

        with pytest.raises(AttributeError):
            text.text = "Goodbye"  # type: ignore