Spaces:

DroolingPanda
/

teachingAssistant

Sleeping

teachingAssistant / tests /unit /domain /models /test_text_content.py

Michael Hu

refactor based on DDD

5009cb8 6 months ago

8.78 kB

	"""Unit tests for TextContent value object."""

	import pytest
	from src.domain.models.text_content import TextContent


	class TestTextContent:
	    """Test cases for the TextContent value object.

	    The tests cover construction and the default encoding, validation of
	    the ``text``, ``language`` and ``encoding`` arguments, the derived
	    properties (``word_count``, ``character_count``, ``is_empty``), the
	    ``truncate`` method, and attribute immutability.
	    """

	    def test_valid_text_content_creation(self):
	        """Test creating a valid TextContent instance."""
	        text = TextContent(
	            text="Hello, world!",
	            language="en",
	            encoding="utf-8",
	        )

	        assert text.text == "Hello, world!"
	        assert text.language == "en"
	        assert text.encoding == "utf-8"
	        assert text.word_count == 2
	        assert text.character_count == 13
	        assert text.is_empty is False

	    def test_text_content_with_default_encoding(self):
	        """Test that encoding defaults to utf-8 when omitted."""
	        text = TextContent(
	            text="Hello, world!",
	            language="en",
	        )

	        assert text.encoding == "utf-8"

	    def test_non_string_text_raises_error(self):
	        """Test that non-string text raises TypeError."""
	        with pytest.raises(TypeError, match="Text must be a string"):
	            TextContent(
	                text=123,  # type: ignore
	                language="en",
	            )

	    def test_empty_text_raises_error(self):
	        """Test that empty text raises ValueError."""
	        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
	            TextContent(
	                text="",
	                language="en",
	            )

	    def test_whitespace_only_text_raises_error(self):
	        """Test that whitespace-only text raises ValueError."""
	        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
	            TextContent(
	                text=" \n\t ",
	                language="en",
	            )

	    def test_text_too_long_raises_error(self):
	        """Test that text over 50,000 characters raises ValueError."""
	        long_text = "a" * 50001
	        with pytest.raises(ValueError, match="Text content too long"):
	            TextContent(
	                text=long_text,
	                language="en",
	            )

	    def test_text_at_max_length(self):
	        """Test that text of exactly 50,000 characters is accepted."""
	        max_text = "a" * 50000
	        text = TextContent(
	            text=max_text,
	            language="en",
	        )
	        assert len(text.text) == 50000

	    def test_non_string_language_raises_error(self):
	        """Test that non-string language raises TypeError."""
	        with pytest.raises(TypeError, match="Language must be a string"):
	            TextContent(
	                text="Hello",
	                language=123,  # type: ignore
	            )

	    def test_empty_language_raises_error(self):
	        """Test that empty language raises ValueError."""
	        with pytest.raises(ValueError, match="Language cannot be empty"):
	            TextContent(
	                text="Hello",
	                language="",
	            )

	    def test_whitespace_language_raises_error(self):
	        """Test that whitespace-only language raises ValueError."""
	        with pytest.raises(ValueError, match="Language cannot be empty"):
	            TextContent(
	                text="Hello",
	                language=" ",
	            )

	    def test_invalid_language_code_format_raises_error(self):
	        """Test that invalid language code formats raise ValueError."""
	        invalid_codes = ["e", "ENG", "en-us", "en-USA", "123", "en_US"]

	        # Each code gets its own raises-context so every entry must be
	        # rejected individually, not just the first one.
	        for code in invalid_codes:
	            with pytest.raises(ValueError, match="Invalid language code format"):
	                TextContent(
	                    text="Hello",
	                    language=code,
	                )

	    def test_valid_language_codes(self):
	        """Test that valid language code formats are accepted unchanged."""
	        valid_codes = ["en", "fr", "de", "es", "zh", "ja", "en-US", "fr-FR", "zh-CN"]

	        for code in valid_codes:
	            text = TextContent(
	                text="Hello",
	                language=code,
	            )
	            assert text.language == code

	    def test_non_string_encoding_raises_error(self):
	        """Test that non-string encoding raises TypeError."""
	        with pytest.raises(TypeError, match="Encoding must be a string"):
	            TextContent(
	                text="Hello",
	                language="en",
	                encoding=123,  # type: ignore
	            )

	    def test_unsupported_encoding_raises_error(self):
	        """Test that unsupported encoding raises ValueError."""
	        with pytest.raises(ValueError, match="Unsupported encoding: xyz"):
	            TextContent(
	                text="Hello",
	                language="en",
	                encoding="xyz",
	            )

	    def test_supported_encodings(self):
	        """Test that each supported encoding is accepted unchanged."""
	        supported_encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1']

	        for encoding in supported_encodings:
	            text = TextContent(
	                text="Hello",
	                language="en",
	                encoding=encoding,
	            )
	            assert text.encoding == encoding

	    def test_text_encoding_compatibility(self):
	        """Test that text must be encodable with the specified encoding."""
	        # Pure-ASCII text with ASCII encoding should work
	        text = TextContent(
	            text="Hello",
	            language="en",
	            encoding="ascii",
	        )
	        assert text.encoding == "ascii"

	        # Non-ASCII text with ASCII encoding should fail
	        with pytest.raises(ValueError, match="Text cannot be encoded with ascii encoding"):
	            TextContent(
	                text="Héllo",  # Contains non-ASCII character
	                language="en",
	                encoding="ascii",
	            )

	    def test_word_count_property(self):
	        """Test word_count property calculation."""
	        # No empty-string case here: empty text is rejected at construction
	        # (covered by test_empty_text_raises_error), so word_count can never
	        # be observed for it.
	        test_cases = [
	            ("Hello world", 2),
	            ("Hello", 1),
	            ("Hello world test", 3),
	            ("Hello, world! Test.", 3),  # Punctuation stays attached to words
	        ]

	        for text_str, expected_count in test_cases:
	            text = TextContent(text=text_str, language="en")
	            # The input is used as the assert message to identify the failing case.
	            assert text.word_count == expected_count, text_str

	    def test_character_count_property(self):
	        """Test that character_count equals the length of the text."""
	        text_str = "Hello, world!"
	        text = TextContent(text=text_str, language="en")
	        assert text.character_count == len(text_str)

	    def test_is_empty_property(self):
	        """Test that is_empty is False for text with real content."""
	        # Non-empty text
	        text = TextContent(text="Hello", language="en")
	        assert text.is_empty is False

	        # Content padded with surrounding whitespace is still non-empty
	        text2 = TextContent(text=" Hello ", language="en")
	        assert text2.is_empty is False

	    def test_truncate_method(self):
	        """Test that truncate returns a TextContent within the length limit."""
	        text = TextContent(text="Hello, world! This is a test.", language="en")

	        # Truncate to shorter length
	        truncated = text.truncate(10)
	        assert len(truncated.text) <= 10
	        assert truncated.language == text.language
	        assert truncated.encoding == text.encoding
	        assert isinstance(truncated, TextContent)

	        # Truncate to longer length (text should be unchanged)
	        not_truncated = text.truncate(100)
	        assert not_truncated.text == text.text

	    def test_truncate_with_invalid_length(self):
	        """Test that truncate rejects zero and negative max_length."""
	        text = TextContent(text="Hello", language="en")

	        with pytest.raises(ValueError, match="Max length must be positive"):
	            text.truncate(0)

	        with pytest.raises(ValueError, match="Max length must be positive"):
	            text.truncate(-1)

	    def test_text_content_is_immutable(self):
	        """Test that assigning to an attribute raises AttributeError."""
	        text = TextContent(text="Hello", language="en")

	        with pytest.raises(AttributeError):
	            text.text = "Goodbye"  # type: ignore

	    def test_truncate_preserves_word_boundaries(self):
	        """Test that truncate strips trailing whitespace left at the cut."""
	        text = TextContent(text="Hello world test", language="en")

	        # The first 12 characters are "Hello world " (cut lands just after
	        # the space following "world"); the trailing space must be stripped.
	        truncated = text.truncate(12)
	        assert not truncated.text.endswith(" ")
	        assert truncated.text == "Hello world"