""" Unit tests for the open_notebook.utils module. This test suite focuses on testing utility functions that perform actual logic without heavy mocking - string processing, validation, and algorithms. """ import pytest from open_notebook.utils import ( clean_thinking_content, compare_versions, get_installed_version, parse_thinking_content, remove_non_ascii, remove_non_printable, split_text, token_count, ) from open_notebook.utils.context_builder import ContextBuilder, ContextConfig # ============================================================================ # TEST SUITE 1: Text Utilities # ============================================================================ class TestTextUtilities: """Test suite for text utility functions.""" def test_split_text_empty_string(self): """Test splitting empty or very short strings.""" assert split_text("") == [] assert split_text("short") == ["short"] def test_remove_non_ascii(self): """Test removal of non-ASCII characters.""" # Text with various non-ASCII characters text_with_unicode = "Hello 世界 café naïve émoji 🎉" result = remove_non_ascii(text_with_unicode) # Should only contain ASCII characters assert result == "Hello caf nave moji " # All characters should be in ASCII range assert all(ord(char) < 128 for char in result) def test_remove_non_ascii_pure_ascii(self): """Test that pure ASCII text is unchanged.""" text = "Hello World 123 !@#" result = remove_non_ascii(text) assert result == text def test_remove_non_printable(self): """Test removal of non-printable characters.""" # Text with various Unicode whitespace and control chars text = "Hello\u2000World\u200B\u202FTest" result = remove_non_printable(text) # Should have regular spaces and printable chars only assert "Hello" in result assert "World" in result assert "Test" in result def test_remove_non_printable_preserves_newlines(self): """Test that newlines and tabs are preserved.""" text = "Line1\nLine2\tTabbed" result = remove_non_printable(text) assert "\n" in result assert "\t" in result def test_parse_thinking_content_basic(self): """Test parsing single thinking block.""" content = "This is my thinkingHere is my answer" thinking, cleaned = parse_thinking_content(content) assert thinking == "This is my thinking" assert cleaned == "Here is my answer" def test_parse_thinking_content_multiple_tags(self): """Test parsing multiple thinking blocks.""" content = "First thoughtAnswerSecond thoughtMore" thinking, cleaned = parse_thinking_content(content) assert "First thought" in thinking assert "Second thought" in thinking assert "" not in cleaned assert "Answer" in cleaned assert "More" in cleaned def test_parse_thinking_content_no_tags(self): """Test parsing content without thinking tags.""" content = "Just regular content" thinking, cleaned = parse_thinking_content(content) assert thinking == "" assert cleaned == "Just regular content" def test_parse_thinking_content_malformed_no_open_tag(self): """Test parsing malformed output where opening tag is missing.""" content = "Some thinking contentHere is my answer" thinking, cleaned = parse_thinking_content(content) assert thinking == "Some thinking content" assert cleaned == "Here is my answer" def test_parse_thinking_content_invalid_input(self): """Test parsing with invalid input types.""" # Non-string input thinking, cleaned = parse_thinking_content(None) assert thinking == "" assert cleaned == "" # Integer input thinking, cleaned = parse_thinking_content(123) assert thinking == "" assert cleaned == "123" def test_parse_thinking_content_large_content(self): """Test that very large content is not processed.""" large_content = "x" * 200000 # > 100KB limit thinking, cleaned = parse_thinking_content(large_content) # Should return unchanged due to size limit assert thinking == "" assert cleaned == large_content def test_clean_thinking_content(self): """Test convenience function for cleaning thinking content.""" content = "Internal thoughtsPublic response" result = clean_thinking_content(content) assert "" not in result assert "Public response" in result assert "Internal thoughts" not in result # ============================================================================ # TEST SUITE 2: Token Utilities # ============================================================================ class TestTokenUtilities: """Test suite for token counting fallback behavior.""" def test_token_count_fallback(self): """Test fallback when tiktoken raises an error.""" from unittest.mock import patch # Make tiktoken raise an ImportError to trigger fallback with patch("tiktoken.get_encoding", side_effect=ImportError("tiktoken not available")): text = "one two three four five" count = token_count(text) # Fallback uses word count * 1.3 # 5 words * 1.3 = 6.5 -> 6 assert isinstance(count, int) assert count > 0 # ============================================================================ # TEST SUITE 3: Version Utilities # ============================================================================ class TestVersionUtilities: """Test suite for version management functions.""" def test_compare_versions_equal(self): """Test comparing equal versions.""" result = compare_versions("1.0.0", "1.0.0") assert result == 0 def test_compare_versions_less_than(self): """Test comparing when first version is less.""" result = compare_versions("1.0.0", "2.0.0") assert result == -1 result = compare_versions("1.0.0", "1.1.0") assert result == -1 result = compare_versions("1.0.0", "1.0.1") assert result == -1 def test_compare_versions_greater_than(self): """Test comparing when first version is greater.""" result = compare_versions("2.0.0", "1.0.0") assert result == 1 result = compare_versions("1.1.0", "1.0.0") assert result == 1 result = compare_versions("1.0.1", "1.0.0") assert result == 1 def test_compare_versions_prerelease(self): """Test comparing versions with pre-release tags.""" result = compare_versions("1.0.0", "1.0.0-alpha") assert result == 1 # Release > pre-release result = compare_versions("1.0.0-beta", "1.0.0-alpha") assert result == 1 # beta > alpha def test_get_installed_version_success(self): """Test getting installed package version.""" # Test with a known installed package version = get_installed_version("pytest") assert isinstance(version, str) assert len(version) > 0 # Should look like a version (has dots) assert "." in version def test_get_installed_version_not_found(self): """Test getting version of non-existent package.""" from importlib.metadata import PackageNotFoundError with pytest.raises(PackageNotFoundError): get_installed_version("this-package-does-not-exist-12345") def test_get_version_from_github_invalid_url(self): """Test GitHub version fetch with invalid URL.""" from open_notebook.utils.version_utils import get_version_from_github with pytest.raises(ValueError, match="Not a GitHub URL"): get_version_from_github("https://example.com/repo") with pytest.raises(ValueError, match="Invalid GitHub repository URL"): get_version_from_github("https://github.com/") # ============================================================================ # TEST SUITE 4: Context Builder Configuration # ============================================================================ class TestContextBuilder: """Test suite for ContextBuilder initialization and configuration.""" def test_context_config_defaults(self): """Test ContextConfig default values.""" config = ContextConfig() assert config.sources == {} assert config.notes == {} assert config.include_insights is True assert config.include_notes is True assert config.priority_weights is not None assert "source" in config.priority_weights assert "note" in config.priority_weights assert "insight" in config.priority_weights def test_context_builder_initialization(self): """Test ContextBuilder initialization with various params.""" builder = ContextBuilder( source_id="source:123", notebook_id="notebook:456", max_tokens=1000, include_insights=False ) assert builder.source_id == "source:123" assert builder.notebook_id == "notebook:456" assert builder.max_tokens == 1000 assert builder.include_insights is False if __name__ == "__main__": pytest.main([__file__, "-v"])