Spaces:
Sleeping
Sleeping
File size: 9,659 Bytes
f871fed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
"""
Unit tests for the open_notebook.utils module.
This test suite focuses on testing utility functions that perform actual logic
without heavy mocking - string processing, validation, and algorithms.
"""
import pytest
from open_notebook.utils import (
clean_thinking_content,
compare_versions,
get_installed_version,
parse_thinking_content,
remove_non_ascii,
remove_non_printable,
split_text,
token_count,
)
from open_notebook.utils.context_builder import ContextBuilder, ContextConfig
# ============================================================================
# TEST SUITE 1: Text Utilities
# ============================================================================
class TestTextUtilities:
    """Checks for the text helpers: splitting, character filtering, <think> parsing."""

    def test_split_text_empty_string(self):
        """Empty input gives no chunks; a short string comes back as one chunk."""
        for raw, expected_chunks in (("", []), ("short", ["short"])):
            assert split_text(raw) == expected_chunks

    def test_remove_non_ascii(self):
        """Characters outside the ASCII range are dropped from mixed text."""
        mixed = "Hello 世界 café naïve émoji 🎉"
        stripped = remove_non_ascii(mixed)

        # NOTE(review): if the helper only deletes characters, both spaces
        # around the removed CJK word would survive ("Hello  caf ...") --
        # confirm this expected literal against the implementation.
        assert stripped == "Hello caf nave moji "
        # No surviving code point may fall outside the 7-bit range.
        assert not any(ord(ch) >= 128 for ch in stripped)

    def test_remove_non_ascii_pure_ascii(self):
        """Text that is already pure ASCII passes through untouched."""
        plain = "Hello World 123 !@#"
        assert remove_non_ascii(plain) == plain

    def test_remove_non_printable(self):
        """Visible words survive when exotic Unicode spacing is filtered."""
        # U+2000, U+200B and U+202F sit between the three words.
        noisy = "Hello\u2000World\u200B\u202FTest"
        filtered = remove_non_printable(noisy)

        for word in ("Hello", "World", "Test"):
            assert word in filtered

    def test_remove_non_printable_preserves_newlines(self):
        """Newline and tab characters are kept by the printable filter."""
        filtered = remove_non_printable("Line1\nLine2\tTabbed")

        for kept in ("\n", "\t"):
            assert kept in filtered

    def test_parse_thinking_content_basic(self):
        """A single <think> block is split from the visible answer."""
        raw = "<think>This is my thinking</think>Here is my answer"
        thoughts, answer = parse_thinking_content(raw)

        assert thoughts == "This is my thinking"
        assert answer == "Here is my answer"

    def test_parse_thinking_content_multiple_tags(self):
        """Every <think> block is collected and removed from the answer."""
        raw = "<think>First thought</think>Answer<think>Second thought</think>More"
        thoughts, answer = parse_thinking_content(raw)

        for fragment in ("First thought", "Second thought"):
            assert fragment in thoughts
        assert "<think>" not in answer
        for fragment in ("Answer", "More"):
            assert fragment in answer

    def test_parse_thinking_content_no_tags(self):
        """Content without tags yields empty thinking and unchanged text."""
        raw = "Just regular content"
        thoughts, answer = parse_thinking_content(raw)

        assert thoughts == ""
        assert answer == "Just regular content"

    def test_parse_thinking_content_malformed_no_open_tag(self):
        """A lone closing </think> still separates thinking from the answer."""
        raw = "Some thinking content</think>Here is my answer"
        thoughts, answer = parse_thinking_content(raw)

        assert thoughts == "Some thinking content"
        assert answer == "Here is my answer"

    def test_parse_thinking_content_invalid_input(self):
        """Non-string arguments are tolerated instead of raising."""
        # None is expected to produce two empty strings.
        thoughts, answer = parse_thinking_content(None)
        assert thoughts == ""
        assert answer == ""

        # An integer is expected back as its string form, with no thinking.
        thoughts, answer = parse_thinking_content(123)
        assert thoughts == ""
        assert answer == "123"

    def test_parse_thinking_content_large_content(self):
        """Oversized content is expected back verbatim with no thinking."""
        # 200,000 characters; the original author notes this exceeds a
        # 100KB processing limit in the parser.
        oversized = "x" * 200_000
        thoughts, answer = parse_thinking_content(oversized)

        assert thoughts == ""
        assert answer == oversized

    def test_clean_thinking_content(self):
        """The convenience wrapper returns only the public part of a reply."""
        reply = clean_thinking_content(
            "<think>Internal thoughts</think>Public response"
        )

        assert "<think>" not in reply
        assert "Public response" in reply
        assert "Internal thoughts" not in reply
# ============================================================================
# TEST SUITE 2: Token Utilities
# ============================================================================
class TestTokenUtilities:
    """Checks for token counting when the tiktoken encoder cannot be obtained."""

    def test_token_count_fallback(self):
        """token_count still returns a positive int when tiktoken fails."""
        from unittest.mock import patch

        # Force the encoder lookup to fail so the fallback path is taken.
        encoder_failure = ImportError("tiktoken not available")
        with patch("tiktoken.get_encoding", side_effect=encoder_failure):
            estimate = token_count("one two three four five")

            # The original author notes the fallback is word count * 1.3
            # (5 words -> 6); only type and positivity are pinned here.
            assert isinstance(estimate, int)
            assert estimate > 0
# ============================================================================
# TEST SUITE 3: Version Utilities
# ============================================================================
class TestVersionUtilities:
    """Checks for version comparison and version lookup helpers."""

    def test_compare_versions_equal(self):
        """Identical version strings compare as 0."""
        assert compare_versions("1.0.0", "1.0.0") == 0

    def test_compare_versions_less_than(self):
        """A lower major, minor, or patch on the left compares as -1."""
        newer_candidates = ("2.0.0", "1.1.0", "1.0.1")
        for newer in newer_candidates:
            outcome = compare_versions("1.0.0", newer)
            assert outcome == -1

    def test_compare_versions_greater_than(self):
        """A higher major, minor, or patch on the left compares as 1."""
        older_baseline = "1.0.0"
        for newer in ("2.0.0", "1.1.0", "1.0.1"):
            outcome = compare_versions(newer, older_baseline)
            assert outcome == 1

    def test_compare_versions_prerelease(self):
        """Pre-release tags order below the release and among themselves."""
        ordered_pairs = (
            ("1.0.0", "1.0.0-alpha"),  # release > pre-release
            ("1.0.0-beta", "1.0.0-alpha"),  # beta > alpha
        )
        for higher, lower in ordered_pairs:
            outcome = compare_versions(higher, lower)
            assert outcome == 1

    def test_get_installed_version_success(self):
        """Looking up an installed package gives a dotted, non-empty string."""
        # pytest is necessarily installed while this suite is running.
        found = get_installed_version("pytest")

        assert isinstance(found, str)
        assert len(found) > 0
        # A version string is expected to contain at least one dot.
        assert "." in found

    def test_get_installed_version_not_found(self):
        """An unknown package name raises PackageNotFoundError."""
        from importlib.metadata import PackageNotFoundError

        missing_package = "this-package-does-not-exist-12345"
        with pytest.raises(PackageNotFoundError):
            get_installed_version(missing_package)

    def test_get_version_from_github_invalid_url(self):
        """Non-GitHub and repo-less GitHub URLs are rejected with ValueError."""
        from open_notebook.utils.version_utils import get_version_from_github

        rejected = (
            ("https://example.com/repo", "Not a GitHub URL"),
            ("https://github.com/", "Invalid GitHub repository URL"),
        )
        for url, message in rejected:
            with pytest.raises(ValueError, match=message):
                get_version_from_github(url)
# ============================================================================
# TEST SUITE 4: Context Builder Configuration
# ============================================================================
class TestContextBuilder:
    """Checks for ContextConfig defaults and ContextBuilder construction."""

    def test_context_config_defaults(self):
        """A bare ContextConfig has empty selections and all weights present."""
        defaults = ContextConfig()

        assert defaults.sources == {}
        assert defaults.notes == {}
        assert defaults.include_insights is True
        assert defaults.include_notes is True

        weights = defaults.priority_weights
        assert weights is not None
        for kind in ("source", "note", "insight"):
            assert kind in defaults.priority_weights

    def test_context_builder_initialization(self):
        """Constructor keyword arguments are exposed as same-named attributes."""
        params = {
            "source_id": "source:123",
            "notebook_id": "notebook:456",
            "max_tokens": 1000,
            "include_insights": False,
        }
        built = ContextBuilder(**params)

        assert built.source_id == "source:123"
        assert built.notebook_id == "notebook:456"
        assert built.max_tokens == 1000
        assert built.include_insights is False
if __name__ == "__main__":
    # Allow running this file directly. pytest.main returns the run's exit
    # code; raise it as SystemExit so a failing run is not reported to the
    # shell as success.
    raise SystemExit(pytest.main([__file__, "-v"]))
|