"""Tests for PrefixNormalizer.""" import pytest from apohara_context_forge.normalization.prefix_normalizer import ( PrefixNormalizer, create_prefix_normalizer, SEPARATOR, ) class TestPrefixNormalizerBasic: """Basic PrefixNormalizer tests.""" def test_byte_identical_output_for_same_canonical_prompt(self): """Test normalize() produces byte-identical output for same canonical prompt.""" normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") prompt1 = normalizer.normalize("agent1", "What is AI?", "retriever role") prompt2 = normalizer.normalize("agent2", "What is AI?", "summarizer role") # Extract system prompt prefix (everything before first separator) system_prefix_1 = prompt1.split(SEPARATOR)[0] system_prefix_2 = prompt2.split(SEPARATOR)[0] # Both should have the same system prompt prefix assert system_prefix_1 == system_prefix_2 assert system_prefix_1 == "You are a helpful AI." def test_sha256_validation_catches_mismatched_canonical_prompts(self): """Test SHA256 validation catches mismatched canonical prompts.""" normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") # Valid matching prompt assert normalizer.validate_system_prompt("You are a helpful AI.") is True # Different prompt should not match assert normalizer.validate_system_prompt("You are a different AI.") is False # Prompt with extra whitespace should not match (validation strips input) assert normalizer.validate_system_prompt(" You are a helpful AI. ") is True def test_separator_enforcement(self): """Test separator enforcement.""" normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") # Default separator should be exactly "\n\n" assert normalizer.separator == "\n\n" # Output should contain exactly two newlines between segments prompt = normalizer.normalize("agent1", "What is AI?", "retriever role") # Count occurrences of separator assert prompt.count("\n\n") == 2 # Should have pattern: system\n\nrole\n\nuser parts = prompt.split("\n\n") assert len(parts) == 3 assert parts[0] == "You are a helpful AI." assert parts[1] == "retriever role" assert parts[2] == "What is AI?" def test_whitespace_stripping(self): """Test whitespace stripping from user_prompt and role_prompt.""" normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") # Trailing whitespace should be stripped prompt = normalizer.normalize( "agent1", "What is AI? ", "retriever role ", ) # Verify no trailing whitespace in output lines = prompt.split("\n\n") assert lines[1] == "retriever role" assert lines[2] == "What is AI?" # Leading whitespace should also be stripped prompt2 = normalizer.normalize( "agent2", " What is AI?", " summarizer role", ) lines2 = prompt2.split("\n\n") assert lines2[1] == "summarizer role" assert lines2[2] == "What is AI?" def test_get_canonical_hash(self): """Test get_canonical_hash() returns consistent SHA256 hex string.""" normalizer1 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") normalizer2 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") hash1 = normalizer1.get_canonical_hash() hash2 = normalizer2.get_canonical_hash() # Same prompt should produce same hash assert hash1 == hash2 # Should be a valid SHA256 hex string (64 characters) assert len(hash1) == 64 assert all(c in "0123456789abcdef" for c in hash1) # Different prompt should produce different hash normalizer3 = PrefixNormalizer(canonical_system_prompt="You are a different AI.") hash3 = normalizer3.get_canonical_hash() assert hash1 != hash3 def test_separator_property(self): """Test separator property returns the correct string.""" normalizer = PrefixNormalizer(canonical_system_prompt="Test prompt.") assert normalizer.separator == SEPARATOR assert normalizer.separator == "\n\n" def test_canonical_hash_consistency(self): """Test two instances with same prompt have same hash.""" normalizer_a = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") normalizer_b = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.") assert normalizer_a.get_canonical_hash() == normalizer_b.get_canonical_hash() class TestCreatePrefixNormalizer: """Tests for create_prefix_normalizer factory function.""" def test_create_with_custom_prompt(self): """Test create_prefix_normalizer with custom prompt.""" normalizer = create_prefix_normalizer( canonical_system_prompt="Custom system prompt." ) assert normalizer.get_canonical_prompt() == "Custom system prompt." def test_create_with_default_prompt(self): """Test create_prefix_normalizer uses default prompt when none provided.""" normalizer = create_prefix_normalizer() expected_default = ( "You are a helpful AI assistant. " "Provide accurate, detailed, and thoughtful responses. " "Use chain-of-thought reasoning when appropriate." ) assert normalizer.get_canonical_prompt() == expected_default def test_create_prefix_normalizer_has_correct_separator(self): """Test create_prefix_normalizer uses correct separator.""" normalizer = create_prefix_normalizer( canonical_system_prompt="Test prompt." ) assert normalizer.separator == "\n\n" class TestNormalize: """Tests for normalize() method.""" def test_normalize_assembles_in_fixed_order(self): """Test normalize() assembles segments in fixed order.""" normalizer = PrefixNormalizer(canonical_system_prompt="System prompt.") prompt = normalizer.normalize( agent_id="test_agent", user_prompt="User question?", agent_role_prompt="Role description.", ) # Order should be: system, role, user assert prompt.startswith("System prompt.") assert "Role description." in prompt assert "User question?" in prompt def test_normalize_with_empty_role_prompt(self): """Test normalize() with empty role prompt.""" normalizer = PrefixNormalizer(canonical_system_prompt="System.") prompt = normalizer.normalize( agent_id="agent", user_prompt="Question", agent_role_prompt="", ) parts = prompt.split("\n\n") assert parts[0] == "System." assert parts[1] == "" assert parts[2] == "Question" def test_normalize_registered_agents(self): """Test normalize() tracks registered agents.""" normalizer = PrefixNormalizer(canonical_system_prompt="System.") normalizer.normalize("agent1", "Q1", "Role1") normalizer.normalize("agent2", "Q2", "Role2") # Agents should be tracked (internal state) assert len(normalizer._registered_agents) == 2