"""Tests for PrefixNormalizer."""
import pytest
from apohara_context_forge.normalization.prefix_normalizer import (
PrefixNormalizer,
create_prefix_normalizer,
SEPARATOR,
)
class TestPrefixNormalizerBasic:
    """Basic PrefixNormalizer tests.

    Covers the shared system-prompt prefix, prompt validation, the segment
    separator, whitespace stripping, and the canonical hash accessor.
    """

    def test_byte_identical_output_for_same_canonical_prompt(self):
        """Test normalize() emits the same system-prompt prefix for every agent."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        prompt1 = normalizer.normalize("agent1", "What is AI?", "retriever role")
        prompt2 = normalizer.normalize("agent2", "What is AI?", "summarizer role")

        # Extract system prompt prefix (everything before first separator)
        system_prefix_1 = prompt1.split(SEPARATOR)[0]
        system_prefix_2 = prompt2.split(SEPARATOR)[0]

        # Both agents must share the same prefix even though their roles differ
        assert system_prefix_1 == system_prefix_2
        assert system_prefix_1 == "You are a helpful AI."

    def test_sha256_validation_catches_mismatched_canonical_prompts(self):
        """Test SHA256 validation catches mismatched canonical prompts."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

        # Valid matching prompt
        assert normalizer.validate_system_prompt("You are a helpful AI.") is True

        # Different prompt should not match
        assert normalizer.validate_system_prompt("You are a different AI.") is False

        # Surrounding whitespace must NOT cause a mismatch: padded input is
        # still accepted (validation is expected to strip before comparing)
        assert normalizer.validate_system_prompt(" You are a helpful AI. ") is True

    def test_separator_enforcement(self):
        """Test separator enforcement."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

        # Default separator should be exactly "\n\n"
        assert normalizer.separator == "\n\n"

        # Output should contain exactly two separators joining three segments
        prompt = normalizer.normalize("agent1", "What is AI?", "retriever role")

        # Count occurrences of separator
        assert prompt.count("\n\n") == 2

        # Should have pattern: system\n\nrole\n\nuser
        parts = prompt.split("\n\n")
        assert len(parts) == 3
        assert parts[0] == "You are a helpful AI."
        assert parts[1] == "retriever role"
        assert parts[2] == "What is AI?"

    def test_whitespace_stripping(self):
        """Test whitespace stripping from user_prompt and role_prompt."""
        normalizer = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")

        # Trailing whitespace should be stripped
        prompt = normalizer.normalize(
            "agent1",
            "What is AI? ",
            "retriever role ",
        )

        # Verify no trailing whitespace in output
        lines = prompt.split("\n\n")
        assert lines[1] == "retriever role"
        assert lines[2] == "What is AI?"

        # Leading whitespace should also be stripped
        prompt2 = normalizer.normalize(
            "agent2",
            " What is AI?",
            " summarizer role",
        )
        lines2 = prompt2.split("\n\n")
        assert lines2[1] == "summarizer role"
        assert lines2[2] == "What is AI?"

    def test_get_canonical_hash(self):
        """Test get_canonical_hash() returns consistent SHA256 hex string."""
        normalizer1 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        normalizer2 = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        hash1 = normalizer1.get_canonical_hash()
        hash2 = normalizer2.get_canonical_hash()

        # Same prompt should produce same hash
        assert hash1 == hash2

        # Should be a valid SHA256 hex string (64 lowercase hex characters)
        assert len(hash1) == 64
        assert all(c in "0123456789abcdef" for c in hash1)

        # Different prompt should produce different hash
        normalizer3 = PrefixNormalizer(canonical_system_prompt="You are a different AI.")
        hash3 = normalizer3.get_canonical_hash()
        assert hash1 != hash3

    def test_separator_property(self):
        """Test separator property returns the correct string."""
        normalizer = PrefixNormalizer(canonical_system_prompt="Test prompt.")

        # The instance property must agree with the module-level constant
        assert normalizer.separator == SEPARATOR
        assert normalizer.separator == "\n\n"

    def test_canonical_hash_consistency(self):
        """Test two instances with same prompt have same hash."""
        normalizer_a = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        normalizer_b = PrefixNormalizer(canonical_system_prompt="You are a helpful AI.")
        assert normalizer_a.get_canonical_hash() == normalizer_b.get_canonical_hash()
class TestCreatePrefixNormalizer:
    """Exercise the create_prefix_normalizer factory function."""

    def test_create_with_custom_prompt(self):
        """A caller-supplied canonical prompt is reported back unchanged."""
        built = create_prefix_normalizer(canonical_system_prompt="Custom system prompt.")
        canonical = built.get_canonical_prompt()
        assert canonical == "Custom system prompt."

    def test_create_with_default_prompt(self):
        """Calling the factory with no arguments yields the default prompt."""
        expected_default = (
            "You are a helpful AI assistant. "
            "Provide accurate, detailed, and thoughtful responses. "
            "Use chain-of-thought reasoning when appropriate."
        )
        built = create_prefix_normalizer()
        assert built.get_canonical_prompt() == expected_default

    def test_create_prefix_normalizer_has_correct_separator(self):
        """A factory-built normalizer exposes the two-newline separator."""
        built = create_prefix_normalizer(canonical_system_prompt="Test prompt.")
        observed_separator = built.separator
        assert observed_separator == "\n\n"
class TestNormalize:
    """Tests for normalize() method."""

    def test_normalize_assembles_in_fixed_order(self):
        """Test normalize() assembles segments in fixed order."""
        normalizer = PrefixNormalizer(canonical_system_prompt="System prompt.")
        prompt = normalizer.normalize(
            agent_id="test_agent",
            user_prompt="User question?",
            agent_role_prompt="Role description.",
        )

        # Order should be: system, role, user
        assert prompt.startswith("System prompt.")
        assert "Role description." in prompt
        assert "User question?" in prompt

        # Containment alone would also pass for user-before-role output, so
        # pin the relative positions explicitly.
        assert prompt.index("Role description.") < prompt.index("User question?")

    def test_normalize_with_empty_role_prompt(self):
        """Test normalize() with empty role prompt."""
        normalizer = PrefixNormalizer(canonical_system_prompt="System.")
        prompt = normalizer.normalize(
            agent_id="agent",
            user_prompt="Question",
            agent_role_prompt="",
        )

        # The empty role still occupies its slot between the two separators
        parts = prompt.split("\n\n")
        assert parts[0] == "System."
        assert parts[1] == ""
        assert parts[2] == "Question"

    def test_normalize_registered_agents(self):
        """Test normalize() tracks registered agents."""
        normalizer = PrefixNormalizer(canonical_system_prompt="System.")
        normalizer.normalize("agent1", "Q1", "Role1")
        normalizer.normalize("agent2", "Q2", "Role2")

        # Agents should be tracked (internal state).
        # NOTE(review): this reaches into a private attribute; switch to a
        # public accessor if PrefixNormalizer offers one.
        assert len(normalizer._registered_agents) == 2