"""Tests for core/encoder.py — NodeEncoder.""" import sys from pathlib import Path import pytest import torch sys.path.insert(0, str(Path(__file__).parent.parent)) from core.encoder import NodeEncoder class TestNodeEncoderCreation: """Tests for NodeEncoder creation.""" def test_default_creation(self): """Creation with default parameters.""" encoder = NodeEncoder() assert encoder is not None assert encoder.fallback_dim > 0 def test_creation_with_model(self): """Creation with a specified model.""" encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2") assert encoder.model_name == "sentence-transformers/all-MiniLM-L6-v2" def test_creation_with_fallback_dim(self): """Creation with a specified fallback dimension.""" encoder = NodeEncoder(fallback_dim=128) assert encoder.fallback_dim == 128 class TestHashEmbeddings: """Tests for hash embeddings (fallback).""" def test_hash_embedding_deterministic(self): """Hash embedding is deterministic.""" encoder = NodeEncoder(model_name="hash:64") text = "test agent" emb1 = encoder.encode([text]) emb2 = encoder.encode([text]) assert torch.allclose(emb1, emb2) def test_hash_embedding_different_texts(self): """Different texts produce different embeddings.""" encoder = NodeEncoder(model_name="hash:64") embs = encoder.encode(["agent one", "agent two"]) assert not torch.allclose(embs[0], embs[1]) def test_hash_embedding_dimension(self): """Hash embedding dimension.""" encoder = NodeEncoder(model_name="hash:128") embs = encoder.encode(["test"]) assert embs.shape == (1, 128) def test_hash_embedding_normalized(self): """Hash embedding is normalized.""" encoder = NodeEncoder(model_name="hash:64") embs = encoder.encode(["test"]) norm = torch.norm(embs[0]).item() assert abs(norm - 1.0) < 0.01 # Close to 1 def test_hash_embedding_empty_string(self): """Hash embedding for an empty string.""" encoder = NodeEncoder(model_name="hash:64") embs = encoder.encode([""]) assert embs.shape == (1, 64) assert not torch.isnan(embs).any() class TestSentenceTransformerEmbeddings: """Tests for sentence-transformer embeddings.""" def test_encode_single_text(self): """Encoding a single text.""" encoder = NodeEncoder() embs = encoder.encode(["Test agent description"]) assert isinstance(embs, torch.Tensor) assert embs.dim() == 2 assert embs.shape[0] == 1 assert embs.shape[1] > 0 def test_encode_batch(self): """Encoding a batch of texts.""" encoder = NodeEncoder() texts = ["Agent one", "Agent two", "Agent three"] embs = encoder.encode(texts) assert isinstance(embs, torch.Tensor) assert embs.shape[0] == 3 def test_encode_empty_batch(self): """Encoding an empty batch.""" encoder = NodeEncoder() embs = encoder.encode([]) assert embs.shape[0] == 0 def test_fallback_when_st_unavailable(self): """Fallback to hash when ST is unavailable.""" encoder = NodeEncoder(model_name="hash:64") embs = encoder.encode(["test"]) assert embs.shape == (1, 64) class TestAgentProfileEncoding: """Tests for agent profile encoding.""" def test_encode_agent_profile(self): """Encoding an agent profile.""" from core.agent import AgentProfile encoder = NodeEncoder() profile = AgentProfile( agent_id="test_agent", display_name="Researcher", persona="Finds and analyzes information", ) embs = encoder.encode([profile.to_text()]) assert isinstance(embs, torch.Tensor) assert embs.dim() == 2 assert embs.shape[0] == 1 def test_encode_minimal_profile(self): """Encoding a minimal profile.""" from core.agent import AgentProfile encoder = NodeEncoder() profile = AgentProfile(agent_id="minimal", display_name="minimal") embs = encoder.encode([profile.to_text()]) assert isinstance(embs, torch.Tensor) def test_encode_profiles_batch(self): """Encoding a batch of profiles.""" from core.agent import AgentProfile encoder = NodeEncoder() profiles = [ AgentProfile(agent_id="a", display_name="Role A"), AgentProfile(agent_id="b", display_name="Role B"), ] texts = [p.to_text() for p in profiles] embs = encoder.encode(texts) assert embs.shape[0] == 2 class TestConsistency: """Tests for encoder consistency.""" def test_same_input_same_output(self): """Same input produces same output.""" encoder = NodeEncoder() text = "consistent input" emb1 = encoder.encode([text]) emb2 = encoder.encode([text]) assert torch.allclose(emb1, emb2, atol=1e-6) def test_similar_texts_close_embeddings(self): """Similar texts have close embeddings.""" encoder = NodeEncoder() embs = encoder.encode( [ "This is a researcher agent", "This is a research agent", "This is a completely different unrelated text about cats", ] ) # Cosine similarity sim_12 = torch.cosine_similarity(embs[0].unsqueeze(0), embs[1].unsqueeze(0)).item() sim_13 = torch.cosine_similarity(embs[0].unsqueeze(0), embs[2].unsqueeze(0)).item() # Similar texts should have higher similarity assert sim_12 > sim_13 def test_dimension_consistency(self): """Dimension consistency.""" encoder = NodeEncoder() texts = ["short", "medium length text", "a very long text " * 100] dims = set() embs = encoder.encode(texts) for i in range(len(texts)): dims.add(embs[i].shape[0]) # All should have same dimension assert len(dims) == 1 class TestEdgeCases: """Tests for edge cases.""" def test_unicode_text(self): """Unicode text.""" encoder = NodeEncoder() embs = encoder.encode(["Test agent with unicode 日本語"]) assert isinstance(embs, torch.Tensor) assert not torch.isnan(embs).any() def test_special_characters(self): """Special characters.""" encoder = NodeEncoder() embs = encoder.encode(["Agent with special chars: !@#$%^&*()"]) assert isinstance(embs, torch.Tensor) assert not torch.isnan(embs).any() def test_very_long_text(self): """Very long text.""" encoder = NodeEncoder() long_text = "word " * 10000 embs = encoder.encode([long_text]) assert isinstance(embs, torch.Tensor) assert not torch.isnan(embs).any() def test_whitespace_only(self): """Whitespace only.""" encoder = NodeEncoder() embs = encoder.encode([" \t\n "]) assert isinstance(embs, torch.Tensor) def test_numbers_only(self): """Numbers only.""" encoder = NodeEncoder() embs = encoder.encode(["12345 67890"]) assert isinstance(embs, torch.Tensor) class TestGraphIntegration: """Tests for graph integration.""" def test_encode_graph_agents(self): """Encoding graph agents.""" from core.agent import AgentProfile encoder = NodeEncoder() agents = [ AgentProfile( agent_id="coordinator", display_name="Coordinator", persona="Manages workflow", ), AgentProfile(agent_id="researcher", display_name="Researcher", persona="Finds information"), AgentProfile(agent_id="writer", display_name="Writer", persona="Creates content"), ] texts = [a.to_text() for a in agents] embeddings = encoder.encode(texts) assert embeddings.shape[0] == 3 # All unique agents should have different embeddings assert not torch.allclose(embeddings[0], embeddings[1]) assert not torch.allclose(embeddings[1], embeddings[2]) class TestNodeEncoderValidationErrors: """Tests for validation errors in NodeEncoder creation.""" def test_hash_prefix_non_numeric_dim(self): """Cover lines 48-49: hash: prefix with non-numeric dimension.""" with pytest.raises((ValueError, Exception)): NodeEncoder(model_name="hash:abc") def test_hash_prefix_zero_dim(self): """Cover lines 50-52: hash: prefix with zero dimension.""" with pytest.raises((ValueError, Exception)): NodeEncoder(model_name="hash:0") def test_hash_prefix_negative_dim(self): """Cover lines 50-52: hash: prefix with negative dimension (non-digit actually).""" with pytest.raises((ValueError, Exception)): NodeEncoder(model_name="hash:-5") def test_sentence_transformer_missing_model_id(self): """Cover lines 60-61: sentence-transformers: prefix without model identifier.""" with pytest.raises((ValueError, Exception)): NodeEncoder(model_name="sentence-transformers/") def test_unsupported_model_name(self): """Cover lines 63-64: unsupported model name raises ValueError.""" with pytest.raises((ValueError, Exception)): NodeEncoder(model_name="totally-unsupported-model") def test_sentence_transformer_colon_style_missing_model(self): """Cover lines 60-61: sentence-transformers: without model.""" with pytest.raises((ValueError, Exception)): NodeEncoder(model_name="sentence-transformers:") def test_load_model_returns_none_for_hash_provider(self): """Cover line 99: _load_model returns None when provider is hash.""" encoder = NodeEncoder(model_name="hash:64") result = encoder._load_model() assert result is None def test_encode_with_st_model_none_uses_hash_fallback(self): """Cover line 87: model is None → uses hash fallback.""" from unittest.mock import patch encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2") # Force _load_model to return None with patch.object(encoder, "_load_model", return_value=None): embs = encoder.encode(["test"]) assert embs.shape[0] == 1 def test_sentence_transformer_spec_missing_model_colon(self): """Cover lines 60-61 via colon notation.""" with pytest.raises((ValueError, Exception)): NodeEncoder(model_name="st:") class TestNodeEncoderEmbeddingDim: """Tests for embedding_dim property.""" def test_embedding_dim_hash_provider(self): """Cover lines 136-137: embedding_dim for hash provider.""" encoder = NodeEncoder(model_name="hash:64") assert encoder.embedding_dim == 64 def test_embedding_dim_hash_default(self): """Cover lines 136-137: embedding_dim for hash provider (default dim).""" encoder = NodeEncoder(model_name="hash") # fallback_dim should be at least 32 assert encoder.embedding_dim >= 32 def test_embedding_dim_sentence_transformer(self): """Cover lines 139-141: embedding_dim when model is loaded.""" encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2") dim = encoder.embedding_dim assert dim > 0 def test_embedding_dim_when_model_none(self): """Cover line 143: embedding_dim returns fallback_dim when model returns None.""" from unittest.mock import patch encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2") with patch.object(encoder, "_load_model", return_value=None): dim = encoder.embedding_dim assert dim == encoder.fallback_dim if __name__ == "__main__": pytest.main([__file__, "-v"]) class TestNodeEncoderSentenceTransformersNotInstalled: def test_load_model_falls_back_to_hash_when_st_not_available(self): """Lines 105-106: _load_model sets provider to hash when sentence_transformers is missing.""" import importlib.util from unittest.mock import patch encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2") # Simulate sentence_transformers not being installed with patch.object(importlib.util, "find_spec", return_value=None): result = encoder._load_model() assert result is None assert encoder._provider == "hash"