"""Tests for core/encoder.py — NodeEncoder."""
import sys
from pathlib import Path
import pytest
import torch
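# Make the repository root importable so core.encoder resolves from any working directory.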
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.encoder import NodeEncoder
class TestNodeEncoderCreation:
"""Tests for NodeEncoder creation."""
def test_default_creation(self):
"""Creation with default parameters."""
encoder = NodeEncoder()
assert encoder is not None
assert encoder.fallback_dim > 0
def test_creation_with_model(self):
"""Creation with a specified model."""
encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
assert encoder.model_name == "sentence-transformers/all-MiniLM-L6-v2"
def test_creation_with_fallback_dim(self):
"""Creation with a specified fallback dimension."""
encoder = NodeEncoder(fallback_dim=128)
assert encoder.fallback_dim == 128
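# The "hash:<dim>" model spec selects the deterministic hashing fallback; <dim> sets the embedding width.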
class TestHashEmbeddings:
"""Tests for hash embeddings (fallback)."""
def test_hash_embedding_deterministic(self):
"""Hash embedding is deterministic."""
encoder = NodeEncoder(model_name="hash:64")
text = "test agent"
emb1 = encoder.encode([text])
emb2 = encoder.encode([text])
assert torch.allclose(emb1, emb2)
def test_hash_embedding_different_texts(self):
"""Different texts produce different embeddings."""
encoder = NodeEncoder(model_name="hash:64")
embs = encoder.encode(["agent one", "agent two"])
assert not torch.allclose(embs[0], embs[1])
def test_hash_embedding_dimension(self):
"""Hash embedding dimension."""
encoder = NodeEncoder(model_name="hash:128")
embs = encoder.encode(["test"])
assert embs.shape == (1, 128)
def test_hash_embedding_normalized(self):
"""Hash embedding is normalized."""
encoder = NodeEncoder(model_name="hash:64")
embs = encoder.encode(["test"])
norm = torch.norm(embs[0]).item()
assert abs(norm - 1.0) < 0.01 # Close to 1
def test_hash_embedding_empty_string(self):
"""Hash embedding for an empty string."""
encoder = NodeEncoder(model_name="hash:64")
embs = encoder.encode([""])
assert embs.shape == (1, 64)
assert not torch.isnan(embs).any()
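# Hedged sketch: hash:64 and hash:128 are exercised above, so the hash backend appears
# to accept arbitrary positive widths; the extra dim below is an illustrative assumption,
# not a documented limit.
@pytest.mark.parametrize("dim", [32, 64, 128])
def test_hash_embedding_parametrized_dims(dim):
    """Hash embeddings keep the requested width across several dims."""
    encoder = NodeEncoder(model_name=f"hash:{dim}")
    embs = encoder.encode(["parametrized sample"])
    assert embs.shape == (1, dim)
    assert not torch.isnan(embs).any()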
class TestSentenceTransformerEmbeddings:
"""Tests for sentence-transformer embeddings."""
def test_encode_single_text(self):
"""Encoding a single text."""
encoder = NodeEncoder()
embs = encoder.encode(["Test agent description"])
assert isinstance(embs, torch.Tensor)
assert embs.dim() == 2
assert embs.shape[0] == 1
assert embs.shape[1] > 0
def test_encode_batch(self):
"""Encoding a batch of texts."""
encoder = NodeEncoder()
texts = ["Agent one", "Agent two", "Agent three"]
embs = encoder.encode(texts)
assert isinstance(embs, torch.Tensor)
assert embs.shape[0] == 3
def test_encode_empty_batch(self):
"""Encoding an empty batch."""
encoder = NodeEncoder()
embs = encoder.encode([])
assert embs.shape[0] == 0
def test_fallback_when_st_unavailable(self):
"""Fallback to hash when ST is unavailable."""
encoder = NodeEncoder(model_name="hash:64")
embs = encoder.encode(["test"])
assert embs.shape == (1, 64)
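# AgentProfile objects are flattened to plain text via to_text() before encoding;
# the encoder itself only ever consumes strings.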
class TestAgentProfileEncoding:
"""Tests for agent profile encoding."""
def test_encode_agent_profile(self):
"""Encoding an agent profile."""
from core.agent import AgentProfile
encoder = NodeEncoder()
profile = AgentProfile(
agent_id="test_agent",
display_name="Researcher",
persona="Finds and analyzes information",
)
embs = encoder.encode([profile.to_text()])
assert isinstance(embs, torch.Tensor)
assert embs.dim() == 2
assert embs.shape[0] == 1
def test_encode_minimal_profile(self):
"""Encoding a minimal profile."""
from core.agent import AgentProfile
encoder = NodeEncoder()
profile = AgentProfile(agent_id="minimal", display_name="minimal")
embs = encoder.encode([profile.to_text()])
assert isinstance(embs, torch.Tensor)
def test_encode_profiles_batch(self):
"""Encoding a batch of profiles."""
from core.agent import AgentProfile
encoder = NodeEncoder()
profiles = [
AgentProfile(agent_id="a", display_name="Role A"),
AgentProfile(agent_id="b", display_name="Role B"),
]
texts = [p.to_text() for p in profiles]
embs = encoder.encode(texts)
assert embs.shape[0] == 2
class TestConsistency:
"""Tests for encoder consistency."""
def test_same_input_same_output(self):
"""Same input produces same output."""
encoder = NodeEncoder()
text = "consistent input"
emb1 = encoder.encode([text])
emb2 = encoder.encode([text])
assert torch.allclose(emb1, emb2, atol=1e-6)
def test_similar_texts_close_embeddings(self):
"""Similar texts have close embeddings."""
encoder = NodeEncoder()
embs = encoder.encode(
[
"This is a researcher agent",
"This is a research agent",
"This is a completely different unrelated text about cats",
]
)
# Cosine similarity
sim_12 = torch.cosine_similarity(embs[0].unsqueeze(0), embs[1].unsqueeze(0)).item()
sim_13 = torch.cosine_similarity(embs[0].unsqueeze(0), embs[2].unsqueeze(0)).item()
# Similar texts should have higher similarity
assert sim_12 > sim_13
def test_dimension_consistency(self):
"""Dimension consistency."""
encoder = NodeEncoder()
texts = ["short", "medium length text", "a very long text " * 100]
        embs = encoder.encode(texts)
        # Every row of one batch should share the same embedding dimension
        dims = {embs[i].shape[0] for i in range(len(texts))}
        assert len(dims) == 1
class TestEdgeCases:
"""Tests for edge cases."""
def test_unicode_text(self):
"""Unicode text."""
encoder = NodeEncoder()
embs = encoder.encode(["Test agent with unicode 日本語"])
assert isinstance(embs, torch.Tensor)
assert not torch.isnan(embs).any()
def test_special_characters(self):
"""Special characters."""
encoder = NodeEncoder()
embs = encoder.encode(["Agent with special chars: !@#$%^&*()"])
assert isinstance(embs, torch.Tensor)
assert not torch.isnan(embs).any()
def test_very_long_text(self):
"""Very long text."""
encoder = NodeEncoder()
long_text = "word " * 10000
embs = encoder.encode([long_text])
assert isinstance(embs, torch.Tensor)
assert not torch.isnan(embs).any()
def test_whitespace_only(self):
"""Whitespace only."""
encoder = NodeEncoder()
embs = encoder.encode([" \t\n "])
assert isinstance(embs, torch.Tensor)
def test_numbers_only(self):
"""Numbers only."""
encoder = NodeEncoder()
embs = encoder.encode(["12345 67890"])
assert isinstance(embs, torch.Tensor)
class TestGraphIntegration:
"""Tests for graph integration."""
def test_encode_graph_agents(self):
"""Encoding graph agents."""
from core.agent import AgentProfile
encoder = NodeEncoder()
agents = [
AgentProfile(
agent_id="coordinator",
display_name="Coordinator",
persona="Manages workflow",
),
AgentProfile(agent_id="researcher", display_name="Researcher", persona="Finds information"),
AgentProfile(agent_id="writer", display_name="Writer", persona="Creates content"),
]
texts = [a.to_text() for a in agents]
embeddings = encoder.encode(texts)
assert embeddings.shape[0] == 3
# All unique agents should have different embeddings
assert not torch.allclose(embeddings[0], embeddings[1])
assert not torch.allclose(embeddings[1], embeddings[2])
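# Malformed model specs are expected to fail fast in the constructor rather than at encode() time.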
class TestNodeEncoderValidationErrors:
"""Tests for validation errors in NodeEncoder creation."""
def test_hash_prefix_non_numeric_dim(self):
"""Cover lines 48-49: hash: prefix with non-numeric dimension."""
        with pytest.raises(Exception):
NodeEncoder(model_name="hash:abc")
def test_hash_prefix_zero_dim(self):
"""Cover lines 50-52: hash: prefix with zero dimension."""
        with pytest.raises(Exception):
NodeEncoder(model_name="hash:0")
def test_hash_prefix_negative_dim(self):
"""Cover lines 50-52: hash: prefix with negative dimension (non-digit actually)."""
with pytest.raises((ValueError, Exception)):
NodeEncoder(model_name="hash:-5")
def test_sentence_transformer_missing_model_id(self):
"""Cover lines 60-61: sentence-transformers: prefix without model identifier."""
with pytest.raises((ValueError, Exception)):
NodeEncoder(model_name="sentence-transformers/")
def test_unsupported_model_name(self):
"""Cover lines 63-64: unsupported model name raises ValueError."""
        with pytest.raises(ValueError):
NodeEncoder(model_name="totally-unsupported-model")
def test_sentence_transformer_colon_style_missing_model(self):
"""Cover lines 60-61: sentence-transformers: without model."""
with pytest.raises((ValueError, Exception)):
NodeEncoder(model_name="sentence-transformers:")
def test_load_model_returns_none_for_hash_provider(self):
"""Cover line 99: _load_model returns None when provider is hash."""
encoder = NodeEncoder(model_name="hash:64")
result = encoder._load_model()
assert result is None
def test_encode_with_st_model_none_uses_hash_fallback(self):
"""Cover line 87: model is None → uses hash fallback."""
from unittest.mock import patch
encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Force _load_model to return None
with patch.object(encoder, "_load_model", return_value=None):
embs = encoder.encode(["test"])
assert embs.shape[0] == 1
def test_sentence_transformer_spec_missing_model_colon(self):
"""Cover lines 60-61 via colon notation."""
with pytest.raises((ValueError, Exception)):
NodeEncoder(model_name="st:")
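# embedding_dim should be queryable without encoding anything: the parsed hash dim,
# the loaded model's output width, or fallback_dim when loading fails.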
class TestNodeEncoderEmbeddingDim:
"""Tests for embedding_dim property."""
def test_embedding_dim_hash_provider(self):
"""Cover lines 136-137: embedding_dim for hash provider."""
encoder = NodeEncoder(model_name="hash:64")
assert encoder.embedding_dim == 64
def test_embedding_dim_hash_default(self):
"""Cover lines 136-137: embedding_dim for hash provider (default dim)."""
encoder = NodeEncoder(model_name="hash")
# fallback_dim should be at least 32
assert encoder.embedding_dim >= 32
def test_embedding_dim_sentence_transformer(self):
"""Cover lines 139-141: embedding_dim when model is loaded."""
encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
dim = encoder.embedding_dim
assert dim > 0
def test_embedding_dim_when_model_none(self):
"""Cover line 143: embedding_dim returns fallback_dim when model returns None."""
from unittest.mock import patch
encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
with patch.object(encoder, "_load_model", return_value=None):
dim = encoder.embedding_dim
assert dim == encoder.fallback_dim
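# Hedged usage sketch (not an assertion): embedding_dim lets a caller size buffers
# before encoding. Only the API exercised above (constructor, encode, embedding_dim)
# is assumed; the helper name is hypothetical.
def _example_preallocate(encoder: NodeEncoder, texts: list) -> torch.Tensor:
    out = torch.empty(len(texts), encoder.embedding_dim)
    if texts:
        out[:] = encoder.encode(texts)
    return out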
class TestNodeEncoderSentenceTransformersNotInstalled:
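    """Tests for the fallback path when sentence-transformers is not installed."""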
def test_load_model_falls_back_to_hash_when_st_not_available(self):
"""Lines 105-106: _load_model sets provider to hash when sentence_transformers is missing."""
import importlib.util
from unittest.mock import patch
encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Simulate sentence_transformers not being installed
with patch.object(importlib.util, "find_spec", return_value=None):
result = encoder._load_model()
assert result is None
assert encoder._provider == "hash"
if __name__ == "__main__":
    pytest.main([__file__, "-v"])