"""Tests for core/encoder.py — NodeEncoder."""

import sys
from pathlib import Path

import pytest
import torch

# Make the project root importable so `core.encoder` resolves regardless of cwd.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.encoder import NodeEncoder
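
# The hash-provider tests below assume NodeEncoder derives a deterministic,
# unit-norm vector from the text alone when given a "hash:<dim>" model name.
# A minimal sketch of that idea (an illustration, not the actual core.encoder
# implementation):
#
#     import hashlib
#
#     def _hash_embed(text: str, dim: int) -> torch.Tensor:
#         seed = int.from_bytes(hashlib.sha256(text.encode("utf-8")).digest()[:8], "big")
#         gen = torch.Generator().manual_seed(seed)
#         vec = torch.randn(dim, generator=gen)
#         return vec / vec.norm()  # unit norm, as test_hash_embedding_normalized expects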


class TestNodeEncoderCreation:
    """Tests for NodeEncoder creation."""

    def test_default_creation(self):
        """Creation with default parameters."""
        encoder = NodeEncoder()

        assert encoder is not None
        assert encoder.fallback_dim > 0

    def test_creation_with_model(self):
        """Creation with a specified model."""
        encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")

        assert encoder.model_name == "sentence-transformers/all-MiniLM-L6-v2"

    def test_creation_with_fallback_dim(self):
        """Creation with a specified fallback dimension."""
        encoder = NodeEncoder(fallback_dim=128)

        assert encoder.fallback_dim == 128


class TestHashEmbeddings:
    """Tests for hash embeddings (fallback)."""

    def test_hash_embedding_deterministic(self):
        """Hash embedding is deterministic."""
        encoder = NodeEncoder(model_name="hash:64")

        text = "test agent"
        emb1 = encoder.encode([text])
        emb2 = encoder.encode([text])

        assert torch.allclose(emb1, emb2)

    def test_hash_embedding_different_texts(self):
        """Different texts produce different embeddings."""
        encoder = NodeEncoder(model_name="hash:64")

        embs = encoder.encode(["agent one", "agent two"])

        assert not torch.allclose(embs[0], embs[1])

    def test_hash_embedding_dimension(self):
        """Hash embedding dimension."""
        encoder = NodeEncoder(model_name="hash:128")

        embs = encoder.encode(["test"])

        assert embs.shape == (1, 128)

    def test_hash_embedding_normalized(self):
        """Hash embedding is normalized."""
        encoder = NodeEncoder(model_name="hash:64")

        embs = encoder.encode(["test"])
        norm = torch.norm(embs[0]).item()

        assert norm == pytest.approx(1.0, abs=0.01)

    def test_hash_embedding_empty_string(self):
        """Hash embedding for an empty string."""
        encoder = NodeEncoder(model_name="hash:64")

        embs = encoder.encode([""])

        assert embs.shape == (1, 64)
        assert not torch.isnan(embs).any()
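
    # A hedged extension of test_hash_embedding_dimension: the same "hash:<dim>"
    # convention exercised across several sizes. Assumes any positive dimension
    # is accepted by the spec parser.
    @pytest.mark.parametrize("dim", [16, 64, 256])
    def test_hash_embedding_various_dims(self, dim):
        """Hash embeddings honor the requested dimension and stay unit-norm."""
        encoder = NodeEncoder(model_name=f"hash:{dim}")

        embs = encoder.encode(["test"])

        assert embs.shape == (1, dim)
        assert torch.norm(embs[0]).item() == pytest.approx(1.0, abs=0.01)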


class TestSentenceTransformerEmbeddings:
    """Tests for sentence-transformer embeddings."""

    def test_encode_single_text(self):
        """Encoding a single text."""
        encoder = NodeEncoder()

        embs = encoder.encode(["Test agent description"])

        assert isinstance(embs, torch.Tensor)
        assert embs.dim() == 2
        assert embs.shape[0] == 1
        assert embs.shape[1] > 0

    def test_encode_batch(self):
        """Encoding a batch of texts."""
        encoder = NodeEncoder()

        texts = ["Agent one", "Agent two", "Agent three"]
        embs = encoder.encode(texts)

        assert isinstance(embs, torch.Tensor)
        assert embs.shape[0] == 3

    def test_encode_empty_batch(self):
        """Encoding an empty batch."""
        encoder = NodeEncoder()

        embs = encoder.encode([])

        assert embs.shape[0] == 0

    def test_fallback_when_st_unavailable(self):
        """The hash provider, used as the fallback when sentence-transformers is unavailable, still encodes."""
        encoder = NodeEncoder(model_name="hash:64")

        embs = encoder.encode(["test"])

        assert embs.shape == (1, 64)


class TestAgentProfileEncoding:
    """Tests for agent profile encoding."""

    def test_encode_agent_profile(self):
        """Encoding an agent profile."""
        from core.agent import AgentProfile

        encoder = NodeEncoder()

        profile = AgentProfile(
            agent_id="test_agent",
            display_name="Researcher",
            persona="Finds and analyzes information",
        )

        embs = encoder.encode([profile.to_text()])

        assert isinstance(embs, torch.Tensor)
        assert embs.dim() == 2
        assert embs.shape[0] == 1

    def test_encode_minimal_profile(self):
        """Encoding a minimal profile."""
        from core.agent import AgentProfile

        encoder = NodeEncoder()

        profile = AgentProfile(agent_id="minimal", display_name="minimal")

        embs = encoder.encode([profile.to_text()])

        assert isinstance(embs, torch.Tensor)

    def test_encode_profiles_batch(self):
        """Encoding a batch of profiles."""
        from core.agent import AgentProfile

        encoder = NodeEncoder()

        profiles = [
            AgentProfile(agent_id="a", display_name="Role A"),
            AgentProfile(agent_id="b", display_name="Role B"),
        ]

        texts = [p.to_text() for p in profiles]
        embs = encoder.encode(texts)

        assert embs.shape[0] == 2


class TestConsistency:
    """Tests for encoder consistency."""

    def test_same_input_same_output(self):
        """Same input produces same output."""
        encoder = NodeEncoder()

        text = "consistent input"
        emb1 = encoder.encode([text])
        emb2 = encoder.encode([text])

        assert torch.allclose(emb1, emb2, atol=1e-6)

    def test_similar_texts_close_embeddings(self):
        """Similar texts have close embeddings."""
        encoder = NodeEncoder()

        embs = encoder.encode(
            [
                "This is a researcher agent",
                "This is a research agent",
                "This is a completely different unrelated text about cats",
            ]
        )

        # Cosine similarity
        sim_12 = torch.cosine_similarity(embs[0], embs[1], dim=0).item()
        sim_13 = torch.cosine_similarity(embs[0], embs[2], dim=0).item()

        # Similar texts should have higher similarity
        assert sim_12 > sim_13

    def test_dimension_consistency(self):
        """Inputs of very different lengths map to the same embedding dimension."""
        encoder = NodeEncoder()

        texts = ["short", "medium length text", "a very long text " * 100]

        embs = encoder.encode(texts)

        # encode() returns a single 2-D tensor with one row per input, so all
        # embeddings share one dimension; check it against the declared size.
        assert embs.dim() == 2
        assert embs.shape == (len(texts), encoder.embedding_dim)


class TestEdgeCases:
    """Tests for edge cases."""

    def test_unicode_text(self):
        """Unicode text."""
        encoder = NodeEncoder()

        embs = encoder.encode(["Test agent with unicode 日本語"])

        assert isinstance(embs, torch.Tensor)
        assert not torch.isnan(embs).any()

    def test_special_characters(self):
        """Special characters."""
        encoder = NodeEncoder()

        embs = encoder.encode(["Agent with special chars: !@#$%^&*()"])

        assert isinstance(embs, torch.Tensor)
        assert not torch.isnan(embs).any()

    def test_very_long_text(self):
        """Very long text."""
        encoder = NodeEncoder()

        long_text = "word " * 10000
        embs = encoder.encode([long_text])

        assert isinstance(embs, torch.Tensor)
        assert not torch.isnan(embs).any()

    def test_whitespace_only(self):
        """Whitespace only."""
        encoder = NodeEncoder()

        embs = encoder.encode(["   \t\n   "])

        assert isinstance(embs, torch.Tensor)

    def test_numbers_only(self):
        """Numbers only."""
        encoder = NodeEncoder()

        embs = encoder.encode(["12345 67890"])

        assert isinstance(embs, torch.Tensor)


class TestGraphIntegration:
    """Tests for graph integration."""

    def test_encode_graph_agents(self):
        """Encoding graph agents."""
        from core.agent import AgentProfile

        encoder = NodeEncoder()

        agents = [
            AgentProfile(
                agent_id="coordinator",
                display_name="Coordinator",
                persona="Manages workflow",
            ),
            AgentProfile(agent_id="researcher", display_name="Researcher", persona="Finds information"),
            AgentProfile(agent_id="writer", display_name="Writer", persona="Creates content"),
        ]

        texts = [a.to_text() for a in agents]
        embeddings = encoder.encode(texts)

        assert embeddings.shape[0] == 3
        # All unique agents should have different embeddings
        assert not torch.allclose(embeddings[0], embeddings[1])
        assert not torch.allclose(embeddings[1], embeddings[2])
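
    # Hedged follow-on sketch: a graph layer consumes these embeddings as a
    # stacked (num_nodes, dim) node-feature matrix. Plain torch only; no GNN
    # library or specific core API is assumed beyond encode() itself.
    def test_embeddings_usable_as_node_features(self):
        """Stacked agent embeddings form a valid node-feature matrix."""
        from core.agent import AgentProfile

        encoder = NodeEncoder(model_name="hash:32")

        agents = [
            AgentProfile(agent_id="a", display_name="A"),
            AgentProfile(agent_id="b", display_name="B"),
        ]
        x = encoder.encode([a.to_text() for a in agents])

        assert x.shape == (2, 32)
        assert torch.isfinite(x).all()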


class TestNodeEncoderValidationErrors:
    """Tests for validation errors in NodeEncoder creation."""

    def test_hash_prefix_non_numeric_dim(self):
        """Cover lines 48-49: "hash:" prefix with a non-numeric dimension."""
        with pytest.raises(ValueError):
            NodeEncoder(model_name="hash:abc")

    def test_hash_prefix_zero_dim(self):
        """Cover lines 50-52: hash: prefix with zero dimension."""
        with pytest.raises(ValueError):
            NodeEncoder(model_name="hash:0")

    def test_hash_prefix_negative_dim(self):
        """Cover lines 50-52: "hash:" prefix with a negative dimension (the "-" makes it non-numeric)."""
        with pytest.raises(ValueError):
            NodeEncoder(model_name="hash:-5")

    def test_sentence_transformer_missing_model_id(self):
        """Cover lines 60-61: "sentence-transformers/" without a model identifier."""
        with pytest.raises(ValueError):
            NodeEncoder(model_name="sentence-transformers/")

    def test_unsupported_model_name(self):
        """Cover lines 63-64: unsupported model name raises ValueError."""
        with pytest.raises(ValueError):
            NodeEncoder(model_name="totally-unsupported-model")

    def test_sentence_transformer_colon_style_missing_model(self):
        """Cover lines 60-61: "sentence-transformers:" with nothing after the colon."""
        with pytest.raises(ValueError):
            NodeEncoder(model_name="sentence-transformers:")

    def test_load_model_returns_none_for_hash_provider(self):
        """Cover line 99: _load_model returns None when provider is hash."""
        encoder = NodeEncoder(model_name="hash:64")
        result = encoder._load_model()
        assert result is None

    def test_encode_with_st_model_none_uses_hash_fallback(self):
        """Cover line 87: model is None → uses hash fallback."""
        from unittest.mock import patch

        encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # Force _load_model to return None
        with patch.object(encoder, "_load_model", return_value=None):
            embs = encoder.encode(["test"])
        assert embs.shape[0] == 1

    def test_sentence_transformer_spec_missing_model_colon(self):
        """Cover lines 60-61 via colon notation."""
        with pytest.raises(ValueError):
            NodeEncoder(model_name="st:")


class TestNodeEncoderEmbeddingDim:
    """Tests for embedding_dim property."""

    def test_embedding_dim_hash_provider(self):
        """Cover lines 136-137: embedding_dim for hash provider."""
        encoder = NodeEncoder(model_name="hash:64")
        assert encoder.embedding_dim == 64

    def test_embedding_dim_hash_default(self):
        """Cover lines 136-137: embedding_dim for hash provider (default dim)."""
        encoder = NodeEncoder(model_name="hash")
        # fallback_dim should be at least 32
        assert encoder.embedding_dim >= 32

    def test_embedding_dim_sentence_transformer(self):
        """Cover lines 139-141: embedding_dim when model is loaded."""
        encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
        dim = encoder.embedding_dim
        assert dim > 0

    def test_embedding_dim_when_model_none(self):
        """Cover line 143: embedding_dim returns fallback_dim when model returns None."""
        from unittest.mock import patch

        encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
        with patch.object(encoder, "_load_model", return_value=None):
            dim = encoder.embedding_dim
        assert dim == encoder.fallback_dim
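
    # Hedged cross-check: whichever provider is active, the width of encode()'s
    # output should agree with the embedding_dim property. Assumes both code
    # paths report the same dimension.
    def test_embedding_dim_matches_encode_output(self):
        """encode() output width equals embedding_dim."""
        encoder = NodeEncoder(model_name="hash:48")

        embs = encoder.encode(["test"])

        assert embs.shape[1] == encoder.embedding_dim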


class TestNodeEncoderSentenceTransformersNotInstalled:
    """Tests for fallback behavior when sentence-transformers is not installed."""

    def test_load_model_falls_back_to_hash_when_st_not_available(self):
        """Lines 105-106: _load_model switches the provider to hash when sentence_transformers is missing."""
        import importlib.util
        from unittest.mock import patch

        encoder = NodeEncoder(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # Simulate sentence_transformers not being installed
        with patch.object(importlib.util, "find_spec", return_value=None):
            result = encoder._load_model()
        assert result is None
        assert encoder._provider == "hash"


if __name__ == "__main__":
    pytest.main([__file__, "-v"])