File size: 1,246 Bytes
0de90e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pytest
from mediastorm.vectorize.embedder import Embedder


@pytest.fixture(scope="module")
def embedder():
    """Build one Embedder and share it across every test in this module.

    The fixture is module-scoped so the construction cost (the original
    author notes that model loading is slow) is paid once per module
    instead of once per test.
    """
    shared_instance = Embedder()
    return shared_instance


def test_embed_returns_384_dimensions(embedder):
    """A single input text must yield exactly one vector of length 384."""
    expected_count, expected_dim = 1, 384
    result = embedder.embed_texts(["Hello world"])
    # One input string in, one embedding out.
    assert len(result) == expected_count
    # The sole embedding has the expected dimensionality.
    assert len(result[0]) == expected_dim


def test_embed_batch_consistency(embedder):
    """Embedding the same text twice must give matching vectors.

    The two results are compared element-wise with an absolute tolerance
    of 1e-5 rather than for exact equality.
    """
    sentence = "The cat sat on the mat"
    first_run = embedder.embed_texts([sentence])
    second_run = embedder.embed_texts([sentence])
    assert first_run[0] == pytest.approx(second_run[0], abs=1e-5)


def test_embed_multiple_texts(embedder):
    """A batch of three texts must yield three 384-dimensional vectors."""
    batch = ["First sentence.", "Second sentence.", "Third sentence."]
    result = embedder.embed_texts(batch)
    # One output vector per input text.
    assert len(result) == 3
    # Every vector in the batch has the expected dimensionality.
    for vector in result:
        assert len(vector) == 384


def test_embed_vectors_are_normalized(embedder):
    """The returned embedding must have an L2 norm of 1.0 (within 1e-4)."""
    first_vector = embedder.embed_texts(["Test normalization"])[0]
    # Accumulate the sum of squared components, then take the square root
    # to obtain the Euclidean length of the vector.
    squared_sum = 0
    for component in first_vector:
        squared_sum += component ** 2
    length = squared_sum ** 0.5
    assert length == pytest.approx(1.0, abs=1e-4)