"""Tests for the text embedder's ``embed_texts`` output."""

import pytest

from mediastorm.vectorize.embedder import Embedder


@pytest.fixture(scope="module")
def embedder():
    """Provide one Embedder for the whole module (model loading is slow)."""
    instance = Embedder()
    return instance


def test_embed_returns_384_dimensions(embedder):
    """A single input text yields exactly one vector of length 384."""
    result = embedder.embed_texts(["Hello world"])

    assert len(result) == 1
    only_vector = result[0]
    assert len(only_vector) == 384


def test_embed_batch_consistency(embedder):
    """Embedding the same text twice yields matching vectors."""
    sentence = "The cat sat on the mat"

    first_run = embedder.embed_texts([sentence])
    second_run = embedder.embed_texts([sentence])

    # Compare element-wise with an absolute tolerance of 1e-5.
    expected = pytest.approx(second_run[0], abs=1e-5)
    assert first_run[0] == expected


def test_embed_multiple_texts(embedder):
    """A batch of three texts yields three vectors, each of length 384."""
    batch = ["First sentence.", "Second sentence.", "Third sentence."]

    result = embedder.embed_texts(batch)

    assert len(result) == 3
    for vector in result:
        assert len(vector) == 384


def test_embed_vectors_are_normalized(embedder):
    """The returned vector has an L2 norm of 1.0 (within 1e-4)."""
    result = embedder.embed_texts(["Test normalization"])
    vector = result[0]

    # Euclidean length: square root of the sum of squared components.
    squared_total = sum(component ** 2 for component in vector)
    length = squared_total ** 0.5

    assert length == pytest.approx(1.0, abs=1e-4)