Spaces:
Sleeping
Sleeping
| import pytest | |
| from mediastorm.vectorize.embedder import Embedder | |
@pytest.fixture(scope="module")
def embedder():
    """Provide a single Embedder instance shared by the tests in this module.

    Registered as a pytest fixture so test functions can request it via
    their ``embedder`` parameter. Module scope is used so the instance is
    constructed once rather than per test, because model loading is slow.

    Returns:
        A freshly constructed ``Embedder`` (default constructor arguments).
    """
    return Embedder()
def test_embed_returns_384_dimensions(embedder):
    """A single input text yields exactly one vector with 384 components."""
    embedded = embedder.embed_texts(["Hello world"])

    # Check the count first so the index below is known to be valid.
    vector_count = len(embedded)
    assert vector_count == 1

    dimension = len(embedded[0])
    assert dimension == 384
def test_embed_batch_consistency(embedder):
    """Embedding the same text twice gives the same vector (within 1e-5)."""
    sentence = "The cat sat on the mat"

    # Two independent calls with identical input.
    first_run = embedder.embed_texts([sentence])
    second_run = embedder.embed_texts([sentence])

    # Element-wise comparison with an absolute tolerance of 1e-5.
    assert first_run[0] == pytest.approx(second_run[0], abs=1e-5)
def test_embed_multiple_texts(embedder):
    """A batch of three texts yields three vectors, each of length 384."""
    sentences = ["First sentence.", "Second sentence.", "Third sentence."]
    embedded = embedder.embed_texts(sentences)

    assert len(embedded) == 3

    # Every returned vector must have the expected dimensionality.
    for vector in embedded:
        assert len(vector) == 384
def test_embed_vectors_are_normalized(embedder):
    """The returned vector has an L2 norm of 1.0 (within 1e-4)."""
    embedded = embedder.embed_texts(["Test normalization"])
    vector = embedded[0]

    # Euclidean length: square root of the sum of squared components.
    sum_of_squares = sum(component ** 2 for component in vector)
    length = sum_of_squares ** 0.5

    assert length == pytest.approx(1.0, abs=1e-4)