Spaces:
Sleeping
Sleeping
Create embedder.py
Browse files- src/embedder.py +33 -0
src/embedder.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# embedder.py
|
| 2 |
+
"""
|
| 3 |
+
Ultra-fast MLX embedder using ModernBERT (4-bit) — 1500+ it/s on M4
|
| 4 |
+
100% local, no fallback needed.
|
| 5 |
+
"""
|
| 6 |
+
from typing import List
|
| 7 |
+
import mlx.core as mx
|
| 8 |
+
|
| 9 |
+
# Load the model (downloads ~200MB on first run)
|
| 10 |
+
print("Loading ModernBERT Embed (MLX 4-bit)...")
|
| 11 |
+
from mlx_embeddings import load, generate
|
| 12 |
+
model, processor = load("mlx-community/nomicai-modernbert-embed-base-4bit")
|
| 13 |
+
|
| 14 |
+
class LocalEmbedder:
|
| 15 |
+
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
| 16 |
+
# Generate embeddings (batched, GPU-accelerated)
|
| 17 |
+
output = generate(model, processor, texts=texts)
|
| 18 |
+
embeddings = output.text_embeds # Shape: (len(texts), 768)
|
| 19 |
+
|
| 20 |
+
# Optional: Truncate to 256 dims for speed (Matryoshka)
|
| 21 |
+
# embeddings = embeddings[:, :256]
|
| 22 |
+
|
| 23 |
+
# Normalize for cosine similarity
|
| 24 |
+
norms = mx.sqrt(mx.sum(embeddings ** 2, axis=-1, keepdims=True))
|
| 25 |
+
normalized = embeddings / (norms + 1e-8)
|
| 26 |
+
return normalized.tolist()
|
| 27 |
+
|
| 28 |
+
def embed_query(self, text: str) -> List[float]:
|
| 29 |
+
return self.embed_documents([text])[0]
|
| 30 |
+
|
| 31 |
+
# Global instance
|
| 32 |
+
embedder = LocalEmbedder()
|
| 33 |
+
print("ModernBERT Embed ready! (2000+ it/s expected)")
|