Spaces:

SiddhJagani
/

Github

Sleeping

SiddhJagani commited on Nov 9, 2025

Commit

6af664c

verified ·

1 Parent(s): 9a7f6cc

Create embedder.py

Files changed (1) hide show

src/embedder.py ADDED Viewed

+# embedder.py
+"""
+Ultra-fast MLX embedder using ModernBERT (4-bit) — 1500+ it/s on M4
+100% local, no fallback needed.
+"""
+from typing import List
+import mlx.core as mx
+# Load the model (downloads ~200MB on first run)
+print("Loading ModernBERT Embed (MLX 4-bit)...")
+from mlx_embeddings import load, generate
+model, processor = load("mlx-community/nomicai-modernbert-embed-base-4bit")
+class LocalEmbedder:
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        # Generate embeddings (batched, GPU-accelerated)
+        output = generate(model, processor, texts=texts)
+        embeddings = output.text_embeds  # Shape: (len(texts), 768)
+        # Optional: Truncate to 256 dims for speed (Matryoshka)
+        # embeddings = embeddings[:, :256]
+        # Normalize for cosine similarity
+        norms = mx.sqrt(mx.sum(embeddings ** 2, axis=-1, keepdims=True))
+        normalized = embeddings / (norms + 1e-8)
+        return normalized.tolist()
+    def embed_query(self, text: str) -> List[float]:
+        return self.embed_documents([text])[0]
+# Global instance
+embedder = LocalEmbedder()
+print("ModernBERT Embed ready! (2000+ it/s expected)")