"""
Ultra-fast ModernBERT-like embedder (CPU-compatible)
100% local — runs on x86 / Linux / Docker.
"""
from typing import List
import torch
from transformers import AutoTokenizer, AutoModel
print("Loading ModernBERT Embed (CPU version)...")
# You can use any small, fast embedding model here
# e.g. nomic-ai/nomic-embed-text-v1.5 or sentence-transformers/all-MiniLM-L6-v2
MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)
model.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
class LocalEmbedder:
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate normalized embeddings for a batch of texts."""
        with torch.no_grad():
            inputs = tokenizer(
                texts,
                padding=True,
                truncation=True,
                max_length=512,
                return_tensors="pt",
            ).to(model.device)
            outputs = model(**inputs)
            # Mask-aware mean pooling: average only over real tokens,
            # so padding added for batching does not skew the embedding.
            mask = inputs["attention_mask"].unsqueeze(-1).float()
            summed = (outputs.last_hidden_state * mask).sum(dim=1)
            counts = mask.sum(dim=1).clamp(min=1e-9)
            embeddings = summed / counts
            # Normalize to unit length so dot product equals cosine similarity.
            embeddings = embeddings / embeddings.norm(p=2, dim=1, keepdim=True)
        return embeddings.cpu().tolist()

    def embed_query(self, text: str) -> List[float]:
        """Generate a normalized embedding for a single query string."""
        return self.embed_documents([text])[0]
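
# Hedged variant (assumption, not in the original module): the
# nomic-embed-text-v1.5 model card recommends task prefixes for retrieval.
# PrefixedEmbedder is a hypothetical subclass that prepends them; drop it if
# you swap in a model such as all-MiniLM-L6-v2 that does not use prefixes.
class PrefixedEmbedder(LocalEmbedder):
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return super().embed_documents([f"search_document: {t}" for t in texts])

    def embed_query(self, text: str) -> List[float]:
        return super().embed_documents([f"search_query: {text}"])[0]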
# Global instance (for llama_index)
embedder = LocalEmbedder()
print("ModernBERT Embed ready! (CPU-compatible)")