Spaces:
Sleeping
Sleeping
"""
Ultra-fast ModernBERT-like embedder (CPU-compatible)
100% local — runs on x86 / Linux / Docker.
"""
from typing import List

import torch
from transformers import AutoTokenizer, AutoModel

print("Loading ModernBERT Embed (CPU version)...")

# Any small, fast embedding model works here, e.g.
# nomic-ai/nomic-embed-text-v1.5 or sentence-transformers/all-MiniLM-L6-v2.
MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"

# NOTE(review): trust_remote_code=True executes code shipped in the model repo
# at load time — acceptable for a pinned, vetted model, but worth confirming.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)
model.eval()  # inference only — disable dropout etc.

# Prefer the GPU when one is visible; otherwise stay on CPU.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
class LocalEmbedder:
    """Minimal embedder exposing the LangChain/llama_index-style interface.

    Uses the module-level ``tokenizer`` and ``model`` globals; produces
    L2-normalized embeddings suitable for cosine-similarity search.
    """

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate normalized embeddings for a batch of texts.

        Args:
            texts: Batch of input strings (may be empty).

        Returns:
            One L2-normalized embedding (list of floats) per input text.
        """
        # Edge case: tokenizer/model choke on an empty batch.
        if not texts:
            return []
        with torch.no_grad():
            inputs = tokenizer(
                texts,
                padding=True,
                truncation=True,
                max_length=512,
                return_tensors="pt",
            ).to(model.device)
            outputs = model(**inputs)
            hidden = outputs.last_hidden_state  # (batch, seq, dim)
            # BUG FIX: a plain .mean(dim=1) also averages PADDING positions,
            # skewing embeddings for shorter texts in a mixed-length batch.
            # Mask-weighted mean pooling averages over real tokens only.
            mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
            summed = (hidden * mask).sum(dim=1)
            counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid 0-division
            embeddings = summed / counts
            # Normalize for cosine similarity; clamp guards against a
            # zero-norm vector producing NaNs.
            norms = embeddings.norm(p=2, dim=1, keepdim=True).clamp(min=1e-12)
            embeddings = embeddings / norms
        return embeddings.cpu().tolist()

    def embed_query(self, text: str) -> List[float]:
        """Generate a normalized embedding for a single query string."""
        return self.embed_documents([text])[0]
# Module-level singleton consumed by llama_index.
embedder = LocalEmbedder()

print("ModernBERT Embed ready! (CPU-compatible)")