Spaces:
Sleeping
Sleeping
File size: 995 Bytes
633bb91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
from typing import List
from config import Config
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
class BAAIEmbedder(Embeddings):
    """LangChain-compatible embedder backed by a SentenceTransformer model.

    Wraps ``sentence_transformers.SentenceTransformer`` so it can be used
    anywhere LangChain expects an ``Embeddings`` implementation.
    """

    def __init__(self, model_name=None, batch_size=None):
        """Load the sentence-transformer model.

        Args:
            model_name: HuggingFace model id to load. Defaults to
                ``Config.EMBEDDING_MODEL_NAME`` (backward compatible with the
                original no-argument constructor).
            batch_size: Batch size used by ``embed_documents``. Defaults to
                ``Config.BATCH_SIZE``.
        """
        self.model = SentenceTransformer(
            model_name if model_name is not None else Config.EMBEDDING_MODEL_NAME
        )
        # Explicit None-check (not `or`) so a caller-supplied 0/falsy value
        # is not silently replaced by the config default.
        self.batch_size = batch_size if batch_size is not None else Config.BATCH_SIZE

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of documents; returns one vector per input text."""
        return self.model.encode(
            texts,
            batch_size=self.batch_size,
            show_progress_bar=True,
            convert_to_numpy=True,
        ).tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string into one vector."""
        return self.model.encode(text, convert_to_numpy=True).tolist()
if __name__ == "__main__":
    # Smoke test: embed two short sentences and print a prefix of each vector.
    demo_sentences = ["LangChain is powerful", "Qdrant is great for vectors"]
    vectors = BAAIEmbedder().embed_documents(demo_sentences)
    print("### Sample embeddings (first 5 dims):")
    for vector in vectors:
        print(vector[:5])
|