File size: 995 Bytes
633bb91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from typing import List, Optional
from config import Config
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer


class BAAIEmbedder(Embeddings):
    """LangChain ``Embeddings`` adapter backed by a ``SentenceTransformer`` model.

    With no arguments the model name and encoding batch size come from
    ``Config.EMBEDDING_MODEL_NAME`` and ``Config.BATCH_SIZE``. Both can be
    overridden per instance, which makes it possible to use a different
    model or batch size without touching the global configuration.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        batch_size: Optional[int] = None,
        *,
        show_progress_bar: bool = True,
    ):
        """Load the embedding model and remember the encoding settings.

        Args:
            model_name: Value passed to ``SentenceTransformer``. ``None``
                (the default) uses ``Config.EMBEDDING_MODEL_NAME``.
            batch_size: Batch size passed to ``encode`` when embedding
                documents. ``None`` (the default) uses ``Config.BATCH_SIZE``.
            show_progress_bar: Whether ``embed_documents`` asks ``encode`` to
                display a progress bar. Defaults to ``True``.
        """
        # Config is read at call time (not as a default-argument expression)
        # so that changes made to Config after import are still honoured.
        if model_name is None:
            model_name = Config.EMBEDDING_MODEL_NAME
        if batch_size is None:
            batch_size = Config.BATCH_SIZE

        self.model = SentenceTransformer(model_name)
        self.batch_size = batch_size
        self.show_progress_bar = show_progress_bar

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding per input text, in input order, as plain Python
            lists of floats. An empty input yields an empty list.
        """
        # Nothing to encode: skip the model call (and its progress bar).
        if not texts:
            return []

        vectors = self.model.encode(
            texts,
            batch_size=self.batch_size,
            show_progress_bar=self.show_progress_bar,
            convert_to_numpy=True,
        )
        # encode() was asked for a NumPy array; convert it to nested lists.
        return vectors.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string.

        Args:
            text: The query to embed.

        Returns:
            The embedding of ``text`` as a plain Python list of floats.
        """
        vector = self.model.encode(text, convert_to_numpy=True)
        return vector.tolist()


if __name__ == "__main__":
    # Manual smoke test: embed two short sentences and print the first
    # five components of each resulting vector.
    demo_sentences = [
        "LangChain is powerful",
        "Qdrant is great for vectors",
    ]
    vectors = BAAIEmbedder().embed_documents(demo_sentences)

    print("### Sample embeddings (first 5 dims):")
    for vector in vectors:
        print(vector[:5])