Spaces:
Sleeping
Sleeping
File size: 1,199 Bytes
# Transform sentence chunks from LangChain into vectors and index them using FAISS.
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from config import EMBEDDING_MODEL
def load_embedding_model():
    """Create a SentenceTransformer from the configured model name.

    The model name is read from ``EMBEDDING_MODEL`` in the config module
    (noted in the original code as all-MiniLM-L6-v2), so switching models
    only requires a config change.

    Returns:
        A newly constructed ``SentenceTransformer`` instance.
    """
    embedding_model = SentenceTransformer(EMBEDDING_MODEL)
    return embedding_model
def build_vectorstore(chunks):
    """Embed text chunks and load the resulting vectors into a FAISS index.

    Args:
        chunks: Non-empty collection of text chunks to embed.

    Returns:
        A ``(model, index)`` tuple: the embedding model used for encoding and
        the ``faiss.IndexFlatL2`` index holding one vector per chunk.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    if not chunks:
        raise ValueError("Chunks list is empty.")

    encoder = load_embedding_model()
    vectors = encoder.encode(chunks)

    # The second axis of the encoded output is used as the vector dimension.
    store = faiss.IndexFlatL2(vectors.shape[1])

    # Vectors are cast to float32 before being added to the index.
    store.add(np.array(vectors).astype("float32"))
    return encoder, store
def retrieve_chunks(query, model, index, chunks, k):
    """Return the chunks whose embeddings are nearest to the query.

    Args:
        query: The query text to embed and search for.
        model: Embedding model exposing ``encode``; should be the same model
            that produced the vectors stored in ``index``.
        index: FAISS index built over the embeddings of ``chunks``.
        chunks: The text chunks, in the same order they were added to
            ``index``.
        k: Number of chunks to retrieve. A larger ``k`` gives more context
            for the answer but makes the process slower.

    Returns:
        A list of at most ``k`` chunks, in the order FAISS returned them.
        Fewer than ``k`` chunks are returned when the index holds fewer
        than ``k`` vectors.

    Raises:
        ValueError: If ``index`` is None.
    """
    if index is None:
        raise ValueError("FAISS index has not been built.")

    query_embedding = model.encode([query])
    _distances, indices = index.search(
        np.array(query_embedding).astype("float32"),
        k,
    )

    # Only one query was submitted, so only the first row of results matters.
    # FAISS pads the row with -1 when fewer than k neighbours exist; indexing
    # chunks[-1] would silently return the last chunk, so drop those labels.
    return [chunks[i] for i in indices[0] if i >= 0]