Spaces:

murkasad
/

DeepLearningRAGchatbot

Sleeping

Upload 10 files

22cff0b verified about 1 month ago

1.2 kB

	#transforming sentence chunks from langchain into vectors usin faiss

	import numpy as np
	import faiss
	from sentence_transformers import SentenceTransformer
	from config import EMBEDDING_MODEL

	def load_embedding_model():
	return SentenceTransformer(EMBEDDING_MODEL) #all-MiniLM-L6-v2 from config file, we can change it

	def build_vectorstore(chunks):
	if not chunks:
	raise ValueError("Chunks list is empty.")

	model = load_embedding_model()
	embeddings = model.encode(chunks)
	dimension = embeddings.shape[1]

	index = faiss.IndexFlatL2(dimension)
	index.add(np.array(embeddings).astype("float32"))

	return model, index

	def retrieve_chunks(query, model, index, chunks, k): #k is number of chunks we want to extract, the more k, better the answer but slower the process
	if index is None:
	raise ValueError("FAISS index has not been built.")

	query_embedding = model.encode([query])
	distances, indices = index.search(
	np.array(query_embedding).astype("float32"),
	k
	)

	return [chunks[i] for i in indices[0]] #since we have only 1 query, get 0th item from list of indices [[chunk1, chunk2]]