Shubham170793's picture
Create vectorstore.py
dcde1ea verified
raw
history blame
1.55 kB
import faiss # 📌 Library: specialized for vector search
import numpy as np # 📌 Library: handles numeric arrays efficiently
def build_faiss_index(embeddings: list):
    """Build a flat L2 FAISS index from a collection of embedding vectors.

    Args:
        embeddings (list): Non-empty sequence of equal-length embedding
            vectors (each a sequence of floats), e.g. 384 floats per
            vector for some Hugging Face embedding models.

    Returns:
        faiss.IndexFlatL2: Index using Euclidean (L2) distance, with the
        vectors added in the order they appear in ``embeddings``.

    Raises:
        ValueError: If ``embeddings`` is empty or does not form a 2-D
            (n_vectors, dimension) matrix with a positive dimension.
    """
    # Convert once up front: the index is fed a C-contiguous float32
    # matrix, and the shape gives us the dimension without touching
    # embeddings[0] (which fails on empty input).
    matrix = np.ascontiguousarray(embeddings, dtype="float32")
    if matrix.ndim != 2 or matrix.shape[0] == 0 or matrix.shape[1] == 0:
        raise ValueError(
            "embeddings must be a non-empty 2-D collection of vectors; "
            f"got array with shape {matrix.shape}"
        )

    index = faiss.IndexFlatL2(matrix.shape[1])
    index.add(matrix)
    return index
def search_faiss(query_embedding: list, index, chunks: list, top_k: int = 3):
    """Return the text chunks whose vectors are nearest to the query.

    Args:
        query_embedding (list): Embedding vector for the user query.
        index: FAISS index object; only its ``search(x, k)`` method is used.
        chunks (list): Original text chunks; position ``i`` is looked up
            for every label ``i`` that the index returns.
        top_k (int): Maximum number of results to return (default = 3).

    Returns:
        list: Up to ``top_k`` matching chunks, in the order the index
        returned them. Fewer are returned when the index reports fewer
        neighbours than requested; an empty list when ``top_k`` <= 0.

    Raises:
        IndexError: If the index returns a label beyond ``len(chunks)``,
            i.e. the index and ``chunks`` are out of sync.
    """
    # A non-positive top_k asks for no results; skip the search entirely.
    if top_k <= 0:
        return []

    # Shape the query as a (1, dim) float32 matrix for the index.
    query_vector = np.asarray(query_embedding, dtype="float32").reshape(1, -1)

    _distances, indices = index.search(query_vector, top_k)

    # FAISS pads missing neighbours with label -1 (e.g. when the index
    # holds fewer than top_k vectors). Skip those: chunks[-1] would
    # silently return the last chunk as a bogus match.
    return [chunks[i] for i in indices[0] if i >= 0]