File size: 1,547 Bytes
dcde1ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import faiss # π Library: specialized for vector search
import numpy as np # π Library: handles numeric arrays efficiently
def build_faiss_index(embeddings: list):
    """
    Build a FAISS index from embeddings.

    Args:
        embeddings (list): Non-empty list of embedding vectors (lists of
            floats). All vectors must have the same, non-zero length.

    Returns:
        index (faiss.IndexFlatL2): FAISS index for similarity search.

    Raises:
        ValueError: If `embeddings` is empty, is not a 2-D collection of
            vectors, or the vectors have zero length.
    """
    # Step 1: Convert once to a C-contiguous float32 matrix, the layout
    # FAISS expects, instead of building an array and then copying it again.
    vectors = np.ascontiguousarray(embeddings, dtype='float32')

    # Step 2: Validate before touching FAISS so callers get a clear error
    # rather than an IndexError/TypeError from indexing into bad input.
    if vectors.ndim != 2 or vectors.shape[0] == 0 or vectors.shape[1] == 0:
        raise ValueError(
            "embeddings must be a non-empty list of equal-length, "
            f"non-empty vectors; got array of shape {vectors.shape}"
        )

    # Step 3: Vector size (e.g., 384 for Hugging Face embeddings)
    dimension = vectors.shape[1]

    # Step 4: Create FAISS index that uses Euclidean (L2) distance
    index = faiss.IndexFlatL2(dimension)

    # Step 5: Add all embeddings to the index
    index.add(vectors)
    return index
def search_faiss(query_embedding: list, index, chunks: list, top_k: int = 3):
    """
    Search FAISS index to find most similar chunks.

    Args:
        query_embedding (list): Embedding for user query.
        index: FAISS index object (anything exposing `search(vectors, k)`).
        chunks (list): Original text chunks, in the same order as the
            vectors that were added to the index.
        top_k (int): Maximum number of results to return (default = 3).

    Returns:
        list: Top matching text chunks, best match first. May contain fewer
        than `top_k` items when the index holds fewer vectors.
    """
    # Step 1: Turn query embedding into shape (1, dim) float32 numpy array
    query_vector = np.array([query_embedding]).astype('float32')

    # Step 2: Search FAISS for nearest neighbors (distances are not needed)
    _distances, indices = index.search(query_vector, top_k)

    # Step 3: Map result indices back to actual text chunks.
    # FAISS pads the result with -1 when fewer than top_k neighbors exist;
    # without this filter, chunks[-1] would silently return the last chunk.
    return [chunks[i] for i in (int(label) for label in indices[0]) if i >= 0]