|
|
import faiss |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
def build_faiss_index(embeddings: list):
    """Build a flat (exact, L2-distance) FAISS index from embedding vectors.

    Args:
        embeddings (list): Non-empty sequence of equal-length embedding
            vectors (lists of floats, or a 2-D array-like).

    Returns:
        faiss.IndexFlatL2: Index containing every vector, in input order, so
        the ids returned by a search are positions in ``embeddings``.

    Raises:
        ValueError: If ``embeddings`` is empty, is not a 2-D collection of
            vectors, or the vectors have zero length.
    """
    # Convert once, directly to the C-contiguous float32 matrix that FAISS
    # expects, instead of building an intermediate array and then casting it.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Fail early with a clear message rather than an opaque IndexError /
    # TypeError from indexing into an empty or flat input.
    if vectors.ndim != 2 or vectors.shape[0] == 0 or vectors.shape[1] == 0:
        raise ValueError(
            "embeddings must be a non-empty 2-D collection of equal-length "
            f"vectors; got array of shape {vectors.shape}"
        )

    dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(vectors)
    return index
|
|
|
|
|
|
|
|
def search_faiss(query_embedding: list, index, chunks: list, top_k: int = 3):
    """Return the text chunks whose embeddings are closest to a query.

    Args:
        query_embedding (list): Embedding vector for the user query.
        index: FAISS index object (anything exposing ``search(x, k)``).
        chunks (list): Original text chunks, in the same order as the
            vectors that were added to ``index``.
        top_k (int): Maximum number of results to return (default = 3).

    Returns:
        list: Matching chunks ordered as returned by the index (nearest
        first). May contain fewer than ``top_k`` items when the index holds
        fewer vectors than requested.
    """
    # FAISS searches a batch of queries: shape (n_queries, dim), float32.
    query_vector = np.ascontiguousarray(
        query_embedding, dtype=np.float32
    ).reshape(1, -1)

    _, indices = index.search(query_vector, top_k)

    # FAISS pads missing neighbours with id -1 when fewer than top_k vectors
    # are available. Skip those ids: chunks[-1] would otherwise silently
    # return the last chunk as a bogus match.
    return [chunks[int(i)] for i in indices[0] if i >= 0]