File size: 2,376 Bytes
7e85729 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | import os
import uuid
import faiss
import shutil
import logging
import pandas as pd
from typing import Any
from langchain_core import documents
from langchain_community import embeddings
from langchain_community import vectorstores
from langchain_community.docstore import in_memory
# Throwaway text that build_faiss embeds once; only the length of the
# resulting vector is used, to size the FAISS index.
DEFAULT_INDEX_QUERY = "hello world"
def build_faiss(
    data_frame: pd.DataFrame,
    index_path: str,
    embedder: Any
) -> vectorstores.FAISS:
    """Build a FAISS index from a DataFrame and save it to disk.

    Every cell of ``data_frame`` becomes one document: its text is
    ``str(cell)`` and its metadata records the row label (``"row"``) and the
    column name (``"column"``) the cell came from.

    The new index is fully built in memory before anything on disk is
    touched, so a failure while embedding leaves a previously saved index
    at ``index_path`` intact.

    Args:
        data_frame: DataFrame containing data to index. An empty DataFrame
            yields an empty (but still saved) index.
        index_path: Directory where the FAISS index is saved. If the path
            already exists it is removed first (removal errors are ignored).
        embedder: Embedder object to generate vectors. It must provide
            ``embed_query``, which is called once to find the vector size.

    Returns:
        vectorstores.FAISS: The FAISS vectorstore that was built and saved.
    """
    embedded_documents = []
    for row_idx, row in data_frame.iterrows():
        for col_name, cell_val in row.items():
            embedded_documents.append(documents.Document(
                page_content=str(cell_val),
                metadata={"row": row_idx, "column": col_name},
            ))

    # Embed a fixed probe string solely to learn the embedder's vector size.
    embedding_dim = len(embedder.embed_query(DEFAULT_INDEX_QUERY))
    # NOTE(review): IndexFlatIP scores by raw inner product; that matches
    # cosine similarity only if the embedder emits normalized vectors - confirm.
    vectorstore = vectorstores.FAISS(
        embedding_function=embedder,
        index=faiss.IndexFlatIP(embedding_dim),
        docstore=in_memory.InMemoryDocstore(),
        index_to_docstore_id={},
    )

    # Skip the add call when there is nothing to index: it would only hand
    # the embedder and the FAISS index an empty batch.
    if embedded_documents:
        uuids = [str(uuid.uuid4()) for _ in embedded_documents]
        vectorstore.add_documents(documents=embedded_documents, ids=uuids)
    logging.debug("Added %d documents to FAISS index", len(embedded_documents))

    # Replace the on-disk index only now that the new one exists in memory.
    if os.path.exists(index_path):
        shutil.rmtree(index_path, ignore_errors=True)
        logging.debug("Deleted existing FAISS index at %s", index_path)
    os.makedirs(index_path, exist_ok=True)
    vectorstore.save_local(index_path)
    logging.debug("FAISS index saved to ./%s/", index_path)
    return vectorstore
def load_faiss_index(
    index_path: str,
    hf_model_name: str
) -> vectorstores.FAISS:
    """Restore a FAISS vectorstore that was saved to disk.

    A HuggingFace embedder is created for ``hf_model_name`` and handed to
    the loader together with the index location.

    Args:
        index_path: Path of the saved FAISS index.
        hf_model_name: Name of the HuggingFace model used for embeddings.

    Returns:
        vectorstores.FAISS: The vectorstore loaded from ``index_path``.
    """
    hf_embedder = embeddings.HuggingFaceEmbeddings(model_name=hf_model_name)
    # NOTE(review): allow_dangerous_deserialization=True opts in to what the
    # flag name calls dangerous deserialization (presumably pickle-based) -
    # confirm that index_path only ever points at trusted, self-built indexes.
    loaded_store = vectorstores.FAISS.load_local(
        index_path,
        hf_embedder,
        allow_dangerous_deserialization=True,
    )
    return loaded_store
|