# Loading embeddings from storage
import os
from pathlib import Path

from huggingface_hub import hf_hub_download
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# "data" directory located in the parent of the current working directory
# (resolved once, when this module is imported).
data_path = os.path.join(Path(os.getcwd()).parent, "data")

# Folder inside the data directory for the local FAISS index.
# NOTE: the functions below default to the relative path "faiss_index" and
# their ``local_folder`` parameter shadows this module-level name, so this
# value is only used when a caller passes it explicitly.
local_folder = os.path.join(data_path, 'faiss_index')

# The files fetched into the index folder, in download order.
_INDEX_FILENAMES = ("index.faiss", "index.pkl")


def download_faiss_index(
    repo_id: str = "kaburia/epic-a-embeddings",
    local_folder: str = "faiss_index",
) -> None:
    """Download the FAISS index files from a Hugging Face dataset repo.

    Creates ``local_folder`` if needed, then fetches ``index.faiss`` and
    ``index.pkl`` into it. A file that already exists locally is skipped,
    so repeated calls do not re-download.

    Args:
        repo_id: Hugging Face dataset repository holding the index files.
        local_folder: Directory the files are downloaded into.
    """
    os.makedirs(local_folder, exist_ok=True)

    for filename in _INDEX_FILENAMES:
        # Only an existence check: a present file is trusted as-is.
        if os.path.exists(os.path.join(local_folder, filename)):
            continue

        print(f"Downloading {filename} from Hugging Face Dataset...")
        hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type="dataset",
            local_dir=local_folder,
            # NOTE(review): recent huggingface_hub releases appear to
            # deprecate this argument - confirm against the pinned version
            # before removing it.
            local_dir_use_symlinks=False,
        )


def load_vectorstore(index_path: str = "faiss_index") -> FAISS:
    """Load a FAISS vector store from a local folder.

    Args:
        index_path: Folder containing the saved index files.

    Returns:
        The vector store returned by ``FAISS.load_local``, using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
    # index.pkl, and unpickling can execute arbitrary code. Only point this
    # at index files obtained from a source you trust.
    return FAISS.load_local(
        index_path,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True,
    )


# download and load vectorstore
def get_vectorstore(
    repo_id: str = "kaburia/epic-a-embeddings",
    local_folder: str = "faiss_index",
) -> FAISS:
    """Ensure the index files exist locally, then load the vector store.

    Args:
        repo_id: Hugging Face dataset repository holding the index files.
        local_folder: Directory used both as the download target and as the
            folder the index is loaded from.

    Returns:
        The vector store loaded from ``local_folder``.
    """
    download_faiss_index(repo_id=repo_id, local_folder=local_folder)
    return load_vectorstore(index_path=local_folder)