Spaces:
Running
Running
| # Loading embeddings from storage | |
| import os | |
| from pathlib import Path | |
| from huggingface_hub import hf_hub_download | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
# Data directory: a "data" folder located next to (not inside) the current
# working directory.
# NOTE(review): this is resolved from the process CWD at import time, so the
# result depends on where the program is launched from -- confirm that is intended.
data_path = str(Path.cwd().parent / "data")
# Folder under the data directory intended for the local FAISS index files.
local_folder = os.path.join(data_path, "faiss_index")
def download_faiss_index(repo_id="kaburia/epic-a-embeddings", local_folder="faiss_index"):
    """Make sure the FAISS index files are present in ``local_folder``.

    Creates ``local_folder`` if it does not exist, then fetches
    ``index.faiss`` and ``index.pkl`` from the Hugging Face dataset
    repository ``repo_id`` -- each one only when a file of that name is not
    already on disk.

    Args:
        repo_id: Hugging Face dataset repository that holds the index files.
        local_folder: Directory the index files are stored in.

    Returns:
        None. The function is used for its side effects on ``local_folder``.
    """
    os.makedirs(local_folder, exist_ok=True)

    # Same order as the files are needed: the raw index first, then its pickle.
    for filename in ("index.faiss", "index.pkl"):
        if os.path.exists(os.path.join(local_folder, filename)):
            # A file with this name is already there; skip the download.
            continue

        print(f"Downloading {filename} from Hugging Face Dataset...")
        # NOTE(review): `local_dir_use_symlinks` appears to be deprecated in
        # recent huggingface_hub releases -- confirm against the pinned version
        # before removing it.
        hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type="dataset",
            local_dir=local_folder,
            local_dir_use_symlinks=False,
        )
def load_vectorstore(index_path="faiss_index"):
    """Load the FAISS vector store saved under ``index_path``.

    Builds a ``HuggingFaceEmbeddings`` instance for the
    ``sentence-transformers/all-MiniLM-L6-v2`` model and hands it to
    ``FAISS.load_local`` together with the index folder.

    Args:
        index_path: Folder passed to ``FAISS.load_local``.

    Returns:
        The vector store object returned by ``FAISS.load_local``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    # SECURITY: allow_dangerous_deserialization=True opts in to loading the
    # pickled part of the index (see the LangChain FAISS docs). Only point
    # `index_path` at index files that come from a trusted source.
    return FAISS.load_local(
        index_path,
        embeddings=embedder,
        allow_dangerous_deserialization=True,
    )
# Fetch the index files if they are missing, then load the vectorstore.
def get_vectorstore(repo_id="kaburia/epic-a-embeddings", local_folder="faiss_index"):
    """Download the FAISS index if needed and return the loaded vector store.

    Args:
        repo_id: Hugging Face dataset repository forwarded to
            ``download_faiss_index``.
        local_folder: Directory used both as the download target and as the
            index path handed to ``load_vectorstore``.

    Returns:
        Whatever ``load_vectorstore`` returns for ``local_folder``.
    """
    download_faiss_index(repo_id=repo_id, local_folder=local_folder)
    store = load_vectorstore(index_path=local_folder)
    return store