Spaces:
Running
Running
File size: 1,897 Bytes
ef26a79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# Loading embeddings from storage
import os
from pathlib import Path
from huggingface_hub import hf_hub_download
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
# Data directory: a "data" folder located in the parent of the current
# working directory (i.e. next to the cwd, not inside it).
data_path = os.path.join(Path.cwd().parent, "data")

# Folder inside the data directory reserved for the FAISS index files.
# NOTE(review): the functions in this file default to the relative path
# "faiss_index" rather than this value -- confirm callers pass it explicitly.
local_folder = os.path.join(data_path, "faiss_index")
def download_faiss_index(
    repo_id="kaburia/epic-a-embeddings",
    local_folder="faiss_index",
    filenames=("index.faiss", "index.pkl"),
):
    """Make sure the FAISS index files exist locally, fetching missing ones.

    Each name in ``filenames`` is looked up inside ``local_folder``; any file
    that is not there is downloaded from the Hugging Face dataset repository
    ``repo_id``. Files already on disk are left untouched.

    Args:
        repo_id: Hugging Face dataset repository holding the index files.
        local_folder: Directory that receives the files. It is created
            (including parents) when it does not exist yet.
        filenames: Iterable of file names to ensure. Defaults to the two
            files this function has always fetched. Pass a tuple/list, not a
            bare string (a string would be iterated character by character).

    Returns:
        None. The function is used for its side effect on ``local_folder``.
    """
    os.makedirs(local_folder, exist_ok=True)

    for filename in filenames:
        # Presence on disk is the only check: an existing file is never
        # re-downloaded or validated.
        if os.path.exists(os.path.join(local_folder, filename)):
            continue

        print(f"Downloading {filename} from Hugging Face Dataset...")
        hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type="dataset",
            local_dir=local_folder,
            # NOTE(review): kept for older huggingface_hub releases; newer
            # releases reportedly deprecate this argument -- confirm against
            # the pinned version before removing it.
            local_dir_use_symlinks=False,
        )
def load_vectorstore(
    index_path="faiss_index",
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Load a FAISS vector store from a local folder.

    Args:
        index_path: Folder handed to ``FAISS.load_local``.
        model_name: Model name passed to ``HuggingFaceEmbeddings``. The
            default is the model this function has always used; presumably it
            must match the model the index was built with -- verify before
            overriding.

    Returns:
        Whatever ``FAISS.load_local`` returns for the given folder.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)

    # SECURITY: allow_dangerous_deserialization=True opts in to LangChain's
    # pickle-based loading of the stored index data. Unpickling can execute
    # arbitrary code, so only point index_path at index files from a source
    # you trust.
    return FAISS.load_local(
        index_path,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True,
    )
# download and load vectorstore
def get_vectorstore(repo_id="kaburia/epic-a-embeddings", local_folder="faiss_index"):
    """Download the FAISS index if needed, then load and return it.

    Args:
        repo_id: Repository identifier forwarded to ``download_faiss_index``.
        local_folder: Folder used both as the download target and as the
            path the vector store is loaded from.

    Returns:
        The value returned by ``load_vectorstore`` for ``local_folder``.
    """
    # Fetch first so the loader runs against a populated folder.
    download_faiss_index(local_folder=local_folder, repo_id=repo_id)
    vectorstore = load_vectorstore(index_path=local_folder)
    return vectorstore
|