File size: 1,897 Bytes
ef26a79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#  Loading embeddings from storage
import os
from pathlib import Path
from huggingface_hub import hf_hub_download
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Directory holding downloaded artifacts: a "data" folder that sits beside the
# current working directory (i.e. under the cwd's parent).
# NOTE(review): this is resolved from the process cwd at import time, not from
# this file's location -- confirm that callers always start from the expected dir.
data_path = str(Path.cwd().parent / "data")
# Path of the local FAISS index folder inside the data directory. Only the path
# is computed here; nothing is created on disk at this point.
local_folder = os.path.join(data_path, "faiss_index")

def download_faiss_index(repo_id="kaburia/epic-a-embeddings", local_folder="faiss_index"):
    """Ensure both FAISS index files are present in ``local_folder``.

    The folder is created if needed. Each of ``index.faiss`` and ``index.pkl``
    is fetched from the Hugging Face dataset repository ``repo_id`` only when
    it is not already on disk, so repeated calls skip files that exist.

    Args:
        repo_id: Hugging Face dataset repository that hosts the index files.
        local_folder: Directory the index files are stored in.
    """
    os.makedirs(local_folder, exist_ok=True)

    # Same order as the files are consumed: the raw index first, then its pickle.
    for artifact in ("index.faiss", "index.pkl"):
        if os.path.exists(os.path.join(local_folder, artifact)):
            continue  # already downloaded on a previous run

        print(f"Downloading {artifact} from Hugging Face Dataset...")
        hf_hub_download(
            repo_id=repo_id,
            filename=artifact,
            repo_type="dataset",
            local_dir=local_folder,
            local_dir_use_symlinks=False,
        )

def load_vectorstore(index_path="faiss_index"):
    """Load the FAISS vector store saved under ``index_path``.

    The store is opened with a ``sentence-transformers/all-MiniLM-L6-v2``
    HuggingFace embedding model.

    Args:
        index_path: Folder containing the saved FAISS index files.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    encoder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading the
    # pickled part of the index; only point this at index files from a trusted
    # source.
    return FAISS.load_local(
        index_path,
        embeddings=encoder,
        allow_dangerous_deserialization=True,
    )

# Download (if needed) and load the vector store in one call.
def get_vectorstore(repo_id="kaburia/epic-a-embeddings", local_folder="faiss_index"):
    """Make sure the FAISS index is available locally, then load it.

    Args:
        repo_id: Hugging Face dataset repository that hosts the index files.
        local_folder: Directory used both as download target and load source.

    Returns:
        Whatever ``load_vectorstore`` returns for ``local_folder``.
    """
    # Fetch first so the loader finds the index files on disk.
    download_faiss_index(local_folder=local_folder, repo_id=repo_id)
    vectorstore = load_vectorstore(index_path=local_folder)
    return vectorstore