pls-rag / db /initializer.py
m97j's picture
Initial codes commit
2aa7bf4
raw
history blame
988 Bytes
# rag/db/initializer.py
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from config import HF_DS_REPO_ID, HF_INDEX_FILE, HF_IDS_FILE
from modules.retriever import set_index
from modules import corpus
# Module-level cache for the ID mapping loaded from the HF Hub.
# Stays None until _load_index_in_memory() assigns it; read via get_vector_ids().
_vector_ids = None
def _load_index_in_memory():
    """Download the index and ID mapping from the HF Hub and load both into memory.

    Both files are fetched from the ``HF_DS_REPO_ID`` dataset repository and
    fully loaded *before* anything is published. If downloading or parsing
    either file fails, the previously installed index and ``_vector_ids``
    are left untouched, instead of pairing a fresh index with stale (or
    missing) IDs.

    Side effects:
        Calls ``set_index`` with the loaded FAISS index and rebinds the
        module-level ``_vector_ids``.

    Raises:
        Any exception raised by ``hf_hub_download``, ``faiss.read_index`` or
        ``np.load``; nothing is caught here.
    """
    global _vector_ids

    index_path = hf_hub_download(
        repo_id=HF_DS_REPO_ID, filename=HF_INDEX_FILE, repo_type="dataset"
    )
    ids_path = hf_hub_download(
        repo_id=HF_DS_REPO_ID, filename=HF_IDS_FILE, repo_type="dataset"
    )

    index = faiss.read_index(index_path)
    # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects
    # from the downloaded file, which can execute code. Kept for compatibility
    # with the existing ID file; only safe while the dataset repo is trusted.
    vector_ids = np.load(ids_path, allow_pickle=True)

    # Publish only after both artifacts loaded successfully so the retriever
    # index and the ID mapping always come from the same refresh.
    set_index(index)
    _vector_ids = vector_ids
def get_vector_ids():
    """Return the currently cached ID mapping.

    The value is whatever ``_load_index_in_memory`` last stored in the
    module-level ``_vector_ids``; it is ``None`` if nothing has been loaded.
    """
    # Reading a module global needs no ``global`` declaration.
    return _vector_ids
def initialize_dbs():
    """Prepare the corpus, then load the index and ID mapping into memory.

    The two steps run in this fixed order; any exception from either step
    propagates to the caller.
    """
    # Step 1: corpus preparation (per the original note, the download
    # happens only the first time).
    corpus.prepare_corpus()

    # Step 2: load the index / ID mapping into memory.
    _load_index_in_memory()
def force_update():
    """Reload the index and ID mapping by re-running the in-memory load.

    Unlike ``initialize_dbs``, this does not touch the corpus.
    """
    _load_index_in_memory()