File size: 988 Bytes
4fdc679 2aa7bf4 4fdc679 2aa7bf4 4fdc679 2aa7bf4 4fdc679 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# rag/db/initializer.py
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from config import HF_DS_REPO_ID, HF_INDEX_FILE, HF_IDS_FILE
from modules.retriever import set_index
from modules import corpus
# Module-level cache for the ID mapping array. It stays None until
# _load_index_in_memory() assigns the array read from the IDs file.
_vector_ids = None
def _load_index_in_memory():
    """Download the index and ID mapping from the HF Hub and load them.

    Both files are fetched from the ``HF_DS_REPO_ID`` dataset repository
    (``HF_INDEX_FILE`` and ``HF_IDS_FILE``). The index is read with
    ``faiss.read_index`` and handed to ``set_index``; the ID array is read
    with ``np.load`` and stored in the module-level ``_vector_ids``.

    Both artifacts are fully loaded *before* either one is published, so a
    failure while downloading or reading leaves the previously loaded index
    and IDs untouched instead of pairing a new index with stale IDs.

    Raises:
        Any exception raised by ``hf_hub_download``, ``faiss.read_index``,
        ``np.load`` or ``set_index`` propagates to the caller.
    """
    global _vector_ids

    index_path = hf_hub_download(
        repo_id=HF_DS_REPO_ID, filename=HF_INDEX_FILE, repo_type="dataset"
    )
    ids_path = hf_hub_download(
        repo_id=HF_DS_REPO_ID, filename=HF_IDS_FILE, repo_type="dataset"
    )

    index = faiss.read_index(index_path)
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
    # objects from the downloaded file, which can execute code. This is only
    # acceptable while the dataset repository is fully trusted; consider
    # storing the IDs in a non-object dtype so pickling can be disabled.
    vector_ids = np.load(ids_path, allow_pickle=True)

    # Publish only after both loads succeeded, keeping index and IDs in sync.
    set_index(index)
    _vector_ids = vector_ids
def get_vector_ids():
    """Return the module-level ``_vector_ids`` value.

    This is ``None`` until ``_load_index_in_memory`` has stored the loaded
    ID array.
    """
    # Reading a module global needs no ``global`` declaration.
    return _vector_ids
def initialize_dbs():
    """Prepare the corpus, then load the index and ID mapping into memory.

    Calls ``corpus.prepare_corpus()`` first and ``_load_index_in_memory()``
    second.
    """
    # Step 1: corpus preparation. (The original note says the download only
    # happens on the first run -- confirm against modules.corpus.)
    corpus.prepare_corpus()

    # Step 2: bring the index and ID mapping into memory.
    _load_index_in_memory()
def force_update():
    """Reload the index and ID mapping by re-running ``_load_index_in_memory``.

    Unlike ``initialize_dbs``, this does not call ``corpus.prepare_corpus()``.
    """
    _load_index_in_memory()
|