# Page header captured together with this file: "Spaces: Runtime error".
# File size: 1,969 Bytes (bb56df9).
from sentence_transformers import SentenceTransformer
import faiss, numpy as np, json
import os, pickle
# On-disk locations of the FAISS index and of the pickled document metadata.
INDEX_PATH = "vector_db/deposit.index"
META_PATH = "vector_db/deposit_meta.pkl"

# Model name handed to SentenceTransformer.
EMB_MODEL = "intfloat/multilingual-e5-base"

# Module-level caches; each stays None until _lazy_load() fills it in.
_emb_model = _index = _docs = None
def _lazy_load():
    """Load the embedding model, vector index and document metadata on demand.

    Each module-level cache (``_emb_model``, ``_index``, ``_docs``) is filled
    only while it is still ``None``, so later calls reuse what is already
    loaded. A missing metadata file is not an error: ``_docs`` then becomes
    an empty list.

    Raises:
        FileNotFoundError: If ``INDEX_PATH`` does not exist.
    """
    global _emb_model, _index, _docs

    if _emb_model is None:
        # device="cpu" is given explicitly, and low_cpu_mem_usage=False is
        # passed through model_kwargs, to avoid the "meta tensor" error.
        _emb_model = SentenceTransformer(
            EMB_MODEL,
            device="cpu",
            model_kwargs={"low_cpu_mem_usage": False},
        )
        print("๐ง ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ ์๋ฃ")

    if _index is None:
        if not os.path.exists(INDEX_PATH):
            raise FileNotFoundError(f"โ {INDEX_PATH} not found.")
        _index = faiss.read_index(INDEX_PATH)
        print("๐ฆ ๋ฒกํฐ ์ธ๋ฑ์ค ๋ก๋ ์๋ฃ")

    if _docs is None:
        if os.path.exists(META_PATH):
            # SECURITY NOTE(review): pickle.load can execute arbitrary code
            # embedded in the file, so META_PATH must only ever point at a
            # trusted, locally produced file.
            with open(META_PATH, "rb") as f:
                _docs = pickle.load(f)
            print(f"๐ {_docs and len(_docs)}๊ฐ ๋ฌธ์ ๋ฉํ ๋ก๋๋จ (from deposit_meta.pkl)")
        else:
            print("โ ๏ธ ๋ฉํ๋ฐ์ดํฐ ํ์ผ ์์. ๋น ๋ฆฌ์คํธ๋ก ์ด๊ธฐํ")
            _docs = []
def search_similar_docs(query, top_k=3):
    """Return the stored documents closest to *query* in the vector index.

    The query is encoded with the cached embedding model and the index is
    asked for ``top_k`` neighbours. Every returned position that falls inside
    ``_docs`` contributes its document to the result, and one line is printed
    per match.

    Args:
        query: Text to embed and look up.
        top_k: Number of neighbours requested from the index.

    Returns:
        list: Entries of ``_docs`` in the order the index returned them.
    """
    _lazy_load()

    embedding = _emb_model.encode([query])
    scores, positions = _index.search(embedding, top_k)

    matches = []
    doc_count = len(_docs)
    for position, score in zip(positions[0], scores[0]):
        # Ignore positions that do not refer to a loaded document.
        if position < 0 or position >= doc_count:
            continue
        doc = _docs[position]
        matches.append(doc)
        print(f"๐ ๋งค์นญ ๋ฌธ์: {doc.get('meta', {})} | score={score:.4f}")
    return matches
def check_question_validity(question) -> bool:
    """Report whether the vector search finds any document for *question*.

    Args:
        question: Text forwarded to ``search_similar_docs``.

    Returns:
        ``True`` when ``search_similar_docs(question, top_k=1)`` returns a
        non-empty list, ``False`` otherwise.
    """
    # NOTE(review): no similarity-score threshold is applied here, so any
    # in-range hit counts as valid — confirm this is intended.
    results = search_similar_docs(question, top_k=1)
    return bool(results)