Spaces:
Runtime error
Runtime error
| from sentence_transformers import SentenceTransformer | |
| import faiss, numpy as np, json | |
| import os, pickle | |
# Locations of the persisted artifacts and the embedding model identifier.
INDEX_PATH = "vector_db/deposit.index"  # read with faiss.read_index
META_PATH = "vector_db/deposit_meta.pkl"  # pickled document metadata
EMB_MODEL = "intfloat/multilingual-e5-base"  # passed to SentenceTransformer

# Module-level caches; they stay None until _lazy_load() fills them in.
_emb_model = _index = _docs = None
def _lazy_load():
    """Fill the module-level model, index and metadata caches on demand.

    Each of ``_emb_model``, ``_index`` and ``_docs`` is loaded only while it
    is still ``None``; parts that are already loaded are skipped. A missing
    metadata file is not an error: a warning is printed and ``_docs``
    becomes an empty list.

    Raises:
        FileNotFoundError: If ``INDEX_PATH`` does not exist at the moment
            the index has to be read.
    """
    global _emb_model, _index, _docs

    # 1) Embedding model.
    if _emb_model is None:
        # Per the original note: pinning device="cpu" and passing
        # low_cpu_mem_usage=False works around a "meta tensor" problem.
        _emb_model = SentenceTransformer(
            EMB_MODEL,
            device="cpu",
            model_kwargs={"low_cpu_mem_usage": False},
        )
        print("๐ง ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ ์๋ฃ")

    # 2) Vector index (mandatory).
    if _index is None:
        index_missing = not os.path.exists(INDEX_PATH)
        if index_missing:
            raise FileNotFoundError(f"โ {INDEX_PATH} not found.")
        _index = faiss.read_index(INDEX_PATH)
        print("๐ฆ ๋ฒกํฐ ์ธ๋ฑ์ค ๋ก๋ ์๋ฃ")

    # 3) Document metadata (optional).
    if _docs is None:
        if not os.path.exists(META_PATH):
            print("โ ๏ธ ๋ฉํ๋ฐ์ดํฐ ํ์ผ ์์. ๋น ๋ฆฌ์คํธ๋ก ์ด๊ธฐํ")
            _docs = []
        else:
            # NOTE(review): pickle.load can execute arbitrary code; make sure
            # META_PATH only ever points at a trusted file.
            with open(META_PATH, "rb") as meta_file:
                _docs = pickle.load(meta_file)
            print(f"๐ {_docs and len(_docs)}๊ฐ ๋ฌธ์ ๋ฉํ ๋ก๋๋จ (from deposit_meta.pkl)")
def search_similar_docs(query, top_k=3):
    """Return the stored documents most similar to ``query``.

    The query is embedded with the lazily loaded sentence-transformer model
    and looked up in the FAISS index. Every accepted hit is also printed
    together with its ``meta`` entry and score.

    Args:
        query: Text to embed and search for.
        top_k: Maximum number of neighbours to request from the index.
            Values below 1 produce an empty list instead of being passed
            on to FAISS.

    Returns:
        A list of entries taken from the loaded metadata, in the order the
        index returned them. Ids that fall outside the metadata list are
        skipped.

    Raises:
        FileNotFoundError: Propagated from ``_lazy_load`` when the index
            file is missing.
    """
    _lazy_load()

    # FAISS rejects a non-positive k; an empty result is the sensible answer.
    if top_k < 1:
        return []

    # FAISS expects a contiguous float32 matrix of shape (n_queries, dim).
    query_vec = np.ascontiguousarray(_emb_model.encode([query]), dtype=np.float32)
    distances, indices = _index.search(query_vec, top_k)

    matches = []
    for doc_id, score in zip(indices[0], distances[0]):
        # FAISS reports -1 when fewer than top_k neighbours exist, and the
        # metadata list may be shorter than the index; skip both cases.
        if not 0 <= doc_id < len(_docs):
            continue
        doc = _docs[doc_id]
        matches.append(doc)
        print(f"๐ ๋งค์นญ ๋ฌธ์: {doc.get('meta', {})} | score={score:.4f}")
    return matches
def check_question_validity(question):
    """Report whether the vector search finds any document for ``question``.

    Args:
        question: Text forwarded to ``search_similar_docs`` with ``top_k=1``.

    Returns:
        bool: ``True`` when the search returned at least one document,
        ``False`` otherwise. The similarity score is not inspected.
    """
    nearest = search_similar_docs(question, top_k=1)
    return bool(nearest)