Spaces:
Sleeping
Sleeping
"""
Vector DB status check script.

Inspects the contents of the vector DB after indexing has completed.
"""
import sys
from pathlib import Path

# Add the project root (the parent of this script's directory) to the Python
# import path. This must run before the project-local imports below;
# presumably it is what lets `services` and `utils` resolve when the script
# is run directly -- confirm against the repository layout.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv
from services.vector_store import VectorStore
from utils.config import settings
def _preview(text, limit):
    """Return the first ``limit`` characters of ``text``; ``None`` becomes ""."""
    return (text or "")[:limit]


def main():
    """Print a status report for the configured vector DB.

    Steps, in order:
      1. Load environment variables with ``load_dotenv()``.
      2. Open the ``VectorStore`` named by ``settings.chroma_persist_directory``
         and ``settings.collection_name``.
      3. Print the collection statistics; stop early when the store reports
         that it holds no data.
      4. Print up to three sample entries via ``collection.peek``.
      5. Run one fixed text query and print the top three hits.

    Returns:
        None. All output goes to stdout.
    """
    # NOTE(review): the non-ASCII text in the string literals below looks
    # mis-decoded (likely UTF-8 Korean/emoji read through a single-byte
    # codepage). The literals are kept exactly as found; restore them from
    # the original file if it is available.
    load_dotenv()

    banner = "=" * 80
    print(banner)
    print("Vector DB ์ํ ํ์ธ")
    print(banner)

    # Initialize the vector store.
    vector_store = VectorStore(
        persist_directory=settings.chroma_persist_directory,
        collection_name=settings.collection_name,
    )

    # Collection statistics.
    stats = vector_store.get_collection_stats()
    print("\n๐ ๊ธฐ๋ณธ ์ ๋ณด:")
    print(f" ์ปฌ๋ ์ ๋ช : {stats['collection_name']}")
    print(f" ์ ์ฅ ๊ฒฝ๋ก: {stats['persist_directory']}")
    print(f" ์ ์ฒด ๋ฌธ์: {stats['total_documents']}๊ฐ")
    print(f" ๋ฐ์ดํฐ ์กด์ฌ: {'โ ์' if stats['has_data'] else 'โ ์๋์ค'}")

    if not stats['has_data']:
        # Nothing to sample or search; point the user at the indexing script.
        print("\nโ ๏ธ Vector DB๊ฐ ๋น์ด์์ต๋๋ค!")
        print(" python scripts/index_pdfs.py ๋ฅผ ๋จผ์ ์คํํ์ธ์.")
        return

    # Sample entries.
    print("\n๐ ์ํ ๋ฌธ์ ํ์ธ:")
    sample = vector_store.collection.peek(limit=3)
    sample_rows = zip(sample['ids'], sample['documents'], sample['metadatas'])
    for i, (doc_id, doc, metadata) in enumerate(sample_rows, 1):
        # Entries stored without metadata come back as None; treat as empty
        # so the report prints the fallback values instead of crashing.
        metadata = metadata or {}
        print(f"\n[{i}] {doc_id}")
        print(f" ์ถ์ฒ: {metadata.get('source_filename', 'unknown')}")
        print(f" ์ ๋ชฉ: {metadata.get('title', 'N/A')}")
        print(f" ์ ์: {metadata.get('author', 'N/A')}")
        print(f" ๋ด์ฉ: {_preview(doc, 150)}...")

    # Simple search smoke test with a fixed query.
    print("\n๐ ๊ฒ์ ํ ์คํธ:")
    test_query = "financial crisis"
    print(f" ์ฟผ๋ฆฌ: '{test_query}'")
    results = vector_store.search_by_text(test_query, top_k=3)
    print(f" ๊ฒฐ๊ณผ: {len(results['documents'])}๊ฐ ๋ฌธ์ ๋ฐ๊ฒฌ")

    hits = zip(results['documents'], results['metadatas'], results['distances'])
    for i, (doc, metadata, distance) in enumerate(hits, 1):
        metadata = metadata or {}
        # NOTE(review): "1 - distance" is only a similarity score if the
        # collection uses a distance bounded like cosine distance --
        # confirm the collection's configured metric.
        similarity = 1 - distance
        print(f"\n [{i}] {metadata.get('source_filename', 'unknown')}")
        print(f" ์ ์ฌ๋: {similarity:.3f}")
        print(f" ๋ด์ฉ: {_preview(doc, 100)}...")

    print("\n" + banner)
    print("โ Vector DB ํ์ธ ์๋ฃ!")
    print(banner)


if __name__ == "__main__":
    main()