Spaces:
Sleeping
Sleeping
File size: 2,595 Bytes
f6b05db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
"""
Vector DB ์ํ ํ์ธ ์คํฌ๋ฆฝํธ
์ธ๋ฑ์ฑ์ด ์๋ฃ๋ ํ ๋ฒกํฐ DB์ ๋ด์ฉ์ ํ์ธํฉ๋๋ค.
"""
import sys
from pathlib import Path
# Put the project root (the parent of this file's directory) at the front of
# sys.path. NOTE(review): presumably this lets the `services` and `utils`
# imports below resolve when the file is run directly as a script -- confirm
# against the repository layout.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
from services.vector_store import VectorStore
from utils.config import settings
def main():
"""Vector DB ์ํ ํ์ธ"""
load_dotenv()
print("=" * 80)
print("Vector DB ์ํ ํ์ธ")
print("=" * 80)
# Vector Store ์ด๊ธฐํ
vector_store = VectorStore(
persist_directory=settings.chroma_persist_directory,
collection_name=settings.collection_name
)
# ํต๊ณ ์ ๋ณด
stats = vector_store.get_collection_stats()
print(f"\n๐ ๊ธฐ๋ณธ ์ ๋ณด:")
print(f" ์ปฌ๋ ์
๋ช
: {stats['collection_name']}")
print(f" ์ ์ฅ ๊ฒฝ๋ก: {stats['persist_directory']}")
print(f" ์ ์ฒด ๋ฌธ์: {stats['total_documents']}๊ฐ")
print(f" ๋ฐ์ดํฐ ์กด์ฌ: {'โ
์' if stats['has_data'] else 'โ ์๋์ค'}")
if not stats['has_data']:
print("\nโ ๏ธ Vector DB๊ฐ ๋น์ด์์ต๋๋ค!")
print(" python scripts/index_pdfs.py ๋ฅผ ๋จผ์ ์คํํ์ธ์.")
return
# ์ํ ๋ฐ์ดํฐ ํ์ธ
print(f"\n๐ ์ํ ๋ฌธ์ ํ์ธ:")
sample = vector_store.collection.peek(limit=3)
for i, (doc_id, doc, metadata) in enumerate(zip(
sample['ids'],
sample['documents'],
sample['metadatas']
), 1):
print(f"\n[{i}] {doc_id}")
print(f" ์ถ์ฒ: {metadata.get('source_filename', 'unknown')}")
print(f" ์ ๋ชฉ: {metadata.get('title', 'N/A')}")
print(f" ์ ์: {metadata.get('author', 'N/A')}")
print(f" ๋ด์ฉ: {doc[:150]}...")
# ๊ฐ๋จํ ๊ฒ์ ํ
์คํธ
print(f"\n๐ ๊ฒ์ ํ
์คํธ:")
test_query = "financial crisis"
print(f" ์ฟผ๋ฆฌ: '{test_query}'")
results = vector_store.search_by_text(test_query, top_k=3)
print(f" ๊ฒฐ๊ณผ: {len(results['documents'])}๊ฐ ๋ฌธ์ ๋ฐ๊ฒฌ")
for i, (doc, metadata, distance) in enumerate(zip(
results['documents'],
results['metadatas'],
results['distances']
), 1):
similarity = 1 - distance
print(f"\n [{i}] {metadata.get('source_filename', 'unknown')}")
print(f" ์ ์ฌ๋: {similarity:.3f}")
print(f" ๋ด์ฉ: {doc[:100]}...")
print("\n" + "=" * 80)
print("โ
Vector DB ํ์ธ ์๋ฃ!")
print("=" * 80)
# Run the status check only when this file is executed as a script.
if __name__ == "__main__":
    main()
|