financial-rag-chatbot / scripts /check_vector_db.py
Claude
Add complete Financial RAG system with Metacognitive Agent
f6b05db unverified
"""
Vector DB status check script.

Inspects the contents of the vector DB after indexing has completed.
"""
import sys
from pathlib import Path
# Add the project root to the Python import path
# Resolve before walking up: when __file__ is a relative path (e.g. the script
# is launched from inside scripts/ on an interpreter older than 3.9),
# ``.parent.parent`` of the unresolved path collapses to "." and the wrong
# directory would be put on sys.path.
project_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
from services.vector_store import VectorStore
from utils.config import settings
def main():
    """Print a status report for the vector DB.

    Loads environment variables, opens the vector store configured in
    ``settings``, and prints the collection statistics. If the store reports
    no data, a hint to run the indexing script is printed and the function
    returns early. Otherwise it prints up to three sample records and the
    results of one fixed test query.
    """
    # NOTE(review): ``settings`` is imported at module import time, before
    # this call runs. If it reads the environment on import, values from .env
    # may be loaded too late — confirm against utils.config.
    load_dotenv()

    banner = "=" * 80
    print(banner)
    print("Vector DB ์ƒํƒœ ํ™•์ธ")
    print(banner)

    # Initialize the vector store
    vector_store = VectorStore(
        persist_directory=settings.chroma_persist_directory,
        collection_name=settings.collection_name,
    )

    # Collection statistics
    stats = vector_store.get_collection_stats()
    has_data = stats['has_data']

    print("\n๐Ÿ“Š ๊ธฐ๋ณธ ์ •๋ณด:")
    print(f" ์ปฌ๋ ‰์…˜๋ช…: {stats['collection_name']}")
    print(f" ์ €์žฅ ๊ฒฝ๋กœ: {stats['persist_directory']}")
    print(f" ์ „์ฒด ๋ฌธ์„œ: {stats['total_documents']}๊ฐœ")
    print(f" ๋ฐ์ดํ„ฐ ์กด์žฌ: {'โœ… ์˜ˆ' if has_data else 'โŒ ์•„๋‹ˆ์˜ค'}")

    if not has_data:
        # Nothing to sample or search; point the user at the indexing script.
        print("\nโš ๏ธ Vector DB๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค!")
        print(" python scripts/index_pdfs.py ๋ฅผ ๋จผ์ € ์‹คํ–‰ํ•˜์„ธ์š”.")
        return

    # Sample records
    print("\n๐Ÿ“š ์ƒ˜ํ”Œ ๋ฌธ์„œ ํ™•์ธ:")
    sample = vector_store.collection.peek(limit=3)

    for i, (doc_id, doc, metadata) in enumerate(zip(
        sample['ids'],
        sample['documents'],
        sample['metadatas'],
    ), 1):
        # Entries may be None for records stored without metadata or text
        # (presumably a Chroma collection — verify); fall back to empty
        # values instead of raising.
        metadata = metadata or {}
        doc = doc or ""
        print(f"\n[{i}] {doc_id}")
        print(f" ์ถœ์ฒ˜: {metadata.get('source_filename', 'unknown')}")
        print(f" ์ œ๋ชฉ: {metadata.get('title', 'N/A')}")
        print(f" ์ €์ž: {metadata.get('author', 'N/A')}")
        print(f" ๋‚ด์šฉ: {doc[:150]}...")

    # Simple search test
    print("\n๐Ÿ” ๊ฒ€์ƒ‰ ํ…Œ์ŠคํŠธ:")
    test_query = "financial crisis"
    print(f" ์ฟผ๋ฆฌ: '{test_query}'")

    results = vector_store.search_by_text(test_query, top_k=3)
    print(f" ๊ฒฐ๊ณผ: {len(results['documents'])}๊ฐœ ๋ฌธ์„œ ๋ฐœ๊ฒฌ")

    for i, (doc, metadata, distance) in enumerate(zip(
        results['documents'],
        results['metadatas'],
        results['distances'],
    ), 1):
        # Same None guard as for the sampled records above.
        metadata = metadata or {}
        doc = doc or ""
        # NOTE(review): ``1 - distance`` reads as a similarity only for
        # cosine-style distances — confirm the collection's distance metric.
        similarity = 1 - distance
        print(f"\n [{i}] {metadata.get('source_filename', 'unknown')}")
        print(f" ์œ ์‚ฌ๋„: {similarity:.3f}")
        print(f" ๋‚ด์šฉ: {doc[:100]}...")

    print("\n" + banner)
    print("โœ… Vector DB ํ™•์ธ ์™„๋ฃŒ!")
    print(banner)
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()