rag_agent / knowledge_base /test_retrieval.py
Cheh Kit Hong
created chroma vectordb
d09d387
raw
history blame
1.22 kB
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
# Configuration must match the creation step
PERSIST_PATH = "./knowledge_base/chroma_data"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
COLLECTION_NAME = "langchain_mpnet_collection"
# 1. Define the custom embedding object (Crucial for query vectorization)
dense_embeddings = HuggingFaceEmbeddings(
model_name=EMBEDDING_MODEL_NAME
)
# 2. Load the existing vector store from disk
try:
vectorstore = Chroma(
persist_directory=PERSIST_PATH,
embedding_function=dense_embeddings,
collection_name=COLLECTION_NAME
)
print("Vector store loaded successfully.")
except Exception as e:
print(f"Error loading vector store: {e}")
exit()
query = "Tell me about SAM3 general architecture."
# Perform the search
# k=3 means it will return the top 3 most similar document chunks
retrieved_docs = vectorstore.similarity_search(query, k=3)
print(f"\n--- Search Results for: '{query}' ---")
for i, doc in enumerate(retrieved_docs):
print(f"**Document {i+1} (Source: {doc.metadata.get('source', 'N/A')})**")
print(f"Content: {doc.page_content[:150]}...\n")