Spaces:

kith777
/

rag_agent

Paused

rag_agent / knowledge_base /test_retrieval.py

Cheh Kit Hong

created chroma vectordb

d09d387 17 days ago

1.22 kB

	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_chroma import Chroma

	# Settings used to open the persisted store. They have to be the same
	# values that were used in the creation step.
	PERSIST_PATH = "./knowledge_base/chroma_data"
	EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
	COLLECTION_NAME = "langchain_mpnet_collection"

	# Step 1: build the embedding object; it is handed to Chroma below as the
	# embedding function, which is what vectorizes the query text.
	dense_embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

	# 2. Load the existing vector store from disk.
	# Only the constructor call sits inside the ``try`` so the handler reports
	# failures of opening the store and nothing else.
	try:
	    vectorstore = Chroma(
	        persist_directory=PERSIST_PATH,
	        embedding_function=dense_embeddings,
	        collection_name=COLLECTION_NAME,
	    )
	except Exception as e:
	    print(f"Error loading vector store: {e}")
	    # Terminate with a non-zero status so a shell or CI job sees the failure.
	    # The bare ``exit()`` helper is injected by ``site`` for interactive use
	    # and, called without an argument, ends the process with status 0.
	    raise SystemExit(1)
	else:
	    print("Vector store loaded successfully.")

	query = "Tell me about SAM3 general architecture."

	# Perform the search; k=3 asks for the top 3 most similar document chunks.
	retrieved_docs = vectorstore.similarity_search(query, k=3)

	print(f"\n--- Search Results for: '{query}' ---")
	if not retrieved_docs:
	    # Without this, an empty result prints only the header above and the
	    # run looks like it succeeded even though nothing was retrieved.
	    print(
	        "No documents were returned. Check that PERSIST_PATH and "
	        "COLLECTION_NAME match the creation step."
	    )
	for i, doc in enumerate(retrieved_docs, start=1):
	    source = doc.metadata.get("source", "N/A")
	    print(f"Document {i} (Source: {source})")
	    # Only the first 150 characters of the chunk are shown as a preview.
	    print(f"Content: {doc.page_content[:150]}...\n")