# Query an existing on-disk Chroma vector store with a similarity search.
#
# The script builds the embedding object, opens the persisted collection,
# runs one similarity search, and prints a short preview of every hit.
import sys

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Configuration must match the creation step.
PERSIST_PATH = "./knowledge_base/chroma_data"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
COLLECTION_NAME = "langchain_mpnet_collection"

# Search/display tuning knobs.
TOP_K = 3  # number of most similar document chunks to return
PREVIEW_CHARS = 150  # number of leading characters of each chunk to print

# 1. Define the embedding object. The same object is handed to the vector
#    store below so that the query text is vectorized with this model.
# NOTE(review): recent LangChain releases appear to deprecate this class in
# favor of the `langchain_huggingface` package -- confirm before upgrading.
dense_embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# 2. Load the existing vector store from disk.
try:
    vectorstore = Chroma(
        persist_directory=PERSIST_PATH,
        embedding_function=dense_embeddings,
        collection_name=COLLECTION_NAME,
    )
except Exception as e:  # top-level script boundary: report and stop
    print(f"Error loading vector store: {e}", file=sys.stderr)
    # sys.exit is always available (the bare `exit` helper is only injected
    # by the `site` module), and a non-zero status signals the failure.
    sys.exit(1)
else:
    print("Vector store loaded successfully.")

query = "Tell me about SAM3 general architecture."

# 3. Perform the search: k=TOP_K returns the TOP_K most similar chunks.
retrieved_docs = vectorstore.similarity_search(query, k=TOP_K)

print(f"\n--- Search Results for: '{query}' ---")
if not retrieved_docs:
    # NOTE(review): an empty result list likely means PERSIST_PATH or
    # COLLECTION_NAME does not match the values used at creation time --
    # confirm how Chroma treats a missing collection.
    print("No matching documents found.")

for i, doc in enumerate(retrieved_docs):
    print(f"**Document {i+1} (Source: {doc.metadata.get('source', 'N/A')})**")
    print(f"Content: {doc.page_content[:PREVIEW_CHARS]}...\n")