from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Settings for the persisted store. These values must match the ones used
# when the collection was originally created.
COLLECTION_NAME = "langchain_mpnet_collection"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
PERSIST_PATH = "./knowledge_base/chroma_data"

# 1. Build the embedding object for EMBEDDING_MODEL_NAME (crucial for query
#    vectorization).
dense_embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# 2. Load the existing vector store from disk
# NOTE(review): Chroma appears to create an empty collection when none exists
# at PERSIST_PATH, so reaching the success message may not prove that any
# data was found -- confirm against the langchain_chroma documentation.
try:
    vectorstore = Chroma(
        persist_directory=PERSIST_PATH,
        embedding_function=dense_embeddings,
        collection_name=COLLECTION_NAME,
    )
except Exception as e:
    # Top-level script boundary: report the failure and stop.
    # SystemExit with a string argument writes the message to stderr and
    # exits with status 1. The site-provided exit() helper is not always
    # available and, called with no argument, would exit with status 0.
    raise SystemExit(f"Error loading vector store: {e}")
else:
    print("Vector store loaded successfully.")

query = "Tell me about SAM3 general architecture."

# Maximum number of characters of each chunk shown in the preview.
PREVIEW_LENGTH = 150

# Perform the search
# k=3 asks for the top 3 most similar document chunks
retrieved_docs = vectorstore.similarity_search(query, k=3)

print(f"\n--- Search Results for: '{query}' ---")
if not retrieved_docs:
    # Make an empty result explicit instead of printing a bare header.
    print("No matching documents found.")

for i, doc in enumerate(retrieved_docs, start=1):
    source = doc.metadata.get('source', 'N/A')
    content = doc.page_content
    preview = content[:PREVIEW_LENGTH]
    # Only mark the preview as truncated when text was actually cut off.
    if len(content) > PREVIEW_LENGTH:
        preview += "..."
    print(f"**Document {i} (Source: {source})**")
    print(f"Content: {preview}\n")