import sys

from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# --- Configuration ---------------------------------------------------------

# Directory handed to Chroma as its on-disk persistence location.
PERSIST_PATH = "./knowledge_base/chroma_data"

# Hugging Face model identifier used to embed queries.
# NOTE(review): presumably this must match the model used when the collection
# was populated — confirm against the ingestion script.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Name of the Chroma collection to open inside PERSIST_PATH.
COLLECTION_NAME = "langchain_mpnet_collection"

# --- Embedding model -------------------------------------------------------

# Embedding function passed to the vector store below.
# NOTE(review): recent langchain-community releases reportedly deprecate this
# class in favour of the separate `langchain-huggingface` package — confirm
# against the installed version before migrating.
dense_embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
)

# --- Vector store ----------------------------------------------------------

# Open the persisted Chroma collection. Only the constructor call sits inside
# the `try`, so an unrelated failure cannot be misreported as a load error.
# NOTE(review): Chroma presumably creates an empty collection when none exists
# at this path, so reaching the success message may not imply that any
# documents are stored — confirm.
try:
    vectorstore = Chroma(
        persist_directory=PERSIST_PATH,
        embedding_function=dense_embeddings,
        collection_name=COLLECTION_NAME,
    )
except Exception as e:
    # Top-level script boundary: report on stderr and stop with a non-zero
    # status so shells and schedulers see the failure. The previous `exit()`
    # is the interactive `site` helper and terminated with status 0.
    print(f"Error loading vector store: {e}", file=sys.stderr)
    sys.exit(1)
else:
    print("Vector store loaded successfully.")

# --- Query -----------------------------------------------------------------

# Free-text question to search the collection for.
query = "Tell me about SAM3 general architecture."

# Ask the vector store for up to three documents similar to the query.
retrieved_docs = vectorstore.similarity_search(query, k=3)

print(f"\n--- Search Results for: '{query}' ---")

if not retrieved_docs:
    # Make an empty result set explicit instead of printing a bare header.
    print("No matching documents found.")

for i, doc in enumerate(retrieved_docs):
    # Show where the chunk came from, falling back to 'N/A' when the
    # metadata has no 'source' entry.
    print(f"**Document {i+1} (Source: {doc.metadata.get('source', 'N/A')})**")
    # Preview only the first 150 characters of the chunk.
    print(f"Content: {doc.page_content[:150]}...\n")