Spaces:

kith777
/

rag_agent

Paused

Cheh Kit Hong committed 17 days ago

Commit

d09d387

1 Parent(s): 067cdc9

created chroma vectordb

Files changed (2) hide show

knowledge_base/test_retrieval.py ADDED Viewed

+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+# Configuration must match the creation step
+PERSIST_PATH = "./knowledge_base/chroma_data"
+EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
+COLLECTION_NAME = "langchain_mpnet_collection"
+# 1. Define the custom embedding object (Crucial for query vectorization)
+dense_embeddings = HuggingFaceEmbeddings(
+    model_name=EMBEDDING_MODEL_NAME
+)
+# 2. Load the existing vector store from disk
+try:
+    vectorstore = Chroma(
+        persist_directory=PERSIST_PATH,
+        embedding_function=dense_embeddings,
+        collection_name=COLLECTION_NAME
+    )
+    print("Vector store loaded successfully.")
+except Exception as e:
+    print(f"Error loading vector store: {e}")
+    exit()
+query = "Tell me about SAM3 general architecture."
+# Perform the search
+# k=3 means it will return the top 3 most similar document chunks
+retrieved_docs = vectorstore.similarity_search(query, k=3)
+print(f"\n--- Search Results for: '{query}' ---")
+for i, doc in enumerate(retrieved_docs):
+    print(f"**Document {i+1} (Source: {doc.metadata.get('source', 'N/A')})**")
+    print(f"Content: {doc.page_content[:150]}...\n")

requirements.txt CHANGED Viewed

@@ -8,4 +8,8 @@ uvicorn
 pydantic
 chromadb
 pymupdf
-pymupdf4llm

 pydantic
 chromadb
 pymupdf
+pymupdf4llm
+langchain-community
+langchain_text_splitters
+pymupdf-layout
+sentence_transformers