"""Command-line demo of a locally persisted ChromaDB collection.

The script opens (or creates) an on-disk Chroma store, seeds the chosen
collection with three sample documents when it is empty, runs one text query
against it and prints the closest matches.
"""

import argparse
from pathlib import Path
from typing import List, Optional, Sequence

import chromadb


def _positive_int(value: str) -> int:
    """Convert a command-line token to an integer that is at least 1.

    Raises:
        argparse.ArgumentTypeError: If *value* is not an integer or is
            smaller than 1, so argparse reports a normal usage error.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected an integer, got {value!r}"
        ) from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be at least 1, got {number}")
    return number


def _parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the demo.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        Namespace with ``path``, ``collection``, ``query`` and ``n_results``.
    """
    parser = argparse.ArgumentParser(description="Local ChromaDB persistence demo")
    parser.add_argument(
        "--path",
        default="chroma_data",
        help="Local persistence directory",
    )
    parser.add_argument(
        "--collection",
        default="knowledge_base",
        help="Collection name",
    )
    parser.add_argument(
        "--query",
        default="Tell me about vector stores",
        help="Query text",
    )
    parser.add_argument(
        "--n-results",
        type=_positive_int,
        default=2,
        help="Maximum number of matches to print",
    )
    return parser.parse_args(argv)


def _seed_collection(collection: chromadb.Collection) -> None:
    """Add the three built-in sample documents to *collection*.

    The ids are fixed (``doc1`` .. ``doc3``); ``main`` only calls this when
    the collection is empty.
    """
    documents: List[str] = [
        "Chroma is a lightweight, open-source vector database built for AI.",
        "Python is a high-level programming language used extensively in data science.",
        "The celestial body closest to Earth is the Moon.",
    ]
    metadatas = [
        {"category": "tech", "source": "docs"},
        {"category": "tech", "source": "wiki"},
        {"category": "science", "source": "space-facts"},
    ]
    ids: List[str] = ["doc1", "doc2", "doc3"]
    collection.add(documents=documents, metadatas=metadatas, ids=ids)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the demo: open the store, seed it if empty, query and print.

    Args:
        argv: Optional argument strings forwarded to ``_parse_args``;
            ``None`` means the process command line.
    """
    args = _parse_args(argv)

    persist_path = Path(args.path).resolve()
    persist_path.mkdir(parents=True, exist_ok=True)
    print(f"Using local Chroma persistence at: {persist_path}")

    client = chromadb.PersistentClient(path=str(persist_path))
    collection = client.get_or_create_collection(name=args.collection)

    if collection.count() == 0:
        print("Seeding collection with sample documents...")
        _seed_collection(collection)

    doc_count = collection.count()
    print(f"Collection '{args.collection}' has {doc_count} documents.")

    # A collection cannot yield more matches than it stores, so never ask for
    # more than that (a pre-existing collection may hold a single document).
    n_results = min(args.n_results, doc_count)
    results = collection.query(query_texts=[args.query], n_results=n_results)

    print("\n--- Search Results ---")
    # One query text was sent, so index 0 selects its result lists.
    for doc, meta, distance in zip(
        results["documents"][0],
        results["metadatas"][0],
        results["distances"][0],
    ):
        print(f"Matched Document: {doc}")
        print(f"Metadata: {meta}")
        print(f"Distance Score (Lower is better): {distance:.4f}")
        print()
    print("----------------------")


if __name__ == "__main__":
    main()