| import argparse |
| from pathlib import Path |
| from typing import List |
|
|
| import chromadb |
|
|
|
|
def _parse_args() -> argparse.Namespace:
    """Parse the demo's command-line options.

    Three optional flags are accepted: ``--path``, ``--collection`` and
    ``--query``. Each falls back to its default when omitted.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``, with
        ``path``, ``collection`` and ``query`` attributes.
    """
    # (flag, default value, help text) for every supported option.
    options = (
        ("--path", "chroma_data", "Local persistence directory"),
        ("--collection", "knowledge_base", "Collection name"),
        ("--query", "Tell me about vector stores", "Query text"),
    )

    cli = argparse.ArgumentParser(description="Local ChromaDB persistence demo")
    for flag, fallback, description in options:
        cli.add_argument(flag, default=fallback, help=description)
    return cli.parse_args()
|
|
|
|
def _seed_collection(collection: chromadb.Collection) -> None:
    """Add the three built-in sample documents to ``collection``.

    Each sample carries an id (``doc1``..``doc3``), its text, and a metadata
    dict with ``category`` and ``source`` keys. All three are sent in a
    single ``collection.add`` call.

    Args:
        collection: Target collection that receives the samples.
    """
    # One record per sample: (id, document text, metadata).
    samples = (
        (
            "doc1",
            "Chroma is a lightweight, open-source vector database built for AI.",
            {"category": "tech", "source": "docs"},
        ),
        (
            "doc2",
            "Python is a high-level programming language used extensively in data science.",
            {"category": "tech", "source": "wiki"},
        ),
        (
            "doc3",
            "The celestial body closest to Earth is the Moon.",
            {"category": "science", "source": "space-facts"},
        ),
    )

    # Split the records back into the parallel lists that ``add`` expects.
    collection.add(
        documents=[text for _, text, _ in samples],
        metadatas=[metadata for _, _, metadata in samples],
        ids=[doc_id for doc_id, _, _ in samples],
    )
|
|
|
|
def main() -> None:
    """Run the demo end to end.

    Steps:
        1. Parse the command-line options.
        2. Create the persistence directory if needed and open a
           ``chromadb.PersistentClient`` on it.
        3. Get or create the requested collection, seeding it with the
           sample documents when it is empty.
        4. Query the collection with the given text (top 2 results) and
           print each matched document, its metadata and its distance.
    """
    cli = _parse_args()

    # Work with an absolute path so the printed location is unambiguous,
    # and make sure the directory exists before the client opens it.
    store_dir = Path(cli.path).resolve()
    store_dir.mkdir(parents=True, exist_ok=True)
    print(f"Using local Chroma persistence at: {store_dir}")

    client = chromadb.PersistentClient(path=str(store_dir))
    collection = client.get_or_create_collection(name=cli.collection)

    # Seed only when the collection currently holds no documents.
    if not collection.count():
        print("Seeding collection with sample documents...")
        _seed_collection(collection)
    print(f"Collection '{cli.collection}' has {collection.count()} documents.")

    response = collection.query(query_texts=[cli.query], n_results=2)

    print("\n--- Search Results ---")
    # A single query text was sent, so index 0 selects the result lists
    # belonging to that query.
    hits = zip(
        response["documents"][0],
        response["metadatas"][0],
        response["distances"][0],
    )
    for text, metadata, score in hits:
        print(f"Matched Document: {text}")
        print(f"Metadata: {metadata}")
        print(f"Distance Score (Lower is better): {score:.4f}")
        print()
    print("----------------------")
|
|
|
|
# Run the demo only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
|