emb1024 / vector.py
gcharanteja
ch6
ad469c9
Raw
History Blame Contribute Delete
2.25 kB
import argparse
from pathlib import Path
from typing import List, Optional

import chromadb
def _parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the ChromaDB persistence demo.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, so existing
            zero-argument callers behave exactly as before. Passing an
            explicit list lets the parser be driven programmatically.

    Returns:
        A namespace with the string attributes ``path``, ``collection``
        and ``query``.
    """
    parser = argparse.ArgumentParser(description="Local ChromaDB persistence demo")
    parser.add_argument(
        "--path",
        default="chroma_data",
        help="Local persistence directory",
    )
    parser.add_argument(
        "--collection",
        default="knowledge_base",
        help="Collection name",
    )
    parser.add_argument(
        "--query",
        default="Tell me about vector stores",
        help="Query text",
    )
    return parser.parse_args(argv)
def _seed_collection(collection: chromadb.Collection) -> None:
    """Add the three built-in sample documents to ``collection``.

    Each sample is written as one ``(id, text, metadata)`` record so the
    pieces that belong together stay together; the records are split into
    the parallel ``documents`` / ``metadatas`` / ``ids`` lists only at the
    single ``collection.add`` call.
    """
    samples = [
        (
            "doc1",
            "Chroma is a lightweight, open-source vector database built for AI.",
            {"category": "tech", "source": "docs"},
        ),
        (
            "doc2",
            "Python is a high-level programming language used extensively in data science.",
            {"category": "tech", "source": "wiki"},
        ),
        (
            "doc3",
            "The celestial body closest to Earth is the Moon.",
            {"category": "science", "source": "space-facts"},
        ),
    ]
    collection.add(
        documents=[text for _, text, _ in samples],
        metadatas=[metadata for _, _, metadata in samples],
        ids=[doc_id for doc_id, _, _ in samples],
    )
def main() -> None:
    """Run the demo end to end.

    Reads the command-line options, opens (creating if needed) a
    persistent Chroma store at the requested directory, seeds the
    collection with sample documents when it is empty, then queries it
    with the user's text and prints the two closest matches.
    """
    args = _parse_args()

    # Resolve to an absolute path and create the directory up front so the
    # printed location is unambiguous regardless of the working directory.
    store_dir = Path(args.path).resolve()
    store_dir.mkdir(parents=True, exist_ok=True)
    print(f"Using local Chroma persistence at: {store_dir}")

    collection = chromadb.PersistentClient(
        path=str(store_dir)
    ).get_or_create_collection(name=args.collection)

    # Seed only an empty collection so reruns against the same directory
    # do not try to add the same sample ids again.
    if collection.count() == 0:
        print("Seeding collection with sample documents...")
        _seed_collection(collection)
    print(f"Collection '{args.collection}' has {collection.count()} documents.")

    hits = collection.query(query_texts=[args.query], n_results=2)
    print("\n--- Search Results ---")

    # A single query text is sent, so index 0 of each result field is used
    # (presumably the fields hold one entry per query text).
    matches = zip(
        hits["documents"][0],
        hits["metadatas"][0],
        hits["distances"][0],
    )
    for text, metadata, score in matches:
        print(f"Matched Document: {text}")
        print(f"Metadata: {metadata}")
        print(f"Distance Score (Lower is better): {score:.4f}")
        print()
    print("----------------------")
# Run the demo only when this file is executed as a script; importing the
# module must not open a Chroma store or parse command-line arguments.
if __name__ == "__main__":
    main()