| import chromadb | |
| from chromadb.utils import embedding_functions | |
| # Adjust these as needed | |
| CHROMA_PATH = "chroma_db" | |
| COLLECTION_NAME = "pib_titles" | |
| client = chromadb.PersistentClient(path=CHROMA_PATH) | |
| collection = client.get_collection( | |
| name=COLLECTION_NAME, | |
| embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction( | |
| model_name="all-MiniLM-L6-v2" | |
| ) | |
| ) | |
| # Retrieve all documents and metadata (ids are always returned) | |
| all_docs = collection.get(include=["documents", "metadatas"]) | |
| print("Total documents:", len(all_docs["ids"])) | |
| for i, (doc_id, doc, meta) in enumerate(zip(all_docs["ids"], all_docs["documents"], all_docs["metadatas"])): | |
| print(f"\n--- Document {i+1} ---") | |
| print("ID:", doc_id) | |
| print("Document:", doc) | |
| print("Metadata:", meta) | |