Spaces:
Running
Running
| import sys | |
| from unittest.mock import MagicMock | |
| def test_store_chunks_deletes_old_chunks(monkeypatch): | |
| """ | |
| Test that store_chunks cleans up old chunks for the specific document and user | |
| before embedding and saving the new chunks. | |
| """ | |
| # Keep track of fake module from sys.modules | |
| fake_module = sys.modules.get("app.rag.vectorstore") | |
| # Temporarily remove fake_module to import the real module | |
| if fake_module: | |
| del sys.modules["app.rag.vectorstore"] | |
| try: | |
| # Import the real module | |
| import app.rag.vectorstore as real_vectorstore | |
| # Mock the ChromaDB client and collection | |
| mock_client = MagicMock() | |
| mock_collection = MagicMock() | |
| mock_client.get_or_create_collection.return_value = mock_collection | |
| mock_client.get_collection.return_value = mock_collection | |
| # Simulate existing chunks in ChromaDB | |
| mock_collection.get.return_value = {"ids": ["doc_123_0", "doc_123_1"]} | |
| monkeypatch.setattr(real_vectorstore, "get_chroma_client", lambda: mock_client) | |
| # Mock embedding model on the app.rag.embeddings module directly | |
| import app.rag.embeddings as embeddings_module | |
| mock_embeddings = MagicMock() | |
| mock_embeddings.embed_documents.return_value = [[0.1] * 384] | |
| monkeypatch.setattr( | |
| embeddings_module, | |
| "get_embedding_model", | |
| lambda: mock_embeddings, | |
| ) | |
| # Mock BM25 actions to avoid I/O or extra imports | |
| monkeypatch.setattr("app.rag.bm25.store_bm25_index", lambda *args, **kwargs: None) | |
| monkeypatch.setattr("app.rag.bm25.delete_bm25_index", lambda *args, **kwargs: None) | |
| # Execute store_chunks | |
| chunks = [{"text": "Hello world", "page": 1, "chunk_index": 0}] | |
| real_vectorstore.store_chunks( | |
| chunks=chunks, | |
| document_id="doc_123", | |
| filename="test.pdf", | |
| user_id="user_123", | |
| ) | |
| # Assertions | |
| # 1. It should check for existing chunks using correct metadata filters | |
| mock_collection.get.assert_called_once_with( | |
| where={"document_id": {"$eq": "doc_123"}}, | |
| include=[], | |
| ) | |
| # 2. It should delete those chunks by ID | |
| mock_collection.delete.assert_called_once_with(ids=["doc_123_0", "doc_123_1"]) | |
| # 3. It should add the new chunks to the collection | |
| mock_collection.add.assert_called_once() | |
| finally: | |
| # Restore the fake module | |
| if fake_module: | |
| sys.modules["app.rag.vectorstore"] = fake_module | |