PDF-Assit_RAG / backend /tests /test_vectorstore.py
Param20h's picture
deploy: pure backend API with keywords fix
7c46845 unverified
Raw
History Blame Contribute Delete
2.59 kB
import sys
from unittest.mock import MagicMock
def test_store_chunks_deletes_old_chunks(monkeypatch):
"""
Test that store_chunks cleans up old chunks for the specific document and user
before embedding and saving the new chunks.
"""
# Keep track of fake module from sys.modules
fake_module = sys.modules.get("app.rag.vectorstore")
# Temporarily remove fake_module to import the real module
if fake_module:
del sys.modules["app.rag.vectorstore"]
try:
# Import the real module
import app.rag.vectorstore as real_vectorstore
# Mock the ChromaDB client and collection
mock_client = MagicMock()
mock_collection = MagicMock()
mock_client.get_or_create_collection.return_value = mock_collection
mock_client.get_collection.return_value = mock_collection
# Simulate existing chunks in ChromaDB
mock_collection.get.return_value = {"ids": ["doc_123_0", "doc_123_1"]}
monkeypatch.setattr(real_vectorstore, "get_chroma_client", lambda: mock_client)
# Mock embedding model on the app.rag.embeddings module directly
import app.rag.embeddings as embeddings_module
mock_embeddings = MagicMock()
mock_embeddings.embed_documents.return_value = [[0.1] * 384]
monkeypatch.setattr(
embeddings_module,
"get_embedding_model",
lambda: mock_embeddings,
)
# Mock BM25 actions to avoid I/O or extra imports
monkeypatch.setattr("app.rag.bm25.store_bm25_index", lambda *args, **kwargs: None)
monkeypatch.setattr("app.rag.bm25.delete_bm25_index", lambda *args, **kwargs: None)
# Execute store_chunks
chunks = [{"text": "Hello world", "page": 1, "chunk_index": 0}]
real_vectorstore.store_chunks(
chunks=chunks,
document_id="doc_123",
filename="test.pdf",
user_id="user_123",
)
# Assertions
# 1. It should check for existing chunks using correct metadata filters
mock_collection.get.assert_called_once_with(
where={"document_id": {"$eq": "doc_123"}},
include=[],
)
# 2. It should delete those chunks by ID
mock_collection.delete.assert_called_once_with(ids=["doc_123_0", "doc_123_1"])
# 3. It should add the new chunks to the collection
mock_collection.add.assert_called_once()
finally:
# Restore the fake module
if fake_module:
sys.modules["app.rag.vectorstore"] = fake_module