| """ | |
| Unit tests for RAG pipeline functionality | |
| """ | |
| import pytest | |
| import asyncio | |
| from unittest.mock import AsyncMock, MagicMock, patch | |
| from typing import List, Dict, Any | |
| from uuid import UUID, uuid4 | |
| from ..rag.pipeline import RAGPipeline, query_rag, process_document_for_rag, search_documents | |
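
# Mark every coroutine test in this module as an asyncio test. This assumes
# the pytest-asyncio plugin is installed; without it (or an equivalent such
# as anyio), pytest collects but never awaits async test functions.
pytestmark = pytest.mark.asyncio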


async def test_rag_pipeline_initialization():
    """Test RAGPipeline initialization"""
    pipeline = RAGPipeline()
    assert pipeline.vector_ops is not None


async def test_query_rag_basic():
    """Test basic RAG query functionality"""
    # Mock the embedding result (1536 dimensions)
    mock_embedding = [0.1, 0.2, 0.3] + [0.0] * (1536 - 3)
    # Mock the search results
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.9,
            "payload": {"chunk_text": "This is relevant context.", "user_id": str(uuid4())},
        }
    ]
    # Mock the chat response
    mock_chat_response = "This is a test response based on the context."
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding):
        with patch.object(operations.VectorOperations, "search_vectors", return_value=mock_search_results):
            with patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
                user_id = uuid4()
                result = await query_rag(
                    query="Test query?",
                    user_id=user_id,
                    top_k=5,
                )
                assert result is not None
                assert "response" in result
                assert "sources" in result
                assert "context_used" in result
                assert result["response"] == mock_chat_response


async def test_query_rag_no_results():
    """Test RAG query when no results are found"""
    # Mock the embedding result (1536 dimensions)
    mock_embedding = [0.4, 0.5, 0.6] + [0.0] * (1536 - 3)
    # Mock empty search results
    mock_search_results = []
    # Mock the chat response for when there is no context
    mock_chat_response = "I couldn't find relevant information to answer your question."
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding):
        with patch.object(operations.VectorOperations, "search_vectors", return_value=mock_search_results):
            with patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
                user_id = uuid4()
                result = await query_rag(
                    query="Test query with no results?",
                    user_id=user_id,
                    top_k=5,
                )
                assert result is not None
                assert "response" in result
                assert result["response"] == mock_chat_response
                assert len(result["sources"]) == 0  # No sources since no results were found


async def test_query_rag_with_conversation_history():
    """Test RAG query with conversation history"""
    # Mock the embedding result (1536 dimensions)
    mock_embedding = [0.7, 0.8, 0.9] + [0.0] * (1536 - 3)
    # Mock the search results
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.85,
            "payload": {"chunk_text": "Previous conversation context is relevant here.", "user_id": str(uuid4())},
        }
    ]
    # Mock the chat response
    mock_chat_response = "Based on our previous conversation and the provided context, here is the answer."
    conversation_history = [
        {"role": "user", "content": "What did we talk about earlier?"},
        {"role": "assistant", "content": "We talked about AI and RAG systems."},
    ]
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding):
        with patch.object(operations.VectorOperations, "search_vectors", return_value=mock_search_results):
            with patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
                user_id = uuid4()
                result = await query_rag(
                    query="Can you elaborate on RAG?",
                    user_id=user_id,
                    top_k=3,
                    conversation_history=conversation_history,
                )
                assert result is not None
                assert "response" in result
                assert "sources" in result
                assert result["response"] == mock_chat_response


async def test_process_document_for_rag():
    """Test processing a document for RAG"""
    # Mock the embedding processor
    with patch.object(processor, "process_document", return_value=True):
        document_id = uuid4()
        user_id = uuid4()
        result = await process_document_for_rag(
            document_id=document_id,
            user_id=user_id,
            content="This is a test document for RAG processing.",
            title="Test Document for RAG",
        )
        assert result is True  # Should return True on success


async def test_search_documents():
    """Test document search functionality"""
    # Mock the embedding result (1536 dimensions)
    mock_embedding = [0.2, 0.3, 0.4] + [0.0] * (1536 - 3)
    # Mock the search results
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.92,
            "payload": {"chunk_text": "This is relevant information about the search topic.", "user_id": str(uuid4())},
        },
        {
            "id": "point_id_2",
            "document_id": str(uuid4()),
            "score": 0.87,
            "payload": {"chunk_text": "More context related to the search query.", "user_id": str(uuid4())},
        },
    ]
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding):
        with patch.object(operations.VectorOperations, "search_vectors", return_value=mock_search_results):
            user_id = uuid4()
            result = await search_documents(
                query="Test search query",
                user_id=user_id,
                top_k=5,
            )
            assert result is not None
            assert len(result) == 2  # Two results returned
            assert all("payload" in r for r in result)  # All results have payloads


async def test_rag_pipeline_query_method():
    """Test the query_rag method of the RAGPipeline class"""
    pipeline = RAGPipeline()
    # Mock the embedding result (1536 dimensions)
    mock_embedding = [0.5, 0.6, 0.7] + [0.0] * (1536 - 3)
    # Mock the search results
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.95,
            "payload": {"chunk_text": "Specific context for this query.", "user_id": str(uuid4())},
        }
    ]
    # Mock the chat response
    mock_chat_response = "Based on the specific context, here is the detailed answer."
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding):
        with patch.object(operations.VectorOperations, "search_vectors", return_value=mock_search_results):
            with patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
                user_id = uuid4()
                result = await pipeline.query_rag(
                    query="Specific query?",
                    user_id=user_id,
                    top_k=3,
                )
                assert result is not None
                assert result["response"] == mock_chat_response


async def test_process_document_method():
    """Test the process_document_for_rag method of the RAGPipeline class"""
    pipeline = RAGPipeline()
    # Mock the embedding processor
    with patch.object(processor, "process_document", return_value=True):
        document_id = uuid4()
        user_id = uuid4()
        result = await pipeline.process_document_for_rag(
            document_id=document_id,
            user_id=user_id,
            content="Content for the document processing test.",
            title="Test Document Processing",
        )
        assert result is True  # Should return True on success


async def test_delete_user_documents():
    """Test deleting all documents for a user"""
    pipeline = RAGPipeline()
    # Mock the vector operations
    with patch.object(operations.VectorOperations, "delete_vectors_by_user_id", return_value=True):
        user_id = uuid4()
        result = await pipeline.delete_user_documents(user_id)
        assert result is True  # Should return True on success


async def test_rag_error_handling():
    """Test RAG pipeline error handling"""
    # Simulate a failure in embedding generation by returning None
    with patch.object(gemini_client, "generate_embedding", return_value=None):
        user_id = uuid4()
        result = await query_rag(
            query="Test query that will fail embedding",
            user_id=user_id,
        )
        assert result is None  # Should return None on failure


async def test_rag_pipeline_search_method():
    """Test the search_documents method of the RAGPipeline class"""
    pipeline = RAGPipeline()
    # Mock the embedding result (1536 dimensions)
    mock_embedding = [0.8, 0.9, 1.0] + [0.0] * (1536 - 3)
    # Mock the search results
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.90,
            "payload": {"chunk_text": "Search result content.", "user_id": str(uuid4())},
        }
    ]
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding):
        with patch.object(operations.VectorOperations, "search_vectors", return_value=mock_search_results):
            user_id = uuid4()
            result = await pipeline.search_documents(
                query="Test search",
                user_id=user_id,
                top_k=5,
            )
            assert result is not None
            assert len(result) == 1  # One result returned
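

# Running these tests (assuming pytest-asyncio is installed and the parent
# package is importable, e.g. via an editable install of the project):
#
#   pytest -v path/to/this/test_module.py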