ai-textbook-backend / src /tests /test_rag.py
Naveedtechlab's picture
Add full AI Native Textbook project source code
db7c1e8
"""
Unit tests for RAG pipeline functionality
"""
import asyncio
from typing import List, Dict, Any
from unittest.mock import AsyncMock, MagicMock, patch
from uuid import UUID, uuid4

import pytest

from ..embeddings import gemini_client, processor
from ..qdrant.operations import VectorOperations
from ..rag.pipeline import RAGPipeline, query_rag, process_document_for_rag, search_documents
@pytest.mark.asyncio
async def test_rag_pipeline_initialization():
    """A freshly constructed RAGPipeline exposes a non-None ``vector_ops``."""
    assert RAGPipeline().vector_ops is not None
@pytest.mark.asyncio
async def test_query_rag_basic():
    """Test basic RAG query functionality.

    Embedding generation, vector search and chat completion are replaced
    with canned values. The test then checks that the result of
    ``query_rag`` contains the ``response``, ``sources`` and
    ``context_used`` keys and that ``response`` is the mocked chat reply.
    """
    # Mock the embedding result, padded to 1536 dimensions.
    mock_embedding = [0.1, 0.2, 0.3] + [0.0] * (1536 - 3)

    # Mock the search results.
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.9,
            "payload": {"chunk_text": "This is relevant context.", "user_id": str(uuid4())},
        }
    ]

    # Mock the chat response.
    mock_chat_response = "This is a test response based on the context."

    # patch() needs an importable absolute target; a relative dotted string
    # such as '..embeddings...' cannot be resolved, so the attributes are
    # patched on the imported objects instead.
    # NOTE(review): if the pipeline module binds these functions by name at
    # import time, they must be patched in that module's namespace - confirm.
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding), \
            patch.object(VectorOperations, "search_vectors", return_value=mock_search_results), \
            patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
        result = await query_rag(
            query="Test query?",
            user_id=uuid4(),
            top_k=5,
        )

        assert result is not None
        assert "response" in result
        assert "sources" in result
        assert "context_used" in result
        assert result["response"] == mock_chat_response
@pytest.mark.asyncio
async def test_query_rag_no_results():
    """Test RAG query when no results are found.

    The vector search is mocked to return an empty list. The test expects
    the mocked chat reply to be passed through as ``response`` and
    ``sources`` to be empty.
    """
    # Mock the embedding result, padded to 1536 dimensions.
    mock_embedding = [0.4, 0.5, 0.6] + [0.0] * (1536 - 3)

    # Mock empty search results.
    mock_search_results = []

    # Mock the chat response for when there's no context.
    mock_chat_response = "I couldn't find relevant information to answer your question."

    # Relative dotted strings are not valid patch() targets, so patch the
    # attributes on the imported objects.
    # NOTE(review): confirm the pipeline looks these names up on the patched
    # objects rather than holding its own imported references.
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding), \
            patch.object(VectorOperations, "search_vectors", return_value=mock_search_results), \
            patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
        result = await query_rag(
            query="Test query with no results?",
            user_id=uuid4(),
            top_k=5,
        )

        assert result is not None
        assert "response" in result
        assert result["response"] == mock_chat_response
        # No sources since no results were found.
        assert len(result["sources"]) == 0
@pytest.mark.asyncio
async def test_query_rag_with_conversation_history():
    """Test RAG query with conversation history.

    A two-turn history is passed through the ``conversation_history``
    keyword. The test checks that the result contains ``response`` and
    ``sources`` and that ``response`` is the mocked chat reply.
    """
    # Mock the embedding result, padded to 1536 dimensions.
    mock_embedding = [0.7, 0.8, 0.9] + [0.0] * (1536 - 3)

    # Mock the search results.
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.85,
            "payload": {
                "chunk_text": "Previous conversation context is relevant here.",
                "user_id": str(uuid4()),
            },
        }
    ]

    # Mock the chat response.
    mock_chat_response = "Based on our previous conversation and the provided context, here is the answer."

    conversation_history = [
        {"role": "user", "content": "What did we talk about earlier?"},
        {"role": "assistant", "content": "We talked about AI and RAG systems."},
    ]

    # Relative dotted strings are not valid patch() targets, so patch the
    # attributes on the imported objects.
    # NOTE(review): confirm the pipeline looks these names up on the patched
    # objects rather than holding its own imported references.
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding), \
            patch.object(VectorOperations, "search_vectors", return_value=mock_search_results), \
            patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
        result = await query_rag(
            query="Can you elaborate on RAG?",
            user_id=uuid4(),
            top_k=3,
            conversation_history=conversation_history,
        )

        assert result is not None
        assert "response" in result
        assert "sources" in result
        assert result["response"] == mock_chat_response
@pytest.mark.asyncio
async def test_process_document_for_rag():
    """Test processing a document for RAG.

    The embedding processor's ``process_document`` is mocked to return
    ``True``; the module-level ``process_document_for_rag`` is expected to
    return ``True`` as well.
    """
    # Relative dotted strings are not valid patch() targets, so patch the
    # attribute on the imported processor object.
    # NOTE(review): confirm the pipeline resolves process_document through
    # this object rather than through its own imported reference.
    with patch.object(processor, "process_document", return_value=True):
        result = await process_document_for_rag(
            document_id=uuid4(),
            user_id=uuid4(),
            content="This is a test document for RAG processing.",
            title="Test Document for RAG",
        )

        # Should return True on success.
        assert result is True
@pytest.mark.asyncio
async def test_search_documents():
    """Test document search functionality.

    The vector search is mocked to return two hits. The test expects
    ``search_documents`` to return two results, each with a ``payload`` key.
    """
    # Mock the embedding result, padded to 1536 dimensions.
    mock_embedding = [0.2, 0.3, 0.4] + [0.0] * (1536 - 3)

    # Mock the search results.
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.92,
            "payload": {
                "chunk_text": "This is relevant information about the search topic.",
                "user_id": str(uuid4()),
            },
        },
        {
            "id": "point_id_2",
            "document_id": str(uuid4()),
            "score": 0.87,
            "payload": {
                "chunk_text": "More context related to the search query.",
                "user_id": str(uuid4()),
            },
        },
    ]

    # Relative dotted strings are not valid patch() targets, so patch the
    # attributes on the imported objects.
    # NOTE(review): confirm the pipeline looks these names up on the patched
    # objects rather than holding its own imported references.
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding), \
            patch.object(VectorOperations, "search_vectors", return_value=mock_search_results):
        result = await search_documents(
            query="Test search query",
            user_id=uuid4(),
            top_k=5,
        )

        assert result is not None
        # Two results returned.
        assert len(result) == 2
        # All results have payloads.
        assert all("payload" in r for r in result)
@pytest.mark.asyncio
async def test_rag_pipeline_query_method():
    """Test the query_rag method of the RAGPipeline class.

    Embedding, vector search and chat completion are mocked; the method is
    expected to return a result whose ``response`` is the mocked chat reply.
    """
    pipeline = RAGPipeline()

    # Mock the embedding result, padded to 1536 dimensions.
    mock_embedding = [0.5, 0.6, 0.7] + [0.0] * (1536 - 3)

    # Mock the search results.
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.95,
            "payload": {"chunk_text": "Specific context for this query.", "user_id": str(uuid4())},
        }
    ]

    # Mock the chat response.
    mock_chat_response = "Based on the specific context, here is the detailed answer."

    # Relative dotted strings are not valid patch() targets, so patch the
    # attributes on the imported objects.
    # NOTE(review): confirm the pipeline looks these names up on the patched
    # objects rather than holding its own imported references.
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding), \
            patch.object(VectorOperations, "search_vectors", return_value=mock_search_results), \
            patch.object(gemini_client, "generate_chat_response", return_value=mock_chat_response):
        result = await pipeline.query_rag(
            query="Specific query?",
            user_id=uuid4(),
            top_k=3,
        )

        assert result is not None
        assert result["response"] == mock_chat_response
@pytest.mark.asyncio
async def test_process_document_method():
    """Test the process_document_for_rag method of the RAGPipeline class.

    The embedding processor's ``process_document`` is mocked to return
    ``True``; the pipeline method is expected to return ``True`` as well.
    """
    pipeline = RAGPipeline()

    # Relative dotted strings are not valid patch() targets, so patch the
    # attribute on the imported processor object.
    # NOTE(review): confirm the pipeline resolves process_document through
    # this object rather than through its own imported reference.
    with patch.object(processor, "process_document", return_value=True):
        result = await pipeline.process_document_for_rag(
            document_id=uuid4(),
            user_id=uuid4(),
            content="Content for the document processing test.",
            title="Test Document Processing",
        )

        # Should return True on success.
        assert result is True
@pytest.mark.asyncio
async def test_delete_user_documents():
    """Test deleting all documents for a user.

    ``VectorOperations.delete_vectors_by_user_id`` is mocked to return
    ``True``; ``RAGPipeline.delete_user_documents`` is expected to return
    ``True`` as well.
    """
    pipeline = RAGPipeline()

    # Relative dotted strings are not valid patch() targets, so patch the
    # method on the imported class instead.
    with patch.object(VectorOperations, "delete_vectors_by_user_id", return_value=True):
        result = await pipeline.delete_user_documents(uuid4())

        # Should return True on success.
        assert result is True
@pytest.mark.asyncio
async def test_rag_error_handling():
    """Test RAG pipeline error handling.

    Embedding generation is mocked to return ``None`` to simulate a
    failure; ``query_rag`` is then expected to return ``None``.
    """
    # Relative dotted strings are not valid patch() targets, so patch the
    # attribute on the imported client object.
    # NOTE(review): confirm the pipeline resolves generate_embedding through
    # this object rather than through its own imported reference.
    with patch.object(gemini_client, "generate_embedding", return_value=None):
        result = await query_rag(
            query="Test query that will fail embedding",
            user_id=uuid4(),
        )

        # Should return None on failure.
        assert result is None
@pytest.mark.asyncio
async def test_rag_pipeline_search_method():
    """Test the search_documents method of the RAGPipeline class.

    The vector search is mocked to return a single hit; the method is
    expected to return exactly one result.
    """
    pipeline = RAGPipeline()

    # Mock the embedding result, padded to 1536 dimensions.
    mock_embedding = [0.8, 0.9, 1.0] + [0.0] * (1536 - 3)

    # Mock the search results.
    mock_search_results = [
        {
            "id": "point_id_1",
            "document_id": str(uuid4()),
            "score": 0.90,
            "payload": {"chunk_text": "Search result content.", "user_id": str(uuid4())},
        }
    ]

    # Relative dotted strings are not valid patch() targets, so patch the
    # attributes on the imported objects.
    # NOTE(review): confirm the pipeline looks these names up on the patched
    # objects rather than holding its own imported references.
    with patch.object(gemini_client, "generate_embedding", return_value=mock_embedding), \
            patch.object(VectorOperations, "search_vectors", return_value=mock_search_results):
        result = await pipeline.search_documents(
            query="Test search",
            user_id=uuid4(),
            top_k=5,
        )

        assert result is not None
        # One result returned.
        assert len(result) == 1