Spaces:

Naveedtechlab
/

ai-textbook-backend

Sleeping

App Files Files Community

ai-textbook-backend / src /tests /test_rag.py

Naveedtechlab

Add full AI Native Textbook project source code

db7c1e8 4 months ago

raw

history blame contribute delete

10.4 kB

	"""
	Unit tests for RAG pipeline functionality
	"""
	import pytest
	import asyncio
	from unittest.mock import AsyncMock, MagicMock, patch
	from typing import List, Dict, Any
	from uuid import UUID, uuid4

	from ..rag.pipeline import RAGPipeline, query_rag, process_document_for_rag, search_documents


	@pytest.mark.asyncio
	async def test_rag_pipeline_initialization():
	"""Test RAGPipeline initialization"""
	pipeline = RAGPipeline()

	assert pipeline.vector_ops is not None


	@pytest.mark.asyncio
	async def test_query_rag_basic():
	"""Test basic RAG query functionality"""
	# Mock the embedding result
	mock_embedding = [0.1, 0.2, 0.3] + [0.0] * (1536 - 3) # 1536 dimensions

	# Mock the search results
	mock_search_results = [
	{
	"id": "point_id_1",
	"document_id": str(uuid4()),
	"score": 0.9,
	"payload": {"chunk_text": "This is relevant context.", "user_id": str(uuid4())}
	}
	]

	# Mock the chat response
	mock_chat_response = "This is a test response based on the context."

	with patch('..embeddings.gemini_client.generate_embedding', return_value=mock_embedding):
	with patch('..qdrant.operations.VectorOperations.search_vectors', return_value=mock_search_results):
	with patch('..embeddings.gemini_client.generate_chat_response', return_value=mock_chat_response):
	user_id = uuid4()
	result = await query_rag(
	query="Test query?",
	user_id=user_id,
	top_k=5
	)

	assert result is not None
	assert "response" in result
	assert "sources" in result
	assert "context_used" in result
	assert result["response"] == mock_chat_response


	@pytest.mark.asyncio
	async def test_query_rag_no_results():
	"""Test RAG query when no results are found"""
	# Mock the embedding result
	mock_embedding = [0.4, 0.5, 0.6] + [0.0] * (1536 - 3) # 1536 dimensions

	# Mock empty search results
	mock_search_results = []

	# Mock the chat response for when there's no context
	mock_chat_response = "I couldn't find relevant information to answer your question."

	with patch('..embeddings.gemini_client.generate_embedding', return_value=mock_embedding):
	with patch('..qdrant.operations.VectorOperations.search_vectors', return_value=mock_search_results):
	with patch('..embeddings.gemini_client.generate_chat_response', return_value=mock_chat_response):
	user_id = uuid4()
	result = await query_rag(
	query="Test query with no results?",
	user_id=user_id,
	top_k=5
	)

	assert result is not None
	assert "response" in result
	assert result["response"] == mock_chat_response
	assert len(result["sources"]) == 0 # No sources since no results found


	@pytest.mark.asyncio
	async def test_query_rag_with_conversation_history():
	"""Test RAG query with conversation history"""
	# Mock the embedding result
	mock_embedding = [0.7, 0.8, 0.9] + [0.0] * (1536 - 3) # 1536 dimensions

	# Mock the search results
	mock_search_results = [
	{
	"id": "point_id_1",
	"document_id": str(uuid4()),
	"score": 0.85,
	"payload": {"chunk_text": "Previous conversation context is relevant here.", "user_id": str(uuid4())}
	}
	]

	# Mock the chat response
	mock_chat_response = "Based on our previous conversation and the provided context, here is the answer."

	conversation_history = [
	{"role": "user", "content": "What did we talk about earlier?"},
	{"role": "assistant", "content": "We talked about AI and RAG systems."}
	]

	with patch('..embeddings.gemini_client.generate_embedding', return_value=mock_embedding):
	with patch('..qdrant.operations.VectorOperations.search_vectors', return_value=mock_search_results):
	with patch('..embeddings.gemini_client.generate_chat_response', return_value=mock_chat_response):
	user_id = uuid4()
	result = await query_rag(
	query="Can you elaborate on RAG?",
	user_id=user_id,
	top_k=3,
	conversation_history=conversation_history
	)

	assert result is not None
	assert "response" in result
	assert "sources" in result
	assert result["response"] == mock_chat_response


	@pytest.mark.asyncio
	async def test_process_document_for_rag():
	"""Test processing a document for RAG"""
	# Mock the embedding processor
	with patch('..embeddings.processor.process_document', return_value=True):
	document_id = uuid4()
	user_id = uuid4()

	result = await process_document_for_rag(
	document_id=document_id,
	user_id=user_id,
	content="This is a test document for RAG processing.",
	title="Test Document for RAG"
	)

	assert result is True # Should return True on success


	@pytest.mark.asyncio
	async def test_search_documents():
	"""Test document search functionality"""
	# Mock the embedding result
	mock_embedding = [0.2, 0.3, 0.4] + [0.0] * (1536 - 3) # 1536 dimensions

	# Mock the search results
	mock_search_results = [
	{
	"id": "point_id_1",
	"document_id": str(uuid4()),
	"score": 0.92,
	"payload": {"chunk_text": "This is relevant information about the search topic.", "user_id": str(uuid4())}
	},
	{
	"id": "point_id_2",
	"document_id": str(uuid4()),
	"score": 0.87,
	"payload": {"chunk_text": "More context related to the search query.", "user_id": str(uuid4())}
	}
	]

	with patch('..embeddings.gemini_client.generate_embedding', return_value=mock_embedding):
	with patch('..qdrant.operations.VectorOperations.search_vectors', return_value=mock_search_results):
	user_id = uuid4()
	result = await search_documents(
	query="Test search query",
	user_id=user_id,
	top_k=5
	)

	assert result is not None
	assert len(result) == 2 # Two results returned
	assert all("payload" in r for r in result) # All results have payloads


	@pytest.mark.asyncio
	async def test_rag_pipeline_query_method():
	"""Test the query method of RAGPipeline class"""
	pipeline = RAGPipeline()

	# Mock the embedding result
	mock_embedding = [0.5, 0.6, 0.7] + [0.0] * (1536 - 3) # 1536 dimensions

	# Mock the search results
	mock_search_results = [
	{
	"id": "point_id_1",
	"document_id": str(uuid4()),
	"score": 0.95,
	"payload": {"chunk_text": "Specific context for this query.", "user_id": str(uuid4())}
	}
	]

	# Mock the chat response
	mock_chat_response = "Based on the specific context, here is the detailed answer."

	with patch('..embeddings.gemini_client.generate_embedding', return_value=mock_embedding):
	with patch('..qdrant.operations.VectorOperations.search_vectors', return_value=mock_search_results):
	with patch('..embeddings.gemini_client.generate_chat_response', return_value=mock_chat_response):
	user_id = uuid4()
	result = await pipeline.query_rag(
	query="Specific query?",
	user_id=user_id,
	top_k=3
	)

	assert result is not None
	assert result["response"] == mock_chat_response


	@pytest.mark.asyncio
	async def test_process_document_method():
	"""Test the process_document method of RAGPipeline class"""
	pipeline = RAGPipeline()

	# Mock the embedding processor
	with patch('..embeddings.processor.process_document', return_value=True):
	document_id = uuid4()
	user_id = uuid4()

	result = await pipeline.process_document_for_rag(
	document_id=document_id,
	user_id=user_id,
	content="Content for the document processing test.",
	title="Test Document Processing"
	)

	assert result is True # Should return True on success


	@pytest.mark.asyncio
	async def test_delete_user_documents():
	"""Test deleting all documents for a user"""
	pipeline = RAGPipeline()

	# Mock the vector operations
	with patch('..qdrant.operations.VectorOperations.delete_vectors_by_user_id', return_value=True):
	user_id = uuid4()
	result = await pipeline.delete_user_documents(user_id)

	assert result is True # Should return True on success


	@pytest.mark.asyncio
	async def test_rag_error_handling():
	"""Test RAG pipeline error handling"""
	# Mock to simulate an error in embedding generation
	with patch('..embeddings.gemini_client.generate_embedding', return_value=None):
	user_id = uuid4()
	result = await query_rag(
	query="Test query that will fail embedding",
	user_id=user_id
	)

	assert result is None # Should return None on failure


	@pytest.mark.asyncio
	async def test_rag_pipeline_search_method():
	"""Test the search_documents method of RAGPipeline class"""
	pipeline = RAGPipeline()

	# Mock the embedding result
	mock_embedding = [0.8, 0.9, 1.0] + [0.0] * (1536 - 3) # 1536 dimensions

	# Mock the search results
	mock_search_results = [
	{
	"id": "point_id_1",
	"document_id": str(uuid4()),
	"score": 0.90,
	"payload": {"chunk_text": "Search result content.", "user_id": str(uuid4())}
	}
	]

	with patch('..embeddings.gemini_client.generate_embedding', return_value=mock_embedding):
	with patch('..qdrant.operations.VectorOperations.search_vectors', return_value=mock_search_results):
	user_id = uuid4()
	result = await pipeline.search_documents(
	query="Test search",
	user_id=user_id,
	top_k=5
	)

	assert result is not None
	assert len(result) == 1 # One result returned