Spaces:
Build error
Build error
| import sys | |
| import os | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| import unittest | |
| from unittest.mock import MagicMock, patch, mock_open | |
| import pinecone | |
| from langchain.schema import Document | |
| from core.rag_engine import RAGPrep | |
| from typing import List, Dict, Optional | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader | |
| from langchain_openai import OpenAIEmbeddings | |
| import pinecone | |
| from tqdm.auto import tqdm | |
| from langchain.schema import Document | |
| from config import get_settings | |
class TestRAGPrep(unittest.TestCase):
    """Unit tests for ``RAGPrep``.

    ``get_settings``, ``OpenAIEmbeddings`` and ``pinecone.Pinecone`` are
    patched for every test, so the Pinecone client and the embeddings object
    used by ``RAGPrep`` are mocks.
    """

    def setUp(self):
        """Build the mock settings object and start the shared patchers."""
        # NOTE(review): the real settings are still loaded here to copy the API
        # keys and the PDF directory, so the suite needs a working
        # configuration. Consider replacing these with dummy values.
        self.settings = get_settings()

        self.mock_settings = MagicMock()
        self.mock_settings.INDEX_NAME = "test-index"
        self.mock_settings.PINECONE_API_KEY = self.settings.PINECONE_API_KEY
        self.mock_settings.CLOUD = "aws"
        self.mock_settings.REGION = "us-east-1"
        self.mock_settings.PDF_DIRECTORY = self.settings.PDF_DIRECTORY
        self.mock_settings.CHUNK_SIZE = 1000
        self.mock_settings.CHUNK_OVERLAP = 200
        self.mock_settings.DIMENSIONS = 1536
        self.mock_settings.OPENAI_API_KEY = self.settings.OPENAI_API_KEY

        # Create patchers for get_settings and the external services.
        self.settings_patcher = patch(
            'core.rag_engine.get_settings', return_value=self.mock_settings
        )
        self.embeddings_patcher = patch('core.rag_engine.OpenAIEmbeddings')
        self.pinecone_patcher = patch('core.rag_engine.pinecone.Pinecone')

        # Register each stop() right after its start(): cleanups run even when
        # a later step of setUp raises, so a patch can never leak into other
        # tests. This replaces the explicit tearDown().
        self.mock_get_settings = self.settings_patcher.start()
        self.addCleanup(self.settings_patcher.stop)
        self.mock_embeddings = self.embeddings_patcher.start()
        self.addCleanup(self.embeddings_patcher.stop)
        self.mock_pinecone = self.pinecone_patcher.start()
        self.addCleanup(self.pinecone_patcher.stop)

    def test_init(self):
        """RAGPrep() stores the settings and builds its clients from them."""
        rag_prep = RAGPrep()

        self.assertEqual(rag_prep.index_name, "test-index")
        self.assertEqual(rag_prep.settings, self.mock_settings)
        self.mock_pinecone.assert_called_once_with(
            self.mock_settings.PINECONE_API_KEY
        )
        self.mock_embeddings.assert_called_once_with(
            openai_api_key=self.mock_settings.OPENAI_API_KEY
        )

    # Without this decorator nothing supplies ``mock_loader_class`` and the
    # runner fails with a TypeError before any assertion is reached.
    # Assumes core.rag_engine imports DirectoryLoader by name, so this is the
    # place it is looked up - TODO confirm against core/rag_engine.py.
    @patch('core.rag_engine.DirectoryLoader')
    def test_load_and_split_pdfs(self, mock_loader_class):
        """load_and_split_pdfs() loads PDFs via DirectoryLoader, returns a list."""
        mock_docs = [
            Document(
                page_content="Test content 1",
                metadata={"source": "test1.pdf", "page": 1},
            ),
            Document(
                page_content="Test content 2",
                metadata={"source": "test2.pdf", "page": 1},
            ),
        ]

        # Configure the mock loader to hand back the fake documents.
        mock_loader_instance = MagicMock()
        mock_loader_instance.load.return_value = mock_docs
        mock_loader_class.return_value = mock_loader_instance

        rag_prep = RAGPrep()
        chunks = rag_prep.load_and_split_pdfs()

        self.assertIsInstance(chunks, list)
        mock_loader_class.assert_called_once_with(
            self.mock_settings.PDF_DIRECTORY,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
        )
        mock_loader_instance.load.assert_called_once()

    def test_process_and_upload(self):
        """process_and_upload() embeds the chunks and upserts one vector each."""
        mock_docs = [
            Document(
                page_content="Test 1",
                metadata={"source": "test.pdf", "page": 1},
            ),
            Document(
                page_content="Test 2",
                metadata={"source": "test.pdf", "page": 2},
            ),
        ]

        # Embeddings instance returning one 1536-dimensional vector per doc.
        mock_embeddings_instance = MagicMock()
        mock_embeddings_instance.embed_documents.return_value = [
            [0.1] * 1536,
            [0.2] * 1536,
        ]
        self.mock_embeddings.return_value = mock_embeddings_instance

        # Index handle returned by the mocked Pinecone client.
        mock_index = MagicMock()
        self.mock_pinecone.return_value.Index.return_value = mock_index

        # Bypass real PDF loading; only the embed/upsert path is under test.
        with patch.object(RAGPrep, 'load_and_split_pdfs', return_value=mock_docs):
            rag_prep = RAGPrep()
            rag_prep.process_and_upload()

        mock_embeddings_instance.embed_documents.assert_called_once()
        self.assertTrue(mock_index.upsert.called)

        # Verify the format of the (last) upsert call.
        upserted_vectors = mock_index.upsert.call_args.kwargs['vectors']
        self.assertEqual(len(upserted_vectors), 2)  # Two documents
        self.assertTrue(all(len(v[1]) == 1536 for v in upserted_vectors))

    def test_cleanup_index_success(self):
        """cleanup_index() deletes all vectors when the index exists."""
        # setUp already replaced pinecone.Pinecone, so the client mock is
        # reused here instead of being patched a second time.
        mock_pc = self.mock_pinecone.return_value
        mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
        mock_index = MagicMock()
        mock_pc.Index.return_value = mock_index

        rag_prep = RAGPrep()
        result = rag_prep.cleanup_index()

        self.assertTrue(result)
        mock_index.delete.assert_called_once_with(delete_all=True)

    def test_cleanup_index_no_index(self):
        """cleanup_index() returns True and opens no index when none exists."""
        mock_pc = self.mock_pinecone.return_value
        mock_pc.list_indexes.return_value.names.return_value = []

        rag_prep = RAGPrep()
        result = rag_prep.cleanup_index()

        self.assertTrue(result)
        mock_pc.Index.assert_not_called()

    def test_cleanup_index_error(self):
        """cleanup_index() returns False when opening the index raises."""
        mock_pc = self.mock_pinecone.return_value
        mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
        mock_pc.Index.side_effect = Exception("Test error")

        rag_prep = RAGPrep()
        result = rag_prep.cleanup_index()

        self.assertFalse(result)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()