Spaces:
Build error
Build error
| """ | |
| Test RAG Pipeline - RAG-The-Game-Changer | |
| Simple test to verify the implementation works. | |
| """ | |
| import asyncio | |
| import logging | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
async def test_basic_functionality():
    """End-to-end smoke test of the RAG pipeline.

    Exercises imports, pipeline construction, stats, health check, document
    ingestion, and the query path. Returns True on full success, False if
    any step raises.
    """
    logger.info("Starting RAG pipeline test...")
    try:
        # Import check: verifies the public entry points load cleanly.
        logger.info("Testing imports...")
        from config import RAGPipeline, RAGConfig

        logger.info("β Imports successful")

        # Construct a pipeline with default configuration.
        logger.info("Testing pipeline initialization...")
        rag = RAGPipeline()
        logger.info("β Pipeline created")

        # Introspection endpoints: stats and health.
        stats = await rag.get_stats()
        logger.info(f"β Pipeline stats: {stats}")
        health = await rag.health_check()
        logger.info(f"β Health check: {health}")

        # Ingest a small fixture corpus using the semantic chunking strategy.
        logger.info("Testing document ingestion...")
        documents = [
            {
                "content": "RAG stands for Retrieval-Augmented Generation. It combines retrieval systems with generative AI models to produce more accurate and grounded responses.",
                "metadata": {"title": "RAG Introduction", "source": "test_doc_1"},
            },
            {
                "content": "Vector databases store high-dimensional vectors and enable fast similarity search. Common examples include Pinecone, Weaviate, and FAISS.",
                "metadata": {"title": "Vector Databases", "source": "test_doc_2"},
            },
            {
                "content": "Embeddings are numerical representations of text that capture semantic meaning. They allow machines to understand text similarity and relationships.",
                "metadata": {"title": "Text Embeddings", "source": "test_doc_3"},
            },
        ]
        ingest_result = await rag.ingest(documents, chunk_strategy="semantic")
        logger.info(f"β Ingestion result: {ingest_result}")

        # Run a handful of representative questions through the query path.
        logger.info("Testing query functionality...")
        questions = (
            "What is RAG?",
            "How do vector databases work?",
            "What are text embeddings?",
        )
        for question in questions:
            response = await rag.query(
                query=question, top_k=3, include_sources=True, include_confidence=True
            )
            logger.info(f"β Query: {question}")
            logger.info(f" Answer: {response.answer}")
            logger.info(f" Confidence: {response.confidence}")
            logger.info(f" Sources: {len(response.sources)}")
            logger.info(f" Time: {response.total_time_ms:.2f}ms")
            logger.info("")

        logger.info("β All tests completed successfully!")
        return True
    except Exception as e:
        # Best-effort test harness: report the failure and signal it to main().
        logger.error(f"β Test failed: {e}")
        import traceback

        traceback.print_exc()
        return False
async def test_components():
    """Unit-level smoke tests for the individual RAG components.

    Covers the embedding service, the dense retriever, and the document
    chunker in isolation. Returns True if all three succeed, False otherwise.
    """
    logger.info("Testing individual components...")
    try:
        # Embedding service: mock provider, fixed dimensionality.
        logger.info("Testing embedding service...")
        from config.embedding_configs.embedding_service import create_embedding_service

        embedder = create_embedding_service("mock", {"dimensions": 384})
        embed_result = await embedder.embed_texts(["Hello world", "RAG test"])
        logger.info(f"β Embedding service: {embed_result.embeddings.shape}")

        # Dense retriever: index two tiny documents, then retrieve.
        logger.info("Testing retriever...")
        from retrieval_systems.dense_retriever import DenseRetriever

        retriever = DenseRetriever({"embedding_provider": "mock"})
        corpus = [
            {"content": "This is a test document about AI", "document_id": "doc1"},
            {"content": "This is another test about machine learning", "document_id": "doc2"},
        ]
        await retriever.add_documents(corpus)
        retrieval_result = await retriever.retrieve("What is AI?", top_k=2)
        logger.info(f"β Retrieval result: {len(retrieval_result.chunks)} chunks")

        # Chunker: semantic strategy with a small max chunk size.
        logger.info("Testing chunker...")
        from data_ingestion.chunkers.document_chunker import create_chunker

        chunker = create_chunker("semantic", {"max_chunk_size": 200})
        sample = "This is a long test document that should be split into multiple chunks. It contains several sentences and paragraphs to test the chunking functionality."
        chunks = await chunker.chunk(sample, {"source": "test"}, "test_doc")
        logger.info(f"β Chunking result: {len(chunks)} chunks")

        logger.info("β Component tests completed!")
        return True
    except Exception as e:
        # Best-effort test harness: report the failure and signal it to main().
        logger.error(f"β Component test failed: {e}")
        import traceback

        traceback.print_exc()
        return False
async def main():
    """Run the component tests, then the end-to-end test, and summarize.

    Returns True only when both test suites pass.
    """
    logger.info("π Starting RAG-The-Game-Changer Tests")
    logger.info("=" * 50)

    # Component-level tests run first; the end-to-end test runs regardless.
    components_ok = await test_components()
    pipeline_ok = await test_basic_functionality()

    logger.info("=" * 50)
    if components_ok and pipeline_ok:
        logger.info("π All tests passed!")
        return True
    logger.error("β Some tests failed!")
    return False
if __name__ == "__main__":
    # Run the async test suite and propagate the result as the process
    # exit code (0 = success, 1 = failure).
    success = asyncio.run(main())
    # raise SystemExit instead of the site-module exit() helper: exit() is
    # only guaranteed in interactive sessions, while SystemExit always works.
    raise SystemExit(0 if success else 1)