"""
Test RAG Pipeline - RAG-The-Game-Changer
Simple test to verify the implementation works.
"""
import asyncio
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def test_basic_functionality():
    """Run an end-to-end smoke test of the RAG pipeline.

    Exercises import, pipeline construction, stats, health check, document
    ingestion, and querying in sequence.

    Returns:
        bool: True if every step completed, False if any step raised.
    """
    logger.info("Starting RAG pipeline test...")
    try:
        # Test imports — the import statement itself is the check, so the
        # otherwise-unused RAGConfig name is intentional.
        logger.info("Testing imports...")
        from config import RAGPipeline, RAGConfig
        logger.info("✅ Imports successful")

        # Test pipeline initialization
        logger.info("Testing pipeline initialization...")
        pipeline = RAGPipeline()
        logger.info("✅ Pipeline created")

        # Test pipeline stats
        stats = await pipeline.get_stats()
        logger.info(f"✅ Pipeline stats: {stats}")

        # Test health check
        health = await pipeline.health_check()
        logger.info(f"✅ Health check: {health}")

        # Test document ingestion with a small fixed corpus
        logger.info("Testing document ingestion...")
        test_docs = [
            {
                "content": "RAG stands for Retrieval-Augmented Generation. It combines retrieval systems with generative AI models to produce more accurate and grounded responses.",
                "metadata": {"title": "RAG Introduction", "source": "test_doc_1"},
            },
            {
                "content": "Vector databases store high-dimensional vectors and enable fast similarity search. Common examples include Pinecone, Weaviate, and FAISS.",
                "metadata": {"title": "Vector Databases", "source": "test_doc_2"},
            },
            {
                "content": "Embeddings are numerical representations of text that capture semantic meaning. They allow machines to understand text similarity and relationships.",
                "metadata": {"title": "Text Embeddings", "source": "test_doc_3"},
            },
        ]
        ingest_result = await pipeline.ingest(test_docs, chunk_strategy="semantic")
        logger.info(f"✅ Ingestion result: {ingest_result}")

        # Test querying — one query per ingested topic
        logger.info("Testing query functionality...")
        test_queries = [
            "What is RAG?",
            "How do vector databases work?",
            "What are text embeddings?",
        ]
        for query in test_queries:
            response = await pipeline.query(
                query=query, top_k=3, include_sources=True, include_confidence=True
            )
            logger.info(f"✅ Query: {query}")
            logger.info(f"   Answer: {response.answer}")
            logger.info(f"   Confidence: {response.confidence}")
            logger.info(f"   Sources: {len(response.sources)}")
            logger.info(f"   Time: {response.total_time_ms:.2f}ms")
            logger.info("")

        logger.info("✅ All tests completed successfully!")
        return True

    except Exception as e:
        # Log the full traceback so the failing step is diagnosable,
        # then report failure instead of propagating.
        logger.error(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
async def test_components():
    """Smoke-test individual pipeline components in isolation.

    Covers the embedding service, the dense retriever, and the document
    chunker, each with a tiny inline fixture.

    Returns:
        bool: True if every component check completed, False if any raised.
    """
    logger.info("Testing individual components...")
    try:
        # Test embedding service (mock provider, fixed dimensionality)
        logger.info("Testing embedding service...")
        from config.embedding_configs.embedding_service import create_embedding_service
        embedder = create_embedding_service("mock", {"dimensions": 384})
        result = await embedder.embed_texts(["Hello world", "RAG test"])
        logger.info(f"✅ Embedding service: {result.embeddings.shape}")

        # Test retriever with two tiny documents
        logger.info("Testing retriever...")
        from retrieval_systems.dense_retriever import DenseRetriever
        retriever = DenseRetriever({"embedding_provider": "mock"})
        test_docs = [
            {"content": "This is a test document about AI", "document_id": "doc1"},
            {"content": "This is another test about machine learning", "document_id": "doc2"},
        ]
        await retriever.add_documents(test_docs)
        retrieval_result = await retriever.retrieve("What is AI?", top_k=2)
        logger.info(f"✅ Retrieval result: {len(retrieval_result.chunks)} chunks")

        # Test chunker — content is long enough to force multiple chunks
        logger.info("Testing chunker...")
        from data_ingestion.chunkers.document_chunker import create_chunker
        chunker = create_chunker("semantic", {"max_chunk_size": 200})
        test_content = "This is a long test document that should be split into multiple chunks. It contains several sentences and paragraphs to test the chunking functionality."
        chunks = await chunker.chunk(test_content, {"source": "test"}, "test_doc")
        logger.info(f"✅ Chunking result: {len(chunks)} chunks")

        logger.info("✅ Component tests completed!")
        return True

    except Exception as e:
        # Log the full traceback so the failing component is diagnosable.
        logger.error(f"❌ Component test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
async def main():
    """Run component tests then the end-to-end test and report a summary.

    Returns:
        bool: True only if both test suites passed.
    """
    logger.info("🚀 Starting RAG-The-Game-Changer Tests")
    logger.info("=" * 50)

    # Run component tests first so low-level failures surface before
    # the end-to-end run.
    components_ok = await test_components()
    pipeline_ok = await test_basic_functionality()

    # Summary
    logger.info("=" * 50)
    if components_ok and pipeline_ok:
        logger.info("🎉 All tests passed!")
        return True
    logger.error("❌ Some tests failed!")
    return False
if __name__ == "__main__":
    # Map the boolean result to a process exit code (0 = success) so the
    # script is CI-friendly. SystemExit is raised directly because the
    # exit() builtin is only installed by the optional `site` module.
    success = asyncio.run(main())
    raise SystemExit(0 if success else 1)