File size: 5,521 Bytes
40f6dcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
Test RAG Pipeline - RAG-The-Game-Changer

Simple test to verify the implementation works.
"""

import asyncio
import logging
import sys

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def test_basic_functionality():
    """Smoke-test the full RAG pipeline end to end.

    Exercises imports, pipeline construction, the stats and health-check
    probes, document ingestion, and querying against a tiny in-memory
    corpus.

    Returns:
        bool: True if every step completed, False if any step raised.
    """
    logger.info("Starting RAG pipeline test...")

    try:
        # Import inside the try so a broken installation is reported as a
        # test failure instead of crashing the runner. RAGConfig is imported
        # only to verify that the name resolves.
        logger.info("Testing imports...")
        from config import RAGPipeline, RAGConfig  # noqa: F401

        logger.info("βœ… Imports successful")

        # Pipeline construction with default configuration.
        logger.info("Testing pipeline initialization...")
        pipeline = RAGPipeline()
        logger.info("βœ… Pipeline created")

        # Read-only probes; both should succeed on a freshly built pipeline.
        stats = await pipeline.get_stats()
        logger.info("βœ… Pipeline stats: %s", stats)

        health = await pipeline.health_check()
        logger.info("βœ… Health check: %s", health)

        # Ingest a small known corpus so the queries below have grounding.
        logger.info("Testing document ingestion...")
        test_docs = [
            {
                "content": "RAG stands for Retrieval-Augmented Generation. It combines retrieval systems with generative AI models to produce more accurate and grounded responses.",
                "metadata": {"title": "RAG Introduction", "source": "test_doc_1"},
            },
            {
                "content": "Vector databases store high-dimensional vectors and enable fast similarity search. Common examples include Pinecone, Weaviate, and FAISS.",
                "metadata": {"title": "Vector Databases", "source": "test_doc_2"},
            },
            {
                "content": "Embeddings are numerical representations of text that capture semantic meaning. They allow machines to understand text similarity and relationships.",
                "metadata": {"title": "Text Embeddings", "source": "test_doc_3"},
            },
        ]

        ingest_result = await pipeline.ingest(test_docs, chunk_strategy="semantic")
        logger.info("βœ… Ingestion result: %s", ingest_result)

        # One query per ingested topic; lazy %-style args avoid formatting
        # cost when the log level filters the record out.
        logger.info("Testing query functionality...")
        test_queries = [
            "What is RAG?",
            "How do vector databases work?",
            "What are text embeddings?",
        ]

        for query in test_queries:
            response = await pipeline.query(
                query=query, top_k=3, include_sources=True, include_confidence=True
            )

            logger.info("βœ… Query: %s", query)
            logger.info("   Answer: %s", response.answer)
            logger.info("   Confidence: %s", response.confidence)
            logger.info("   Sources: %s", len(response.sources))
            logger.info("   Time: %.2fms", response.total_time_ms)
            logger.info("")

        logger.info("βœ… All tests completed successfully!")
        return True

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # replacing the manual `import traceback; traceback.print_exc()`.
        logger.exception("❌ Test failed: %s", e)
        return False


async def test_components():
    """Test each pipeline component (embedder, retriever, chunker) in isolation.

    Returns:
        bool: True if every component check passed, False if any raised.
    """
    logger.info("Testing individual components...")

    try:
        # Embedding service: the mock provider avoids any model/network cost.
        logger.info("Testing embedding service...")
        from config.embedding_configs.embedding_service import create_embedding_service

        embedder = create_embedding_service("mock", {"dimensions": 384})
        result = await embedder.embed_texts(["Hello world", "RAG test"])
        logger.info("βœ… Embedding service: %s", result.embeddings.shape)

        # Dense retriever wired to the same mock embedding provider.
        logger.info("Testing retriever...")
        from retrieval_systems.dense_retriever import DenseRetriever

        retriever = DenseRetriever({"embedding_provider": "mock"})
        test_docs = [
            {"content": "This is a test document about AI", "document_id": "doc1"},
            {"content": "This is another test about machine learning", "document_id": "doc2"},
        ]
        await retriever.add_documents(test_docs)

        retrieval_result = await retriever.retrieve("What is AI?", top_k=2)
        logger.info("βœ… Retrieval result: %s chunks", len(retrieval_result.chunks))

        # Chunker: max_chunk_size is small enough to force multiple chunks
        # out of the test content below.
        logger.info("Testing chunker...")
        from data_ingestion.chunkers.document_chunker import create_chunker

        chunker = create_chunker("semantic", {"max_chunk_size": 200})
        test_content = "This is a long test document that should be split into multiple chunks. It contains several sentences and paragraphs to test the chunking functionality."
        chunks = await chunker.chunk(test_content, {"source": "test"}, "test_doc")
        logger.info("βœ… Chunking result: %s chunks", len(chunks))

        logger.info("βœ… Component tests completed!")
        return True

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # replacing the manual `import traceback; traceback.print_exc()`.
        logger.exception("❌ Component test failed: %s", e)
        return False


async def main():
    """Run both test suites in sequence and report an overall verdict.

    Returns:
        bool: True when both the component tests and the end-to-end
        pipeline test pass, False otherwise.
    """
    logger.info("πŸš€ Starting RAG-The-Game-Changer Tests")
    logger.info("=" * 50)

    # Component checks run first so a broken part is reported before the
    # end-to-end test exercises the whole pipeline.
    components_ok = await test_components()
    pipeline_ok = await test_basic_functionality()

    logger.info("=" * 50)
    if not (components_ok and pipeline_ok):
        logger.error("❌ Some tests failed!")
        return False

    logger.info("πŸŽ‰ All tests passed!")
    return True


if __name__ == "__main__":
    # Run the async test suite and map its boolean result to a process
    # exit code (0 = success, 1 = failure). sys.exit is used instead of
    # the builtin exit(), which is injected by the `site` module for
    # interactive sessions and is not guaranteed in scripts (e.g. -S).
    success = asyncio.run(main())
    sys.exit(0 if success else 1)