"""
Test RAG Pipeline - RAG-The-Game-Changer
Simple test to verify the implementation works.
"""
import asyncio
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def test_basic_functionality():
    """Run an end-to-end smoke test of the RAG pipeline.

    Exercises import, pipeline construction, stats, health check, document
    ingestion, and querying in sequence.

    Returns:
        bool: True if every step completed, False if any step raised.
    """
    logger.info("Starting RAG pipeline test...")
    try:
        # Test imports — the import statement itself is the check, so the
        # otherwise-unused RAGConfig name is intentional.
        logger.info("Testing imports...")
        from config import RAGPipeline, RAGConfig
        logger.info("✅ Imports successful")

        # Test pipeline initialization
        logger.info("Testing pipeline initialization...")
        pipeline = RAGPipeline()
        logger.info("✅ Pipeline created")

        # Test pipeline stats
        stats = await pipeline.get_stats()
        logger.info(f"✅ Pipeline stats: {stats}")

        # Test health check
        health = await pipeline.health_check()
        logger.info(f"✅ Health check: {health}")

        # Test document ingestion with a small fixed corpus
        logger.info("Testing document ingestion...")
        test_docs = [
            {
                "content": "RAG stands for Retrieval-Augmented Generation. It combines retrieval systems with generative AI models to produce more accurate and grounded responses.",
                "metadata": {"title": "RAG Introduction", "source": "test_doc_1"},
            },
            {
                "content": "Vector databases store high-dimensional vectors and enable fast similarity search. Common examples include Pinecone, Weaviate, and FAISS.",
                "metadata": {"title": "Vector Databases", "source": "test_doc_2"},
            },
            {
                "content": "Embeddings are numerical representations of text that capture semantic meaning. They allow machines to understand text similarity and relationships.",
                "metadata": {"title": "Text Embeddings", "source": "test_doc_3"},
            },
        ]
        ingest_result = await pipeline.ingest(test_docs, chunk_strategy="semantic")
        logger.info(f"✅ Ingestion result: {ingest_result}")

        # Test querying — one query per ingested topic
        logger.info("Testing query functionality...")
        test_queries = [
            "What is RAG?",
            "How do vector databases work?",
            "What are text embeddings?",
        ]
        for query in test_queries:
            response = await pipeline.query(
                query=query, top_k=3, include_sources=True, include_confidence=True
            )
            logger.info(f"✅ Query: {query}")
            logger.info(f"   Answer: {response.answer}")
            logger.info(f"   Confidence: {response.confidence}")
            logger.info(f"   Sources: {len(response.sources)}")
            logger.info(f"   Time: {response.total_time_ms:.2f}ms")
            logger.info("")

        logger.info("✅ All tests completed successfully!")
        return True

    except Exception as e:
        # Log the full traceback so the failing step is diagnosable,
        # then report failure instead of propagating.
        logger.error(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
async def test_components():
    """Smoke-test individual pipeline components in isolation.

    Covers the embedding service, the dense retriever, and the document
    chunker, each with a tiny inline fixture.

    Returns:
        bool: True if every component check completed, False if any raised.
    """
    logger.info("Testing individual components...")
    try:
        # Test embedding service (mock provider, fixed dimensionality)
        logger.info("Testing embedding service...")
        from config.embedding_configs.embedding_service import create_embedding_service
        embedder = create_embedding_service("mock", {"dimensions": 384})
        result = await embedder.embed_texts(["Hello world", "RAG test"])
        logger.info(f"✅ Embedding service: {result.embeddings.shape}")

        # Test retriever with two tiny documents
        logger.info("Testing retriever...")
        from retrieval_systems.dense_retriever import DenseRetriever
        retriever = DenseRetriever({"embedding_provider": "mock"})
        test_docs = [
            {"content": "This is a test document about AI", "document_id": "doc1"},
            {"content": "This is another test about machine learning", "document_id": "doc2"},
        ]
        await retriever.add_documents(test_docs)
        retrieval_result = await retriever.retrieve("What is AI?", top_k=2)
        logger.info(f"✅ Retrieval result: {len(retrieval_result.chunks)} chunks")

        # Test chunker — content is long enough to force multiple chunks
        logger.info("Testing chunker...")
        from data_ingestion.chunkers.document_chunker import create_chunker
        chunker = create_chunker("semantic", {"max_chunk_size": 200})
        test_content = "This is a long test document that should be split into multiple chunks. It contains several sentences and paragraphs to test the chunking functionality."
        chunks = await chunker.chunk(test_content, {"source": "test"}, "test_doc")
        logger.info(f"✅ Chunking result: {len(chunks)} chunks")

        logger.info("✅ Component tests completed!")
        return True

    except Exception as e:
        # Log the full traceback so the failing component is diagnosable.
        logger.error(f"❌ Component test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
async def main():
    """Run component tests then the end-to-end test and report a summary.

    Returns:
        bool: True only if both test suites passed.
    """
    logger.info("🚀 Starting RAG-The-Game-Changer Tests")
    logger.info("=" * 50)

    # Run component tests first so low-level failures surface before
    # the end-to-end run.
    components_ok = await test_components()
    pipeline_ok = await test_basic_functionality()

    # Summary
    logger.info("=" * 50)
    if components_ok and pipeline_ok:
        logger.info("🎉 All tests passed!")
        return True
    logger.error("❌ Some tests failed!")
    return False
if __name__ == "__main__":
    # Map the boolean result to a process exit code (0 = success) so the
    # script is CI-friendly. SystemExit is raised directly because the
    # exit() builtin is only installed by the optional `site` module.
    success = asyncio.run(main())
    raise SystemExit(0 if success else 1)