CarsRUS / tests /test_rag.py
galbendavids's picture
RAG: comparison by supported models, partial answer for 1 known model, stronger prompts for context aggregation
de2bc35
#!/usr/bin/env python
"""
Smoke tests for the CarsRUS RAG engine.
Covers engine initialization, car name normalization, hybrid search and lazy embedding loading.
"""
import sys
import os
# Put the parent of this tests directory at the front of sys.path (once), so
# top-level project modules such as rag_engine resolve when imported below.
_tests_dir = os.path.split(os.path.abspath(__file__))[0]
_project_root = os.path.split(_tests_dir)[0]
if sys.path.count(_project_root) == 0:
    sys.path[:0] = [_project_root]
def test_initialization():
    """Check that a RAGEngine can be imported and constructed.

    On success, prints the sizes of the engine's ``chunks``,
    ``chunk_metadata`` and ``keyword_index`` collections and the current
    value of ``embeddings``.

    Returns:
        tuple: ``(True, engine)`` on success, or ``(False, None)`` when
        importing, constructing or inspecting the engine raised.
    """
    print("\n" + "=" * 60)
    print("TEST 1: RAG Engine Initialization")
    print("=" * 60)
    try:
        # Imported inside the try so that a missing or broken rag_engine
        # module is reported as a failed test rather than escaping as an
        # unhandled ImportError.
        from rag_engine import RAGEngine

        engine = RAGEngine()
        print("✅ Engine initialized successfully")
        print(f" - Chunks loaded: {len(engine.chunks)}")
        print(f" - Metadata entries: {len(engine.chunk_metadata)}")
        print(f" - Keyword index entries: {len(engine.keyword_index)}")
        print(f" - Embeddings: {engine.embeddings}")
        return True, engine
    except Exception as e:
        # Top-level test boundary: report the failure with a traceback and
        # let the caller decide how to proceed.
        print(f"❌ Initialization failed: {e}")
        import traceback
        traceback.print_exc()
        return False, None
def test_search(engine):
    """Run one hybrid search and report the top hit.

    Calls ``engine._hybrid_search`` with a fixed query and ``top_k=3``, then
    prints the number of results and, if there are any, the first result's
    ``score`` and ``metadata['title']``.

    Args:
        engine: Object exposing ``_hybrid_search(query, top_k=...)``; main()
            passes the engine returned by test_initialization().

    Returns:
        bool: True if the search and the reporting completed without
        raising, False otherwise.
    """
    print("\n" + "=" * 60)
    print("TEST 2: Hybrid Search")
    print("=" * 60)
    try:
        query = "Tell me about the Audi RS3"
        print(f"Testing search for: '{query}'")
        results = engine._hybrid_search(query, top_k=3)
        print("✅ Search successful")
        print(f" - Results found: {len(results)}")
        if results:
            # An empty result list still counts as a successful search; only
            # the details of the best hit are skipped.
            print(f" - Top result score: {results[0]['score']:.3f}")
            print(f" - Top result title: {results[0]['metadata']['title']}")
        return True
    except Exception as e:
        # Top-level test boundary: report the failure with a traceback.
        print(f"❌ Search failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_car_normalization(engine):
    """Check that car names normalize to the expected identifiers.

    Each input is passed to ``engine._normalize_car_name`` and the result is
    compared with the expected value; every case is printed with a pass or
    fail marker, followed by the pass count.

    Args:
        engine: Object exposing ``_normalize_car_name(text)``; main() passes
            the engine returned by test_initialization().

    Returns:
        bool: True when every case produced the expected identifier.
    """
    print("\n" + "=" * 60)
    print("TEST 3: Car Name Normalization")
    print("=" * 60)
    test_cases = [
        ("Audi RS3", "audi_rs3"),
        ("RS3", "audi_rs3"),
        # Hebrew brand name, restored from mis-decoded UTF-8 bytes.
        ("קיה EV9", "kia_ev9"),
        ("Citroen C3", "citroen_c3"),
    ]
    passed = 0
    for text, expected in test_cases:
        result = engine._normalize_car_name(text)
        if result == expected:
            print(f"✅ '{text}' → {result}")
            passed += 1
        else:
            print(f"❌ '{text}' → {result} (expected {expected})")
    print(f" - Passed: {passed}/{len(test_cases)}")
    # Every case either passes or fails, so no separate failure counter is
    # needed.
    return passed == len(test_cases)
def test_embeddings(engine):
    """Check that embeddings exist after a search has been run.

    Reports whether ``engine.embeddings`` is ``None`` beforehand (i.e. lazy),
    runs one ``_hybrid_search`` to trigger generation, then requires
    ``engine.embeddings`` to be set.

    Args:
        engine: Object exposing ``embeddings``, ``chunks`` and
            ``_hybrid_search(query, top_k=...)``; main() passes the engine
            returned by test_initialization().

    Returns:
        bool: True if embeddings are present after the search, False if they
        are still ``None`` or anything raised.
    """
    print("\n" + "=" * 60)
    print("TEST 4: Lazy Embedding Loading")
    print("=" * 60)
    try:
        # Check initial state. Already-loaded embeddings only produce a
        # warning, e.g. because an earlier search has populated them.
        if engine.embeddings is None:
            print("✅ Embeddings are None at startup (lazy loading working)")
        else:
            print("⚠️ Embeddings already loaded (not lazy)")
        # Trigger embedding generation
        query = "Test query"
        engine._hybrid_search(query, top_k=1)
        if engine.embeddings is None:
            print("❌ Embeddings not generated")
            return False
        print("✅ Embeddings generated after first search")
        print(f" - Shape: {engine.embeddings.shape}")
        print(f" - Expected chunks: {len(engine.chunks)}")
        return True
    except Exception as e:
        # Top-level test boundary: report the failure with a traceback.
        print(f"❌ Embedding test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run all tests in order and return a process exit code.

    Initialization and search are critical: a failure in either aborts the
    run with exit code 1. Failures in normalization or lazy embedding loading
    only produce a warning, and are shown as such in the final summary.

    Returns:
        int: 0 when the critical tests passed, 1 otherwise.
    """
    print("\n" + "=" * 60)
    print("CARSRUS RAG ENGINE TEST SUITE")
    print("=" * 60)

    # Initialization (critical): nothing else can run without an engine.
    success, engine = test_initialization()
    if not success:
        print("\n❌ TESTS FAILED - Initialization error")
        return 1

    # Car name normalization (non-critical).
    normalization_ok = test_car_normalization(engine)
    if not normalization_ok:
        print("\n⚠️ Some normalization tests failed")

    # Hybrid search (critical).
    if not test_search(engine):
        print("\n❌ TESTS FAILED - Search error")
        return 1

    # Lazy embedding loading (non-critical).
    embeddings_ok = test_embeddings(engine)
    if not embeddings_ok:
        print("\n⚠️ Embedding test had issues")

    # Summary: non-critical results are reported as they actually turned
    # out, instead of being printed as passed unconditionally.
    print("\n" + "=" * 60)
    print("✅ ALL CRITICAL TESTS PASSED")
    print("=" * 60)
    print("\nRAG Engine is ready for deployment!")
    print("- Initialization: ✅")
    print("- Data loading: ✅")
    print(f"- Car name normalization: {'✅' if normalization_ok else '⚠️'}")
    print("- Search functionality: ✅")
    print(f"- Lazy loading: {'✅' if embeddings_ok else '⚠️'}")
    return 0
if __name__ == "__main__":
    # Use sys.exit rather than the bare exit() helper: exit() is injected by
    # the site module for interactive use and is absent under `python -S`.
    sys.exit(main())