"""
Test DM Guide RAG Quality

Comprehensive tests to verify that DM Guide ingestion provides high-quality
retrieval for magic items, rules, treasure, and mechanics.

This directly addresses the HIGH PRIORITY TODO item:
"Improve RAG Data for Equipment, Abilities & Class Features"

NOTE(review): search results are indexed as ``results[field][0][i]``
throughout this module, i.e. one inner list of hits for the single query that
was issued. Presumably this mirrors ChromaDB's one-list-per-query layout --
confirm against ``ChromaDBManager.search``. Because the outer list can be
non-empty while the inner hit list is empty, every test checks both levels
before trusting the hits.
"""

import sys
import time
from pathlib import Path

import pytest

# Add project to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from dnd_rag_system.core.chroma_manager import ChromaDBManager  # noqa: E402


class TestDMGuideRAGQuality:
    """Test suite for DM Guide RAG retrieval quality."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Initialize ChromaDB manager for all tests."""
        self.db_manager = ChromaDBManager()

    # =========================================================================
    # MAGIC ITEMS TESTS (HIGH PRIORITY)
    # =========================================================================

    def test_ring_of_protection_retrieval(self):
        """Test that 'Ring of Protection' query returns ring content."""
        results = self.db_manager.search('dm_guide', 'Ring of Protection', n_results=3)

        assert results['documents'], "No results returned"
        assert len(results['documents'][0]) > 0, "Empty results"

        # Check that at least one result mentions rings
        top_result = results['documents'][0][0].lower()
        assert 'ring' in top_result, "Top result doesn't mention rings"

        # Check relevance (distance should be reasonable)
        top_distance = results['distances'][0][0]
        assert top_distance < 1.5, f"Top result distance too high: {top_distance}"

    def test_magic_weapon_queries(self):
        """Test queries for magic weapons like +1, +2, Flametongue, etc."""
        test_queries = [
            "magic sword +1",
            "flaming weapon",
            "vorpal blade",
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)
            assert results['documents'], f"No results for query: {query}"
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

            # Distance check
            top_distance = results['distances'][0][0]
            assert top_distance < 2.0, f"Distance too high for '{query}': {top_distance}"

    def test_wondrous_items_retrieval(self):
        """Test retrieval of wondrous items like Bag of Holding, Immovable Rod."""
        test_queries = [
            "Bag of Holding",
            "Immovable Rod",
            "wondrous items",
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)
            assert results['documents'], f"No results for: {query}"
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

    def test_potion_retrieval(self):
        """Test retrieval of potions beyond basic healing."""
        test_queries = [
            "potion of invisibility",
            "potion of flying",
            "elixir",
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)
            assert results['documents'], f"No results for: {query}"
            # The outer list alone can be truthy with zero hits inside it.
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

    def test_magic_armor_retrieval(self):
        """Test retrieval of magic armor."""
        test_queries = [
            "magic armor +1",
            "armor of resistance",
            "plate armor enchanted",
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)
            assert results['documents'], f"No results for: {query}"
            # The outer list alone can be truthy with zero hits inside it.
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

    # =========================================================================
    # RULES & MECHANICS TESTS
    # =========================================================================

    def test_combat_rules_retrieval(self):
        """Test retrieval of combat rules and mechanics."""
        test_queries = [
            "grappling rules",
            "cover and concealment",
            "flanking bonus",
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)
            # Just verify we get results - content may vary
            assert results['documents'], f"No results for: {query}"
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

    def test_condition_mechanics_retrieval(self):
        """Test retrieval of condition mechanics (stunned, paralyzed, etc.)."""
        test_queries = [
            "paralyzed condition",
            "stunned mechanics",
            "restrained condition",
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)
            assert results['documents'], f"No results for: {query}"
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

    # =========================================================================
    # TREASURE & REWARDS TESTS
    # =========================================================================

    def test_treasure_hoard_retrieval(self):
        """Test retrieval of treasure tables and loot generation."""
        results = self.db_manager.search('dm_guide', 'treasure hoard', n_results=3)

        assert results['documents'], "No treasure results"
        assert len(results['documents'][0]) > 0, "Empty treasure results"

        # Check distance
        top_distance = results['distances'][0][0]
        assert top_distance < 1.5, f"Treasure query distance too high: {top_distance}"

    def test_loot_generation_queries(self):
        """Test queries related to loot and rewards."""
        test_queries = [
            "treasure by challenge rating",
            "random loot table",
            "gem values",
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)
            assert results['documents'], f"No results for: {query}"
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

    # =========================================================================
    # METADATA QUALITY TESTS
    # =========================================================================

    def test_metadata_completeness(self):
        """Verify that chunks have complete metadata."""
        results = self.db_manager.search('dm_guide', 'magic items', n_results=5)

        assert results['metadatas'], "No metadata returned"
        # Without this the loop below could run zero times and pass vacuously.
        assert len(results['metadatas'][0]) > 0, "Empty metadata list returned"

        for metadata in results['metadatas'][0]:
            # Check required fields
            assert 'source' in metadata, "Missing 'source' in metadata"
            assert metadata['source'] == 'dm_guide', "Incorrect source"
            assert 'section' in metadata, "Missing 'section' in metadata"
            assert 'page_start' in metadata, "Missing 'page_start' in metadata"
            assert 'page_end' in metadata, "Missing 'page_end' in metadata"
            assert 'content_type' in metadata, "Missing 'content_type' in metadata"

    def test_magic_item_tags_present(self):
        """Verify that magic item chunks are properly tagged."""
        results = self.db_manager.search('dm_guide', 'Ring of Protection', n_results=3)

        assert results['metadatas'], "No metadata returned"

        # At least one result should have magic_items tag
        # Note: tags are stored in chunk.tags, but may not be in metadata
        # This test verifies we can find magic item content
        found_magic_item_content = any(
            'magic' in doc.lower() or 'ring' in doc.lower()
            for doc in results['documents'][0]
        )

        assert found_magic_item_content, "No magic item content found in top results"

    # =========================================================================
    # RETRIEVAL QUALITY TESTS
    # =========================================================================

    def test_top_result_relevance(self):
        """Test that top results are highly relevant (low distance)."""
        # (query, exclusive upper bound on the top hit's distance)
        high_quality_queries = [
            ("Ring of Protection", 1.5),
            ("magic items", 1.3),
            ("treasure", 1.4),
        ]

        for query, max_distance in high_quality_queries:
            results = self.db_manager.search('dm_guide', query, n_results=1)
            assert results['documents'], f"No results for: {query}"
            # Confirm a hit exists before indexing distances[0][0]; otherwise
            # an empty hit list surfaces as IndexError, not a test failure.
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

            top_distance = results['distances'][0][0]
            assert top_distance < max_distance, \
                f"Query '{query}' top result distance {top_distance:.3f} exceeds {max_distance}"

    def test_no_empty_chunks(self):
        """Verify that no chunks are empty or too short."""
        # Get a sample of chunks
        results = self.db_manager.search('dm_guide', 'magic', n_results=10)

        assert results['documents'], "No results returned"
        # Without this the loop below could run zero times and pass vacuously.
        assert len(results['documents'][0]) > 0, "Empty results"

        for doc in results['documents'][0]:
            assert len(doc) > 100, f"Chunk too short: {len(doc)} chars"
            assert doc.strip(), "Empty chunk found"

    def test_page_numbers_valid(self):
        """Verify that page numbers are valid and in order."""
        results = self.db_manager.search('dm_guide', 'magic items', n_results=10)

        assert results['metadatas'], "No metadata returned"
        # Without this the loop below could run zero times and pass vacuously.
        assert len(results['metadatas'][0]) > 0, "Empty metadata list returned"

        for metadata in results['metadatas'][0]:
            # A missing key defaults to 0, which then fails the "> 0" checks.
            page_start = metadata.get('page_start', 0)
            page_end = metadata.get('page_end', 0)

            assert page_start > 0, "Invalid page_start"
            assert page_end > 0, "Invalid page_end"
            assert page_start <= page_end, f"Page range invalid: {page_start}-{page_end}"
            assert page_end - page_start < 10, f"Page range too large: {page_start}-{page_end}"

    # =========================================================================
    # CROSS-COLLECTION TESTS
    # =========================================================================

    def test_search_all_includes_dm_guide(self):
        """Test that search_all() includes dm_guide results."""
        all_results = self.db_manager.search_all(
            'Ring of Protection',
            n_results_per_collection=2
        )

        assert 'dm_guide' in all_results, "dm_guide not in search_all results"
        assert all_results['dm_guide']['documents'], "dm_guide returned no results"
        assert len(all_results['dm_guide']['documents'][0]) > 0, "dm_guide results empty"

    def test_magic_item_query_across_collections(self):
        """Test that magic item queries work across all collections."""
        all_results = self.db_manager.search_all(
            'magic sword',
            n_results_per_collection=2
        )

        # Should get results from dm_guide (and possibly equipment if it exists)
        assert 'dm_guide' in all_results

        dm_guide_results = all_results['dm_guide']
        assert dm_guide_results['documents'], "No dm_guide results for 'magic sword'"
        assert len(dm_guide_results['documents'][0]) > 0, \
            "Empty dm_guide results for 'magic sword'"

    def test_spell_and_magic_item_combined_query(self):
        """Test query that could match both spells and magic items."""
        # Query for something that appears in both contexts
        all_results = self.db_manager.search_all(
            'invisibility',
            n_results_per_collection=2
        )

        # Should get results from both spells and dm_guide
        assert 'dnd_spells' in all_results, "No spell results"
        assert 'dm_guide' in all_results, "No dm_guide results"

        # Both should have content
        assert all_results['dnd_spells']['documents'][0], "Empty spell results"
        assert all_results['dm_guide']['documents'][0], "Empty dm_guide results"

    # =========================================================================
    # COLLECTION STATISTICS TESTS
    # =========================================================================

    def test_dm_guide_collection_exists(self):
        """Verify dm_guide collection exists and has documents."""
        stats = self.db_manager.get_collection_stats('dm_guide')

        assert stats, "No stats returned"
        assert 'total_documents' in stats, "Missing total_documents in stats"
        assert stats['total_documents'] > 0, "dm_guide collection is empty"

        # Should have around 95 chunks (3 pages per chunk for ~285 pages)
        assert stats['total_documents'] >= 80, \
            f"Too few documents: {stats['total_documents']}, expected ~95"
        assert stats['total_documents'] <= 110, \
            f"Too many documents: {stats['total_documents']}, expected ~95"

    def test_chunk_types_correct(self):
        """Verify chunk types are properly set."""
        stats = self.db_manager.get_collection_stats('dm_guide')

        assert 'chunk_types' in stats, "Missing chunk_types in stats"
        assert 'dm_guide_section' in stats['chunk_types'], \
            "dm_guide_section chunk type not found"

        # All chunks should be dm_guide_section type
        total_chunks = stats['total_documents']
        section_chunks = stats['chunk_types']['dm_guide_section']

        assert section_chunks == total_chunks, \
            f"Chunk type mismatch: {section_chunks} != {total_chunks}"

    # =========================================================================
    # SPECIFIC USE CASE TESTS (from TODO.md)
    # =========================================================================

    def test_player_asks_for_magic_ring(self):
        """
        Simulate: Player asks 'What magic rings are available?'

        This directly addresses TODO item: Missing magic items data
        """
        results = self.db_manager.search('dm_guide', 'magic rings available', n_results=3)

        assert results['documents'], "No results for magic rings query"

        # Should find ring-related content
        combined_text = ' '.join(results['documents'][0]).lower()
        assert 'ring' in combined_text, "No ring content found"

        # Check quality
        top_distance = results['distances'][0][0]
        assert top_distance < 1.5, f"Magic rings query quality poor: {top_distance}"

    def test_gm_needs_treasure_for_cr5_encounter(self):
        """
        Simulate: GM needs treasure for a CR 5 encounter
        """
        results = self.db_manager.search(
            'dm_guide',
            'treasure for challenge rating 5 encounter',
            n_results=3
        )

        assert results['documents'], "No treasure results"

        # Verify we got treasure-related content
        combined_text = ' '.join(results['documents'][0]).lower()
        assert any(word in combined_text for word in ['treasure', 'loot', 'gold', 'reward']), \
            "No treasure-related content found"

    def test_player_finds_wondrous_item_identification(self):
        """
        Simulate: Player finds unknown wondrous item and wants to identify it
        """
        results = self.db_manager.search(
            'dm_guide',
            'identify unknown wondrous item',
            n_results=3
        )

        assert results['documents'], "No identification results"
        # The outer list alone can be truthy with zero hits inside it.
        assert len(results['documents'][0]) > 0, "Empty identification results"

    # =========================================================================
    # PERFORMANCE TESTS
    # =========================================================================

    def test_query_response_time(self):
        """Verify that queries complete in reasonable time."""
        # perf_counter() is the clock intended for measuring short durations;
        # time.time() follows the wall clock and can jump if it is adjusted.
        start = time.perf_counter()
        results = self.db_manager.search('dm_guide', 'magic items', n_results=5)
        elapsed = time.perf_counter() - start

        assert results['documents'], "No results returned"
        assert elapsed < 2.0, f"Query took too long: {elapsed:.2f}s"

    def test_batch_query_performance(self):
        """Test that multiple queries can be performed efficiently."""
        queries = [
            'Ring of Protection',
            'magic sword',
            'treasure hoard',
            'potion of healing',
            'wondrous items',
        ]

        # Only the total elapsed time is checked; the results are discarded.
        start = time.perf_counter()
        for query in queries:
            self.db_manager.search('dm_guide', query, n_results=2)
        elapsed = time.perf_counter() - start

        assert elapsed < 5.0, f"Batch queries took too long: {elapsed:.2f}s"


# =============================================================================
# INTEGRATION TESTS
# =============================================================================

class TestDMGuideIntegration:
    """Integration tests for DM Guide with other collections."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Initialize ChromaDB manager."""
        self.db_manager = ChromaDBManager()

    def test_all_collections_accessible(self):
        """Verify all collections are accessible."""
        stats = self.db_manager.get_all_stats()

        assert 'collections' in stats, "No collections in stats"

        # Check that major collections exist
        collections = stats['collections']
        assert 'dnd_spells' in collections, "Missing spells collection"
        assert 'dnd_monsters' in collections, "Missing monsters collection"
        assert 'dm_guide' in collections, "Missing dm_guide collection"

    def test_total_rag_coverage(self):
        """Calculate total RAG coverage across all collections."""
        stats = self.db_manager.get_all_stats()

        total_docs = stats.get('total_documents', 0)

        # Printed for humans; only visible when pytest runs with -s or the
        # test fails.
        print(f"\n📊 Total RAG Documents: {total_docs}")
        for collection_name, col_stats in stats['collections'].items():
            doc_count = col_stats.get('total_documents', 0)
            print(f"   {collection_name}: {doc_count} docs")

        # Verify we have substantial coverage
        assert total_docs > 900, f"Total RAG coverage too low: {total_docs} docs"


if __name__ == '__main__':
    # Run with: python -m pytest tests/test_dm_guide_rag_quality.py -v
    # Propagate pytest's exit code so direct execution reports failures.
    sys.exit(pytest.main([__file__, '-v', '--tb=short']))