File size: 17,746 Bytes
021570c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
"""
Test DM Guide RAG Quality

Comprehensive tests to verify that DM Guide ingestion provides
high-quality retrieval for magic items, rules, treasure, and mechanics.

This directly addresses the HIGH PRIORITY TODO item:
"Improve RAG Data for Equipment, Abilities & Class Features"
"""

import pytest
import sys
from pathlib import Path

# Add project to path: put the directory two levels above this file (the
# parent of the folder holding this test module) at the front of sys.path so
# the `dnd_rag_system` import below is looked up there first.
sys.path.insert(0, str(Path(__file__).parent.parent))

from dnd_rag_system.core.chroma_manager import ChromaDBManager


class TestDMGuideRAGQuality:
    """Test suite for DM Guide RAG retrieval quality."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Attach a freshly constructed ChromaDBManager to ``self.db_manager``.

        Declared ``autouse`` so every test method in this class gets it
        without requesting the fixture explicitly.
        """
        manager = ChromaDBManager()
        self.db_manager = manager

    # =========================================================================
    # MAGIC ITEMS TESTS (HIGH PRIORITY)
    # =========================================================================

    def test_ring_of_protection_retrieval(self):
        """Test that 'Ring of Protection' query returns ring content."""
        results = self.db_manager.search('dm_guide', 'Ring of Protection', n_results=3)

        assert results['documents'], "No results returned"
        assert len(results['documents'][0]) > 0, "Empty results"

        # Check that at least one result mentions rings
        top_result = results['documents'][0][0].lower()
        assert 'ring' in top_result, "Top result doesn't mention rings"

        # Check relevance (distance should be reasonable)
        top_distance = results['distances'][0][0]
        assert top_distance < 1.5, f"Top result distance too high: {top_distance}"

    def test_magic_weapon_queries(self):
        """Test queries for magic weapons like +1, +2, Flametongue, etc."""
        test_queries = [
            "magic sword +1",
            "flaming weapon",
            "vorpal blade"
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)

            assert results['documents'], f"No results for query: {query}"
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

            # Distance check
            top_distance = results['distances'][0][0]
            assert top_distance < 2.0, f"Distance too high for '{query}': {top_distance}"

    def test_wondrous_items_retrieval(self):
        """Test retrieval of wondrous items like Bag of Holding, Immovable Rod."""
        test_queries = [
            "Bag of Holding",
            "Immovable Rod",
            "wondrous items"
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)

            assert results['documents'], f"No results for: {query}"
            assert len(results['documents'][0]) > 0, f"Empty results for: {query}"

    def test_potion_retrieval(self):
        """Test retrieval of potions beyond basic healing."""
        test_queries = [
            "potion of invisibility",
            "potion of flying",
            "elixir"
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)

            assert results['documents'], f"No results for: {query}"

    def test_magic_armor_retrieval(self):
        """Test retrieval of magic armor."""
        test_queries = [
            "magic armor +1",
            "armor of resistance",
            "plate armor enchanted"
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)

            assert results['documents'], f"No results for: {query}"

    # =========================================================================
    # RULES & MECHANICS TESTS
    # =========================================================================

    def test_combat_rules_retrieval(self):
        """Test retrieval of combat rules and mechanics."""
        test_queries = [
            "grappling rules",
            "cover and concealment",
            "flanking bonus"
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)

            # Just verify we get results - content may vary
            assert results['documents'], f"No results for: {query}"

    def test_condition_mechanics_retrieval(self):
        """Test retrieval of condition mechanics (stunned, paralyzed, etc.)."""
        test_queries = [
            "paralyzed condition",
            "stunned mechanics",
            "restrained condition"
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)

            assert results['documents'], f"No results for: {query}"

    # =========================================================================
    # TREASURE & REWARDS TESTS
    # =========================================================================

    def test_treasure_hoard_retrieval(self):
        """Test retrieval of treasure tables and loot generation."""
        results = self.db_manager.search('dm_guide', 'treasure hoard', n_results=3)

        assert results['documents'], "No treasure results"
        assert len(results['documents'][0]) > 0

        # Check distance
        top_distance = results['distances'][0][0]
        assert top_distance < 1.5, f"Treasure query distance too high: {top_distance}"

    def test_loot_generation_queries(self):
        """Test queries related to loot and rewards."""
        test_queries = [
            "treasure by challenge rating",
            "random loot table",
            "gem values"
        ]

        for query in test_queries:
            results = self.db_manager.search('dm_guide', query, n_results=2)

            assert results['documents'], f"No results for: {query}"

    # =========================================================================
    # METADATA QUALITY TESTS
    # =========================================================================

    def test_metadata_completeness(self):
        """Verify that chunks have complete metadata."""
        results = self.db_manager.search('dm_guide', 'magic items', n_results=5)

        assert results['metadatas'], "No metadata returned"

        for metadata in results['metadatas'][0]:
            # Check required fields
            assert 'source' in metadata, "Missing 'source' in metadata"
            assert metadata['source'] == 'dm_guide', "Incorrect source"

            assert 'section' in metadata, "Missing 'section' in metadata"
            assert 'page_start' in metadata, "Missing 'page_start' in metadata"
            assert 'page_end' in metadata, "Missing 'page_end' in metadata"
            assert 'content_type' in metadata, "Missing 'content_type' in metadata"

    def test_magic_item_tags_present(self):
        """Verify that magic item chunks are properly tagged."""
        results = self.db_manager.search('dm_guide', 'Ring of Protection', n_results=3)

        assert results['metadatas'], "No metadata returned"

        # At least one result should have magic_items tag
        # Note: tags are stored in chunk.tags, but may not be in metadata
        # This test verifies we can find magic item content
        found_magic_item_content = False
        for doc in results['documents'][0]:
            if 'magic' in doc.lower() or 'ring' in doc.lower():
                found_magic_item_content = True
                break

        assert found_magic_item_content, "No magic item content found in top results"

    # =========================================================================
    # RETRIEVAL QUALITY TESTS
    # =========================================================================

    def test_top_result_relevance(self):
        """Test that top results are highly relevant (low distance)."""
        high_quality_queries = [
            ("Ring of Protection", 1.5),
            ("magic items", 1.3),
            ("treasure", 1.4),
        ]

        for query, max_distance in high_quality_queries:
            results = self.db_manager.search('dm_guide', query, n_results=1)

            assert results['documents'], f"No results for: {query}"

            top_distance = results['distances'][0][0]
            assert top_distance < max_distance, \
                f"Query '{query}' top result distance {top_distance:.3f} exceeds {max_distance}"

    def test_no_empty_chunks(self):
        """Verify that no chunks are empty or too short."""
        # Get a sample of chunks
        results = self.db_manager.search('dm_guide', 'magic', n_results=10)

        assert results['documents'], "No results returned"

        for doc in results['documents'][0]:
            assert len(doc) > 100, f"Chunk too short: {len(doc)} chars"
            assert doc.strip(), "Empty chunk found"

    def test_page_numbers_valid(self):
        """Verify that page numbers are valid and in order."""
        results = self.db_manager.search('dm_guide', 'magic items', n_results=10)

        assert results['metadatas'], "No metadata returned"

        for metadata in results['metadatas'][0]:
            page_start = metadata.get('page_start', 0)
            page_end = metadata.get('page_end', 0)

            assert page_start > 0, "Invalid page_start"
            assert page_end > 0, "Invalid page_end"
            assert page_start <= page_end, f"Page range invalid: {page_start}-{page_end}"
            assert page_end - page_start < 10, f"Page range too large: {page_start}-{page_end}"

    # =========================================================================
    # CROSS-COLLECTION TESTS
    # =========================================================================

    def test_search_all_includes_dm_guide(self):
        """Test that search_all() includes dm_guide results."""
        all_results = self.db_manager.search_all(
            'Ring of Protection',
            n_results_per_collection=2
        )

        assert 'dm_guide' in all_results, "dm_guide not in search_all results"
        assert all_results['dm_guide']['documents'], "dm_guide returned no results"
        assert len(all_results['dm_guide']['documents'][0]) > 0, "dm_guide results empty"

    def test_magic_item_query_across_collections(self):
        """Test that magic item queries work across all collections."""
        all_results = self.db_manager.search_all(
            'magic sword',
            n_results_per_collection=2
        )

        # Should get results from dm_guide (and possibly equipment if it exists)
        assert 'dm_guide' in all_results
        dm_guide_results = all_results['dm_guide']

        assert dm_guide_results['documents'], "No dm_guide results for 'magic sword'"

    def test_spell_and_magic_item_combined_query(self):
        """Test query that could match both spells and magic items."""
        # Query for something that appears in both contexts
        all_results = self.db_manager.search_all(
            'invisibility',
            n_results_per_collection=2
        )

        # Should get results from both spells and dm_guide
        assert 'dnd_spells' in all_results, "No spell results"
        assert 'dm_guide' in all_results, "No dm_guide results"

        # Both should have content
        assert all_results['dnd_spells']['documents'][0], "Empty spell results"
        assert all_results['dm_guide']['documents'][0], "Empty dm_guide results"

    # =========================================================================
    # COLLECTION STATISTICS TESTS
    # =========================================================================

    def test_dm_guide_collection_exists(self):
        """Verify dm_guide collection exists and has documents."""
        stats = self.db_manager.get_collection_stats('dm_guide')

        assert stats, "No stats returned"
        assert 'total_documents' in stats, "Missing total_documents in stats"
        assert stats['total_documents'] > 0, "dm_guide collection is empty"

        # Should have around 95 chunks (3 pages per chunk for ~285 pages)
        assert stats['total_documents'] >= 80, \
            f"Too few documents: {stats['total_documents']}, expected ~95"
        assert stats['total_documents'] <= 110, \
            f"Too many documents: {stats['total_documents']}, expected ~95"

    def test_chunk_types_correct(self):
        """Verify chunk types are properly set."""
        stats = self.db_manager.get_collection_stats('dm_guide')

        assert 'chunk_types' in stats, "Missing chunk_types in stats"
        assert 'dm_guide_section' in stats['chunk_types'], \
            "dm_guide_section chunk type not found"

        # All chunks should be dm_guide_section type
        total_chunks = stats['total_documents']
        section_chunks = stats['chunk_types']['dm_guide_section']

        assert section_chunks == total_chunks, \
            f"Chunk type mismatch: {section_chunks} != {total_chunks}"

    # =========================================================================
    # SPECIFIC USE CASE TESTS (from TODO.md)
    # =========================================================================

    def test_player_asks_for_magic_ring(self):
        """
        Simulate: Player asks 'What magic rings are available?'
        This directly addresses TODO item: Missing magic items data
        """
        results = self.db_manager.search('dm_guide', 'magic rings available', n_results=3)

        assert results['documents'], "No results for magic rings query"

        # Should find ring-related content
        combined_text = ' '.join(results['documents'][0]).lower()
        assert 'ring' in combined_text, "No ring content found"

        # Check quality
        top_distance = results['distances'][0][0]
        assert top_distance < 1.5, f"Magic rings query quality poor: {top_distance}"

    def test_gm_needs_treasure_for_cr5_encounter(self):
        """
        Simulate: GM needs treasure for a CR 5 encounter
        """
        results = self.db_manager.search(
            'dm_guide',
            'treasure for challenge rating 5 encounter',
            n_results=3
        )

        assert results['documents'], "No treasure results"

        # Verify we got treasure-related content
        combined_text = ' '.join(results['documents'][0]).lower()
        assert any(word in combined_text for word in ['treasure', 'loot', 'gold', 'reward']), \
            "No treasure-related content found"

    def test_player_finds_wondrous_item_identification(self):
        """
        Simulate: Player finds unknown wondrous item and wants to identify it
        """
        results = self.db_manager.search(
            'dm_guide',
            'identify unknown wondrous item',
            n_results=3
        )

        assert results['documents'], "No identification results"

    # =========================================================================
    # PERFORMANCE TESTS
    # =========================================================================

    def test_query_response_time(self):
        """Verify that queries complete in reasonable time."""
        import time

        start = time.time()
        results = self.db_manager.search('dm_guide', 'magic items', n_results=5)
        elapsed = time.time() - start

        assert results['documents'], "No results returned"
        assert elapsed < 2.0, f"Query took too long: {elapsed:.2f}s"

    def test_batch_query_performance(self):
        """Test that multiple queries can be performed efficiently."""
        import time

        queries = [
            'Ring of Protection',
            'magic sword',
            'treasure hoard',
            'potion of healing',
            'wondrous items'
        ]

        start = time.time()
        for query in queries:
            self.db_manager.search('dm_guide', query, n_results=2)
        elapsed = time.time() - start

        assert elapsed < 5.0, f"Batch queries took too long: {elapsed:.2f}s"


# =============================================================================
# INTEGRATION TESTS
# =============================================================================

class TestDMGuideIntegration:
    """Integration tests for DM Guide with other collections."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Initialize ChromaDB manager."""
        self.db_manager = ChromaDBManager()

    def test_all_collections_accessible(self):
        """Verify all collections are accessible."""
        stats = self.db_manager.get_all_stats()

        assert 'collections' in stats, "No collections in stats"

        # Check that major collections exist
        collections = stats['collections']
        assert 'dnd_spells' in collections, "Missing spells collection"
        assert 'dnd_monsters' in collections, "Missing monsters collection"
        assert 'dm_guide' in collections, "Missing dm_guide collection"

    def test_total_rag_coverage(self):
        """Calculate total RAG coverage across all collections."""
        stats = self.db_manager.get_all_stats()

        total_docs = stats.get('total_documents', 0)
        print(f"\n📊 Total RAG Documents: {total_docs}")

        for collection_name, col_stats in stats['collections'].items():
            doc_count = col_stats.get('total_documents', 0)
            print(f"   {collection_name}: {doc_count} docs")

        # Verify we have substantial coverage
        assert total_docs > 900, f"Total RAG coverage too low: {total_docs} docs"


if __name__ == '__main__':
    # Run with: python -m pytest tests/test_dm_guide_rag_quality.py -v
    # When this file is executed directly, pass pytest's return code on as the
    # process exit status so a failing run does not exit with status 0.
    sys.exit(pytest.main([__file__, '-v', '--tb=short']))