# Scraped file-viewer header (not part of the module): Spaces status "Build error";
# file size 17,746 bytes; revision 021570c. The viewer's line-number gutter (1-453) is omitted.
"""
Test DM Guide RAG Quality
Comprehensive tests to verify that DM Guide ingestion provides
high-quality retrieval for magic items, rules, treasure, and mechanics.
This directly addresses the HIGH PRIORITY TODO item:
"Improve RAG Data for Equipment, Abilities & Class Features"
"""
import pytest
import sys
from pathlib import Path
# Add project to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from dnd_rag_system.core.chroma_manager import ChromaDBManager
class TestDMGuideRAGQuality:
"""Test suite for DM Guide RAG retrieval quality."""
@pytest.fixture(autouse=True)
def setup(self):
"""Initialize ChromaDB manager for all tests."""
self.db_manager = ChromaDBManager()
# =========================================================================
# MAGIC ITEMS TESTS (HIGH PRIORITY)
# =========================================================================
def test_ring_of_protection_retrieval(self):
"""Test that 'Ring of Protection' query returns ring content."""
results = self.db_manager.search('dm_guide', 'Ring of Protection', n_results=3)
assert results['documents'], "No results returned"
assert len(results['documents'][0]) > 0, "Empty results"
# Check that at least one result mentions rings
top_result = results['documents'][0][0].lower()
assert 'ring' in top_result, "Top result doesn't mention rings"
# Check relevance (distance should be reasonable)
top_distance = results['distances'][0][0]
assert top_distance < 1.5, f"Top result distance too high: {top_distance}"
def test_magic_weapon_queries(self):
"""Test queries for magic weapons like +1, +2, Flametongue, etc."""
test_queries = [
"magic sword +1",
"flaming weapon",
"vorpal blade"
]
for query in test_queries:
results = self.db_manager.search('dm_guide', query, n_results=2)
assert results['documents'], f"No results for query: {query}"
assert len(results['documents'][0]) > 0, f"Empty results for: {query}"
# Distance check
top_distance = results['distances'][0][0]
assert top_distance < 2.0, f"Distance too high for '{query}': {top_distance}"
def test_wondrous_items_retrieval(self):
"""Test retrieval of wondrous items like Bag of Holding, Immovable Rod."""
test_queries = [
"Bag of Holding",
"Immovable Rod",
"wondrous items"
]
for query in test_queries:
results = self.db_manager.search('dm_guide', query, n_results=2)
assert results['documents'], f"No results for: {query}"
assert len(results['documents'][0]) > 0, f"Empty results for: {query}"
def test_potion_retrieval(self):
"""Test retrieval of potions beyond basic healing."""
test_queries = [
"potion of invisibility",
"potion of flying",
"elixir"
]
for query in test_queries:
results = self.db_manager.search('dm_guide', query, n_results=2)
assert results['documents'], f"No results for: {query}"
def test_magic_armor_retrieval(self):
"""Test retrieval of magic armor."""
test_queries = [
"magic armor +1",
"armor of resistance",
"plate armor enchanted"
]
for query in test_queries:
results = self.db_manager.search('dm_guide', query, n_results=2)
assert results['documents'], f"No results for: {query}"
# =========================================================================
# RULES & MECHANICS TESTS
# =========================================================================
def test_combat_rules_retrieval(self):
"""Test retrieval of combat rules and mechanics."""
test_queries = [
"grappling rules",
"cover and concealment",
"flanking bonus"
]
for query in test_queries:
results = self.db_manager.search('dm_guide', query, n_results=2)
# Just verify we get results - content may vary
assert results['documents'], f"No results for: {query}"
def test_condition_mechanics_retrieval(self):
"""Test retrieval of condition mechanics (stunned, paralyzed, etc.)."""
test_queries = [
"paralyzed condition",
"stunned mechanics",
"restrained condition"
]
for query in test_queries:
results = self.db_manager.search('dm_guide', query, n_results=2)
assert results['documents'], f"No results for: {query}"
# =========================================================================
# TREASURE & REWARDS TESTS
# =========================================================================
def test_treasure_hoard_retrieval(self):
"""Test retrieval of treasure tables and loot generation."""
results = self.db_manager.search('dm_guide', 'treasure hoard', n_results=3)
assert results['documents'], "No treasure results"
assert len(results['documents'][0]) > 0
# Check distance
top_distance = results['distances'][0][0]
assert top_distance < 1.5, f"Treasure query distance too high: {top_distance}"
def test_loot_generation_queries(self):
"""Test queries related to loot and rewards."""
test_queries = [
"treasure by challenge rating",
"random loot table",
"gem values"
]
for query in test_queries:
results = self.db_manager.search('dm_guide', query, n_results=2)
assert results['documents'], f"No results for: {query}"
# =========================================================================
# METADATA QUALITY TESTS
# =========================================================================
def test_metadata_completeness(self):
"""Verify that chunks have complete metadata."""
results = self.db_manager.search('dm_guide', 'magic items', n_results=5)
assert results['metadatas'], "No metadata returned"
for metadata in results['metadatas'][0]:
# Check required fields
assert 'source' in metadata, "Missing 'source' in metadata"
assert metadata['source'] == 'dm_guide', "Incorrect source"
assert 'section' in metadata, "Missing 'section' in metadata"
assert 'page_start' in metadata, "Missing 'page_start' in metadata"
assert 'page_end' in metadata, "Missing 'page_end' in metadata"
assert 'content_type' in metadata, "Missing 'content_type' in metadata"
def test_magic_item_tags_present(self):
"""Verify that magic item chunks are properly tagged."""
results = self.db_manager.search('dm_guide', 'Ring of Protection', n_results=3)
assert results['metadatas'], "No metadata returned"
# At least one result should have magic_items tag
# Note: tags are stored in chunk.tags, but may not be in metadata
# This test verifies we can find magic item content
found_magic_item_content = False
for doc in results['documents'][0]:
if 'magic' in doc.lower() or 'ring' in doc.lower():
found_magic_item_content = True
break
assert found_magic_item_content, "No magic item content found in top results"
# =========================================================================
# RETRIEVAL QUALITY TESTS
# =========================================================================
def test_top_result_relevance(self):
"""Test that top results are highly relevant (low distance)."""
high_quality_queries = [
("Ring of Protection", 1.5),
("magic items", 1.3),
("treasure", 1.4),
]
for query, max_distance in high_quality_queries:
results = self.db_manager.search('dm_guide', query, n_results=1)
assert results['documents'], f"No results for: {query}"
top_distance = results['distances'][0][0]
assert top_distance < max_distance, \
f"Query '{query}' top result distance {top_distance:.3f} exceeds {max_distance}"
def test_no_empty_chunks(self):
"""Verify that no chunks are empty or too short."""
# Get a sample of chunks
results = self.db_manager.search('dm_guide', 'magic', n_results=10)
assert results['documents'], "No results returned"
for doc in results['documents'][0]:
assert len(doc) > 100, f"Chunk too short: {len(doc)} chars"
assert doc.strip(), "Empty chunk found"
def test_page_numbers_valid(self):
"""Verify that page numbers are valid and in order."""
results = self.db_manager.search('dm_guide', 'magic items', n_results=10)
assert results['metadatas'], "No metadata returned"
for metadata in results['metadatas'][0]:
page_start = metadata.get('page_start', 0)
page_end = metadata.get('page_end', 0)
assert page_start > 0, "Invalid page_start"
assert page_end > 0, "Invalid page_end"
assert page_start <= page_end, f"Page range invalid: {page_start}-{page_end}"
assert page_end - page_start < 10, f"Page range too large: {page_start}-{page_end}"
# =========================================================================
# CROSS-COLLECTION TESTS
# =========================================================================
def test_search_all_includes_dm_guide(self):
"""Test that search_all() includes dm_guide results."""
all_results = self.db_manager.search_all(
'Ring of Protection',
n_results_per_collection=2
)
assert 'dm_guide' in all_results, "dm_guide not in search_all results"
assert all_results['dm_guide']['documents'], "dm_guide returned no results"
assert len(all_results['dm_guide']['documents'][0]) > 0, "dm_guide results empty"
def test_magic_item_query_across_collections(self):
"""Test that magic item queries work across all collections."""
all_results = self.db_manager.search_all(
'magic sword',
n_results_per_collection=2
)
# Should get results from dm_guide (and possibly equipment if it exists)
assert 'dm_guide' in all_results
dm_guide_results = all_results['dm_guide']
assert dm_guide_results['documents'], "No dm_guide results for 'magic sword'"
def test_spell_and_magic_item_combined_query(self):
"""Test query that could match both spells and magic items."""
# Query for something that appears in both contexts
all_results = self.db_manager.search_all(
'invisibility',
n_results_per_collection=2
)
# Should get results from both spells and dm_guide
assert 'dnd_spells' in all_results, "No spell results"
assert 'dm_guide' in all_results, "No dm_guide results"
# Both should have content
assert all_results['dnd_spells']['documents'][0], "Empty spell results"
assert all_results['dm_guide']['documents'][0], "Empty dm_guide results"
# =========================================================================
# COLLECTION STATISTICS TESTS
# =========================================================================
def test_dm_guide_collection_exists(self):
"""Verify dm_guide collection exists and has documents."""
stats = self.db_manager.get_collection_stats('dm_guide')
assert stats, "No stats returned"
assert 'total_documents' in stats, "Missing total_documents in stats"
assert stats['total_documents'] > 0, "dm_guide collection is empty"
# Should have around 95 chunks (3 pages per chunk for ~285 pages)
assert stats['total_documents'] >= 80, \
f"Too few documents: {stats['total_documents']}, expected ~95"
assert stats['total_documents'] <= 110, \
f"Too many documents: {stats['total_documents']}, expected ~95"
def test_chunk_types_correct(self):
"""Verify chunk types are properly set."""
stats = self.db_manager.get_collection_stats('dm_guide')
assert 'chunk_types' in stats, "Missing chunk_types in stats"
assert 'dm_guide_section' in stats['chunk_types'], \
"dm_guide_section chunk type not found"
# All chunks should be dm_guide_section type
total_chunks = stats['total_documents']
section_chunks = stats['chunk_types']['dm_guide_section']
assert section_chunks == total_chunks, \
f"Chunk type mismatch: {section_chunks} != {total_chunks}"
# =========================================================================
# SPECIFIC USE CASE TESTS (from TODO.md)
# =========================================================================
def test_player_asks_for_magic_ring(self):
"""
Simulate: Player asks 'What magic rings are available?'
This directly addresses TODO item: Missing magic items data
"""
results = self.db_manager.search('dm_guide', 'magic rings available', n_results=3)
assert results['documents'], "No results for magic rings query"
# Should find ring-related content
combined_text = ' '.join(results['documents'][0]).lower()
assert 'ring' in combined_text, "No ring content found"
# Check quality
top_distance = results['distances'][0][0]
assert top_distance < 1.5, f"Magic rings query quality poor: {top_distance}"
def test_gm_needs_treasure_for_cr5_encounter(self):
"""
Simulate: GM needs treasure for a CR 5 encounter
"""
results = self.db_manager.search(
'dm_guide',
'treasure for challenge rating 5 encounter',
n_results=3
)
assert results['documents'], "No treasure results"
# Verify we got treasure-related content
combined_text = ' '.join(results['documents'][0]).lower()
assert any(word in combined_text for word in ['treasure', 'loot', 'gold', 'reward']), \
"No treasure-related content found"
def test_player_finds_wondrous_item_identification(self):
"""
Simulate: Player finds unknown wondrous item and wants to identify it
"""
results = self.db_manager.search(
'dm_guide',
'identify unknown wondrous item',
n_results=3
)
assert results['documents'], "No identification results"
# =========================================================================
# PERFORMANCE TESTS
# =========================================================================
def test_query_response_time(self):
"""Verify that queries complete in reasonable time."""
import time
start = time.time()
results = self.db_manager.search('dm_guide', 'magic items', n_results=5)
elapsed = time.time() - start
assert results['documents'], "No results returned"
assert elapsed < 2.0, f"Query took too long: {elapsed:.2f}s"
def test_batch_query_performance(self):
"""Test that multiple queries can be performed efficiently."""
import time
queries = [
'Ring of Protection',
'magic sword',
'treasure hoard',
'potion of healing',
'wondrous items'
]
start = time.time()
for query in queries:
self.db_manager.search('dm_guide', query, n_results=2)
elapsed = time.time() - start
assert elapsed < 5.0, f"Batch queries took too long: {elapsed:.2f}s"
# =============================================================================
# INTEGRATION TESTS
# =============================================================================
class TestDMGuideIntegration:
"""Integration tests for DM Guide with other collections."""
@pytest.fixture(autouse=True)
def setup(self):
"""Initialize ChromaDB manager."""
self.db_manager = ChromaDBManager()
def test_all_collections_accessible(self):
"""Verify all collections are accessible."""
stats = self.db_manager.get_all_stats()
assert 'collections' in stats, "No collections in stats"
# Check that major collections exist
collections = stats['collections']
assert 'dnd_spells' in collections, "Missing spells collection"
assert 'dnd_monsters' in collections, "Missing monsters collection"
assert 'dm_guide' in collections, "Missing dm_guide collection"
def test_total_rag_coverage(self):
"""Calculate total RAG coverage across all collections."""
stats = self.db_manager.get_all_stats()
total_docs = stats.get('total_documents', 0)
print(f"\n📊 Total RAG Documents: {total_docs}")
for collection_name, col_stats in stats['collections'].items():
doc_count = col_stats.get('total_documents', 0)
print(f" {collection_name}: {doc_count} docs")
# Verify we have substantial coverage
assert total_docs > 900, f"Total RAG coverage too low: {total_docs} docs"
if __name__ == '__main__':
    # Run with: python -m pytest tests/test_dm_guide_rag_quality.py -v
    # Propagate pytest's exit code so running this file as a script
    # reports failure to the shell / CI instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, '-v', '--tb=short']))
# End of scraped file view.