Spaces:

vimalk78
/

abc123

Sleeping

File size: 12,805 Bytes

38c016b

"""
Specific unit tests to verify the list index out of range bug is completely fixed.
These tests reproduce the exact conditions that were causing the crash.
"""

import pytest
import asyncio
import sys
from pathlib import Path
from unittest.mock import Mock, patch

# Make the project root importable so the `src.` package import below resolves.
# The root is taken to be the directory one level above this file's directory.
_this_dir = Path(__file__).parent
project_root = _this_dir.parent
sys.path.insert(0, str(project_root))

from src.services.crossword_generator import CrosswordGenerator


class TestIndexBugFix:
    """Regression tests for the "list index out of range" crash.

    Each test feeds ``CrosswordGenerator`` inputs that can push a word past
    the edge of the grid and fails if an ``IndexError`` escapes. Tests that
    tolerate other exceptions (the original comments name timeouts and
    "no intersections found") keep that tolerance, but their assertions on
    a successful result are checked outside the ``try`` so they cannot be
    swallowed by the broad handler.
    """

    @pytest.fixture
    def real_vector_words(self):
        """Real word data that was causing the crash - from the actual logs."""
        return [
            {'word': 'ZOOLOGY', 'clue': 'zoology (animal)', 'similarity': 0.6106429100036621, 'source': 'vector_search', 'crossword_score': 16},
            {'word': 'NATURE', 'clue': 'nature (animal)', 'similarity': 0.5933953523635864, 'source': 'vector_search', 'crossword_score': 18},
            {'word': 'VETERINARY', 'clue': 'veterinary (animal)', 'similarity': 0.7589661479, 'source': 'vector_search', 'crossword_score': 25},
            {'word': 'ZOOLOGICAL', 'clue': 'zoological (animal)', 'similarity': 0.668032, 'source': 'vector_search', 'crossword_score': 22},
            {'word': 'MAMMALIAN', 'clue': 'mammalian (animal)', 'similarity': 0.6375998, 'source': 'vector_search', 'crossword_score': 20},
            {'word': 'CHILDREN', 'clue': 'children (animal)', 'similarity': 0.6281173, 'source': 'vector_search', 'crossword_score': 19},
            {'word': 'ELEPHANT', 'clue': 'elephant (animal)', 'similarity': 0.6157694, 'source': 'vector_search', 'crossword_score': 18},
            {'word': 'FAUNA', 'clue': 'fauna (animal)', 'similarity': 0.5890194177627563, 'source': 'vector_search', 'crossword_score': 16},
            {'word': 'ORGANISM', 'clue': 'organism (animal)', 'similarity': 0.58123, 'source': 'vector_search', 'crossword_score': 19},
            {'word': 'MAMMAL', 'clue': 'mammal (animal)', 'similarity': 0.57892, 'source': 'vector_search', 'crossword_score': 17},
            {'word': 'CREATURE', 'clue': 'creature (animal)', 'similarity': 0.57654, 'source': 'vector_search', 'crossword_score': 18},
            {'word': 'SPECIES', 'clue': 'species (animal)', 'similarity': 0.57432, 'source': 'vector_search', 'crossword_score': 16}
        ]

    def test_calculate_placement_score_bounds_checking(self):
        """Score placements that overrun a 5x5 grid without raising.

        Every case places a word so that it would extend past the right or
        bottom edge of the grid. The call must return an ``int`` and must not
        raise ``IndexError`` (or any other exception).
        """
        generator = CrosswordGenerator()

        # Create a small 5x5 grid
        grid = [["." for _ in range(5)] for _ in range(5)]

        # Test cases that should NOT crash
        test_cases = [
            # Horizontal placement that would go out of bounds
            {"row": 2, "col": 3, "direction": "horizontal", "word": "ELEPHANT"},  # 8 letters, would go to col 10
            {"row": 4, "col": 0, "direction": "horizontal", "word": "VETERINARY"},  # 10 letters, would go to col 9

            # Vertical placement that would go out of bounds
            {"row": 3, "col": 2, "direction": "vertical", "word": "ZOOLOGICAL"},  # 10 letters, would go to row 12
            {"row": 1, "col": 4, "direction": "vertical", "word": "MAMMALIAN"},   # 9 letters, would go to row 9

            # Edge cases
            {"row": 0, "col": 0, "direction": "horizontal", "word": "SUPERLONGWORD"},
            {"row": 0, "col": 0, "direction": "vertical", "word": "SUPERLONGWORD"},
            {"row": 4, "col": 4, "direction": "horizontal", "word": "TEST"},
            {"row": 4, "col": 4, "direction": "vertical", "word": "TEST"},
        ]

        for case_number, test_case in enumerate(test_cases, start=1):
            word = test_case["word"]
            placement = {
                "row": test_case["row"],
                "col": test_case["col"],
                "direction": test_case["direction"],
            }
            location = f"{word} at ({test_case['row']},{test_case['col']}) {test_case['direction']}"

            # Only the call under test lives in the try block, so a failing
            # assertion below is reported as an assertion, not as an
            # "Unexpected error".
            try:
                score = generator._calculate_placement_score(grid, word, placement, [])
            except IndexError as e:
                pytest.fail(f"❌ IndexError in test case {case_number}: {location} - {e}")
            except Exception as e:
                pytest.fail(f"❌ Unexpected error in test case {case_number}: {e}")

            print(f"✅ Test case {case_number}: {location} -> score: {score}")
            assert isinstance(score, int), f"Score should be integer, got {type(score)}"

    def test_word_sorting_alignment(self, real_vector_words):
        """Check that sorting (word, word_obj) pairs keeps both lists aligned.

        The pairing-and-sorting logic is re-implemented locally in this test;
        it does not call into ``CrosswordGenerator``. After sorting the pairs
        by word length (longest first), the word at each index must still
        match the ``"word"`` field of the object at the same index.
        """
        # Pair each upper-cased word with its source object so both move together.
        word_pairs = []
        for i, w in enumerate(real_vector_words):
            if isinstance(w, dict) and "word" in w:
                word_pairs.append((w["word"].upper(), w))
            else:
                pytest.fail(f"Invalid word format at index {i}: {w}")

        # Sort pairs by word length (longest first)
        word_pairs.sort(key=lambda pair: len(pair[0]), reverse=True)

        # Extract sorted lists
        word_list = [pair[0] for pair in word_pairs]
        sorted_word_objs = [pair[1] for pair in word_pairs]

        # Verify alignment
        assert len(word_list) == len(sorted_word_objs), "Array lengths must match"

        for i, (word, word_obj) in enumerate(zip(word_list, sorted_word_objs)):
            assert word == word_obj["word"].upper(), f"Mismatch at index {i}: {word} != {word_obj['word'].upper()}"

        print(f"✅ Word sorting alignment verified for {len(word_list)} words")

    def test_grid_creation_with_real_data(self, real_vector_words):
        """Create a grid from the logged word data without an IndexError.

        An ``IndexError`` fails the test. Any other exception from
        ``_create_grid`` is tolerated and only reported. A ``None`` result is
        reported and accepted. A non-``None`` result must contain the keys
        ``"grid"``, ``"clues"`` and ``"placed_words"``.
        """
        generator = CrosswordGenerator()

        try:
            # This should NOT crash
            result = generator._create_grid(real_vector_words)
        except IndexError as e:
            pytest.fail(f"❌ IndexError in grid creation: {e}")
        except Exception as e:
            # Other exceptions are okay (e.g., timeout, no intersections found)
            print(f"ℹ️ Grid creation failed with non-index error: {e}")
            return

        if result is None:
            print("⚠️ Grid creation returned None (no successful placement)")
            return

        # Checked outside the try block so a failed assertion is not swallowed
        # by the broad handler above.
        assert "grid" in result
        assert "clues" in result
        assert "placed_words" in result
        print(f"✅ Grid creation succeeded with {len(result['placed_words'])} placed words")

    def test_backtrack_placement_bounds(self, real_vector_words):
        """Run backtracking placement on a 15x15 grid without an IndexError.

        An ``IndexError`` fails the test; any other exception is tolerated and
        only reported. The return value is printed but not asserted on.
        """
        generator = CrosswordGenerator()

        # Create grid
        grid = [["." for _ in range(15)] for _ in range(15)]
        placed_words = []

        # Extract word list
        # NOTE(review): word_list is length-sorted while real_vector_words is
        # passed in its original order, so the two are not index-aligned --
        # confirm this is deliberate.
        word_list = [w["word"].upper() for w in real_vector_words]
        word_list.sort(key=len, reverse=True)

        try:
            # Test backtracking - should not crash even if no solution found
            # NOTE(review): start_time=0 is passed as-is; if the generator
            # measures elapsed time against a wall clock, the timeout would
            # already be exceeded on entry -- confirm that is intended.
            result = generator._backtrack_placement(
                grid, word_list, real_vector_words, 0, placed_words,
                start_time=0, timeout=1.0  # Short timeout
            )
        except IndexError as e:
            pytest.fail(f"❌ IndexError in backtrack placement: {e}")
        except Exception as e:
            # Other exceptions are okay (timeout, etc.)
            print(f"ℹ️ Backtrack placement failed with non-index error: {e}")
        else:
            print(f"✅ Backtrack placement completed without IndexError, result: {result}")

    def test_intersection_placement_edge_cases(self):
        """Find and score intersection placements around a pre-placed word.

        "TEST" is written horizontally into row 5, columns 2-5 of a 10x10
        grid. For each candidate word, both finding the intersection
        placements and scoring each returned placement must complete without
        ``IndexError``. Other exceptions propagate.
        """
        generator = CrosswordGenerator()

        # Create grid with a word already placed
        grid = [["." for _ in range(10)] for _ in range(10)]

        # Place "TEST" horizontally at (5, 2)
        for i, letter in enumerate("TEST"):
            grid[5][2 + i] = letter

        placed_words = [{
            "word": "TEST",
            "row": 5,
            "col": 2,
            "direction": "horizontal",
            "number": 1
        }]

        # Test words that might cause out-of-bounds access
        test_words = ["VETERINARY", "ZOOLOGICAL", "ELEPHANT", "T", "AT", "STRESS"]

        for word in test_words:
            try:
                placements = generator._find_all_intersection_placements(grid, word, placed_words)
            except IndexError as e:
                pytest.fail(f"❌ IndexError finding intersections for {word}: {e}")

            print(f"✅ Found {len(placements)} intersection placements for '{word}'")

            # Test each placement
            for placement in placements:
                try:
                    score = generator._calculate_placement_score(grid, word, placement, placed_words)
                except IndexError as e:
                    pytest.fail(f"❌ IndexError calculating score for {word}: {e}")
                print(f"  - Placement at ({placement['row']},{placement['col']}) {placement['direction']}: score {score}")

    @pytest.mark.asyncio
    async def test_full_puzzle_generation_stress(self, real_vector_words):
        """Generate a full puzzle from the logged word data without an IndexError.

        The generator's ``vector_service`` is replaced by a mock whose
        ``find_similar_words`` returns the fixture data. An ``IndexError``
        fails the test; any other exception is tolerated and only reported. A
        non-``None`` result must contain ``"grid"``, ``"clues"`` and
        ``"metadata"``.
        """
        generator = CrosswordGenerator()

        # Mock vector service
        # NOTE(review): find_similar_words is a plain Mock; if generate_puzzle
        # awaits it, an AsyncMock would be needed -- as written the resulting
        # error would be tolerated as a "non-index error". Confirm.
        mock_vector_service = Mock()
        mock_vector_service.find_similar_words = Mock(return_value=real_vector_words)
        generator.vector_service = mock_vector_service

        try:
            # This should complete without IndexError
            result = await generator.generate_puzzle(["Animals"], "medium", True)
        except IndexError as e:
            pytest.fail(f"❌ IndexError in full puzzle generation: {e}")
        except Exception as e:
            # Other exceptions might be okay
            print(f"ℹ️ Puzzle generation failed with non-index error: {e}")
            return

        if result is None:
            print("⚠️ Puzzle generation returned None")
            return

        # Checked outside the try block so a failed assertion is not swallowed
        # by the broad handler above.
        print("✅ Full puzzle generation succeeded!")
        assert "grid" in result
        assert "clues" in result
        assert "metadata" in result

    def test_edge_case_grids(self):
        """Probe every cell and direction on unusual grid/word combinations.

        For each case, every (row, col, direction) of an empty square grid is
        tried with each word. ``_can_place_word`` must return a ``bool`` and
        ``_calculate_placement_score`` an ``int``; an ``IndexError`` from
        either call fails the test.
        """
        generator = CrosswordGenerator()

        edge_cases = [
            # Very small grid
            {"grid_size": 3, "words": ["CAT", "DOG"]},
            # Single cell grid
            {"grid_size": 1, "words": ["A"]},
            # Large grid with short words
            {"grid_size": 20, "words": ["A", "I", "IT", "AT"]},
            # Small grid with long words
            {"grid_size": 5, "words": ["SUPERCALIFRAGILISTICEXPIALIDOCIOUS"]},
        ]

        for case in edge_cases:
            size = case["grid_size"]
            grid = [["." for _ in range(size)] for _ in range(size)]
            placed_words = []

            for word in case["words"]:
                # Test various placement attempts
                for row in range(size):
                    for col in range(size):
                        for direction in ["horizontal", "vertical"]:
                            placement = {"row": row, "col": col, "direction": direction}

                            try:
                                # These should not crash
                                can_place = generator._can_place_word(grid, word, row, col, direction)
                                score = generator._calculate_placement_score(grid, word, placement, placed_words)
                            except IndexError as e:
                                pytest.fail(f"❌ IndexError with grid_size={case['grid_size']}, word='{word}': {e}")

                            assert isinstance(can_place, bool)
                            assert isinstance(score, int)

if __name__ == "__main__":
    # Run just the tests in this file when executed as a script, and pass
    # pytest's exit code on to the process so a failing run exits non-zero.
    sys.exit(pytest.main([__file__, "-v", "--tb=short"]))