# Spaces: vimalk78 / abc123 (Sleeping)
# File size: 10,469 Bytes
# 38c016b

#!/usr/bin/env python3
"""
Debug the complete crossword generation process to identify display/numbering issues.
"""

import asyncio
import sys
import json
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

async def debug_complete_generation():
    """Run one puzzle generation with a fixed word list and report on it.

    The generator is built without a vector service and its ``_select_words``
    coroutine is replaced by a stub that always returns the same eight
    word/clue entries. A truthy result from ``generate_puzzle`` is handed to
    ``analyze_crossword_result``; a falsy result or any exception is reported
    on stdout (exceptions together with their traceback).
    """
    print("🔍 Debugging Complete Crossword Generation Process\n")

    # No vector service is supplied so that the static word path is used
    generator = CrosswordGeneratorFixed(vector_service=None)

    # Controlled input: (word, clue) pairs expanded into the dict shape below
    word_clue_pairs = (
        ("MACHINE", "Device with moving parts"),
        ("COMPUTER", "Electronic device"),
        ("EXPERT", "Person with specialized knowledge"),
        ("SCIENCE", "Systematic study"),
        ("TECHNOLOGY", "Applied science"),
        ("RESEARCH", "Systematic investigation"),
        ("ANALYSIS", "Detailed examination"),
        ("METHOD", "Systematic approach"),
    )
    test_words = [{"word": word, "clue": clue} for word, clue in word_clue_pairs]

    # Stub replacing the generator's own word selection
    async def fixed_word_selection(topics, difficulty, use_ai):
        return test_words

    generator._select_words = fixed_word_selection

    banner = "=" * 70
    print(banner)
    print("GENERATING COMPLETE CROSSWORD")
    print(banner)

    # The analysis stays inside the try so its failures are reported the same
    # way as generation failures.
    try:
        result = await generator.generate_puzzle(["technology"], "medium", use_ai=False)

        if not result:
            print("❌ Crossword generation failed - returned None")
        else:
            print("✅ Crossword generation successful!")
            analyze_crossword_result(result)

    except Exception as exc:
        print(f"❌ Crossword generation failed with error: {exc}")
        import traceback
        traceback.print_exc()

def analyze_crossword_result(result):
    """Dump a generated puzzle to stdout and run every consistency check on it.

    Reads the ``metadata``, ``grid`` and ``clues`` entries of ``result`` (each
    falls back to an empty container when absent), prints them, and then calls
    the three ``check_*`` routines in turn.
    """
    separator = "=" * 70
    metadata = result.get("metadata", {})
    grid = result.get("grid", [])
    clues = result.get("clues", [])

    print("\n" + separator)
    print("CROSSWORD RESULT ANALYSIS")
    print(separator)

    # Metadata, one key per line
    print("Metadata:")
    for key in metadata:
        print(f"  {key}: {metadata[key]}")

    # Grid size (width is taken from the first row) and layout
    row_count = len(grid)
    col_count = len(grid[0]) if grid else 0
    print(f"\nGrid dimensions: {row_count}x{col_count}")

    print("\nGrid layout:")
    print_numbered_grid(grid)

    # Clue listing
    print(f"\nNumber of clues generated: {len(clues)}")

    print("\nClue analysis:")
    clue_fields = (
        ("Number", "number"),
        ("Word", "word"),
        ("Direction", "direction"),
        ("Position", "position"),
        ("Text", "text"),
    )
    for ordinal, clue in enumerate(clues, start=1):
        print(f"  Clue {ordinal}:")
        for label, field in clue_fields:
            print(f"    {label}: {clue.get(field, 'MISSING')}")

    # Automated checks
    print("\n" + separator)
    print("ISSUE DETECTION")
    print(separator)

    check_word_boundary_consistency(grid, clues)
    check_numbering_consistency(clues)
    check_grid_word_alignment(grid, clues)

def print_numbered_grid(grid):
    """Print the grid to stdout with row and column indices for analysis.

    Args:
        grid: Sequence of rows, each a sequence of cell values. The number of
            columns printed is taken from the first row.

    Output layout: one header line of column indices, then one line per row
    prefixed with its row index. Every cell occupies two characters (a space
    followed by the cell value) and every column index is right-aligned in the
    same two-character slot, so an index sits directly above its column.
    An empty grid prints a single "Empty grid" line.
    """
    if not grid:
        print("  Empty grid")
        return

    width = len(grid[0])

    # The row label f" {r:2d}: " is five characters wide; the header gutter
    # must match it or every column index is shifted one character left.
    header = "".join(f"{c:2d}" for c in range(width))
    print("     " + header)

    # Each cell is rendered as " <value>", whatever the value is.
    for r, row in enumerate(grid):
        cells = "".join(f" {row[c]}" for c in range(width))
        print(f" {r:2d}: {cells}")

def check_word_boundary_consistency(grid, clues):
    """Verify that each clue's word can be read back from the grid.

    Every clue needs a non-empty ``word``, ``position`` and ``direction``.
    The letters are read from the grid with ``extract_word_from_grid``,
    starting at the clue's position, and compared with the clue word.
    Findings are printed to stdout; nothing is returned.
    """
    print("Checking word boundary consistency:")

    def _findings():
        # Yields one message per clue that cannot be confirmed against the grid
        for clue in clues:
            word = clue.get("word", "")
            position = clue.get("position", {})
            direction = clue.get("direction", "")

            if not (word and position and direction):
                yield f"Incomplete clue data: {clue}"
                continue

            row = position.get("row", -1)
            col = position.get("col", -1)

            if row < 0 or col < 0:
                yield f"Invalid position for word '{word}': {position}"
                continue

            # Read back as many cells as the clue word has letters
            grid_word = extract_word_from_grid(grid, row, col, direction, len(word))

            if grid_word != word:
                yield f"Mismatch for '{word}' at ({row}, {col}) {direction}: grid shows '{grid_word}'"

    problems = list(_findings())

    if not problems:
        print("  ✅ All words match grid positions")
        return

    print("  ❌ Issues found:")
    for problem in problems:
        print(f"    {problem}")

def extract_word_from_grid(grid, row, col, direction, expected_length):
    """Read ``expected_length`` cells from the grid starting at (row, col).

    Args:
        grid: List of rows of cell strings. The grid width is taken from the
            first row.
        row: Zero-based row of the first cell.
        col: Zero-based column of the first cell.
        direction: "across" reads left to right, "down" reads top to bottom.
            Any other value reads nothing and yields "".
        expected_length: Number of cells to read.

    Returns:
        The concatenated cell values. "OUT_OF_BOUNDS" when the start cell is
        not inside the grid (including an empty grid or a negative index).
        When the word runs past the grid edge, the cells read so far followed
        by "TRUNCATED".
    """
    # An empty grid has no first row to measure, and negative indices would
    # otherwise wrap around silently via Python's negative indexing.
    if not grid or row < 0 or col < 0:
        return "OUT_OF_BOUNDS"

    height = len(grid)
    width = len(grid[0])
    if row >= height or col >= width:
        return "OUT_OF_BOUNDS"

    if direction == "across":  # horizontal
        row_step, col_step = 0, 1
    elif direction == "down":  # vertical
        row_step, col_step = 1, 0
    else:
        # Unknown direction: nothing is read
        return ""

    word = ""
    for i in range(expected_length):
        r = row + i * row_step
        c = col + i * col_step
        if r >= height or c >= width:
            return word + "TRUNCATED"
        word += grid[r][c]

    return word

def check_numbering_consistency(clues):
    """Check that clue numbers are present, unique and gap-free.

    Args:
        clues: Iterable of clue dicts; the ``number`` entry of each is
            inspected and ``word`` is used only to label messages.

    Prints its findings to stdout and returns nothing. Three things are
    reported:
      * clues whose ``number`` is missing or not an integer,
      * the same number being used by more than one clue,
      * gaps between the smallest and largest number in use.
    """
    print("\nChecking numbering consistency:")

    issues = []
    numbers = []

    # Separate usable numbers from unusable ones first. Feeding a placeholder
    # such as -1 into the range check would report phantom gaps, and None
    # cannot be compared by min()/max().
    for clue in clues:
        number = clue.get("number")
        # bool is an int subclass, but True/False are not clue numbers
        if isinstance(number, int) and not isinstance(number, bool):
            numbers.append(number)
        else:
            label = clue.get("word", "MISSING")
            issues.append(f"Missing or non-integer number for clue '{label}': {number!r}")

    # NOTE(review): an across and a down clue starting in the same cell would
    # share a number under conventional crossword numbering and be flagged
    # here. Confirm which scheme the generator uses.
    if len(numbers) != len(set(numbers)):
        issues.append("Duplicate clue numbers found")

    # Check for gaps. Every number lies within [min, max] by construction, so
    # only "missing" values can occur, never "extra" ones.
    if numbers:
        present = set(numbers)
        expected = set(range(min(present), max(present) + 1))
        missing = sorted(expected - present)
        if missing:
            issues.append(f"Missing numbers: {missing}")

    if issues:
        print("  ❌ Numbering issues:")
        for issue in issues:
            print(f"    {issue}")
    else:
        print("  ✅ Numbering is consistent")

def check_grid_word_alignment(grid, clues):
    """Report letter runs in the grid that no clue accounts for.

    Every horizontal and vertical run of two or more letters found in the grid
    is looked up by its start cell and direction among the clues. A run with
    no clue at that start, or with a clue whose word differs from the run, is
    printed as an issue. Nothing is returned.
    """
    print("\nChecking grid word alignment:")

    # Collect every multi-letter run in both directions
    across_runs = find_horizontal_sequences(grid)
    down_runs = find_vertical_sequences(grid)

    print(f"  Found {len(across_runs)} horizontal sequences")
    print(f"  Found {len(down_runs)} vertical sequences")

    # Index clue words by (row, col, direction); a later clue with the same
    # key replaces an earlier one.
    def _start_key(clue):
        position = clue.get("position", {})
        return (position.get("row"), position.get("col"), clue.get("direction"))

    words_by_start = {_start_key(clue): clue.get("word", "") for clue in clues}

    # Bring both kinds of run into one (row, col, text, direction, label)
    # shape, horizontal runs first. Vertical runs arrive as (col, row, text).
    located = [(row, col, text, "across", "horizontal") for row, col, text in across_runs]
    located += [(row, col, text, "down", "vertical") for col, row, text in down_runs]

    problems = []
    for row, col, text, direction, label in located:
        key = (row, col, direction)
        if key not in words_by_start:
            problems.append(f"Unaccounted {label} sequence: '{text}' at ({row}, {col})")
            continue
        clue_word = words_by_start[key]
        if clue_word != text:
            problems.append(f"Mismatch: clue says '{clue_word}' but grid shows '{text}' at ({row}, {col})")

    if not problems:
        print("  ✅ All words are properly aligned")
        return

    print("  ❌ Alignment issues found:")
    for problem in problems:
        print(f"    {problem}")

def find_horizontal_sequences(grid):
    """Collect every left-to-right run of non-"." cells longer than one character.

    Returns a list of ``(row, start_col, text)`` tuples in row order, then
    column order. The number of columns scanned in each row is the length of
    the grid's first row.
    """
    runs = []

    for row_idx, row in enumerate(grid):
        width = len(grid[0])
        text = ""
        run_start = None

        # One extra step past the last column acts as a virtual "." so that a
        # run touching the right edge is closed by the same code path.
        for col_idx in range(width + 1):
            cell = row[col_idx] if col_idx < width else "."

            if cell != ".":
                if run_start is None:
                    run_start = col_idx
                text = text + cell
                continue

            # Separator reached: keep the run only if it is longer than one
            if len(text) > 1:
                runs.append((row_idx, run_start, text))
            text = ""
            run_start = None

    return runs

def find_vertical_sequences(grid):
    """Find all vertical letter sequences of length > 1.

    Args:
        grid: List of rows of cell strings, where "." marks an empty cell.
            The number of columns scanned is the length of the first row.

    Returns:
        A list of ``(col, start_row, text)`` tuples, ordered by column and
        then by row. Note the column comes first, unlike the horizontal
        counterpart. An empty grid yields an empty list.
    """
    sequences = []

    # Without rows there is no first row to take the width from. Return the
    # same empty result the horizontal scan gives for an empty grid.
    if not grid:
        return sequences

    for c in range(len(grid[0])):
        current_word = ""
        start_row = None

        for r, row in enumerate(grid):
            cell = row[c]
            if cell != ".":
                if start_row is None:
                    start_row = r
                current_word += cell
            else:
                if len(current_word) > 1:
                    sequences.append((c, start_row, current_word))
                current_word = ""
                start_row = None

        # Handle a word that runs to the bottom edge of the column
        if len(current_word) > 1:
            sequences.append((c, start_row, current_word))

    return sequences

# Script entry point: when executed directly (not imported), run the async
# debug routine to completion.
if __name__ == "__main__":
    asyncio.run(debug_complete_generation())