#!/usr/bin/env python3
"""
Debug the complete crossword generation process to identify display/numbering issues.

The script generates one puzzle from a fixed word list, prints the resulting
grid and clues, and then runs three consistency checks:

* every clue's word is found in the grid at the clue's position,
* clue numbers are free of conflicts and gaps,
* every run of two or more letters in the grid is backed by a clue.
"""

import asyncio
import sys
import json
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

# Marker used in the grid for a cell that holds no letter.
EMPTY_CELL = "."


async def debug_complete_generation():
    """Generate a crossword from controlled test words and analyze the result.

    The generator's ``_select_words`` coroutine is replaced with a stub that
    always returns the same eight words, so the run does not depend on any
    word source. Any exception raised during generation is printed together
    with its traceback instead of propagating.
    """
    # Imported here rather than at module level so the analysis helpers in
    # this file can be imported without the project package being available.
    # The sys.path entry added at module level is already in place by now.
    from src.services.crossword_generator_fixed import CrosswordGeneratorFixed

    print("🔍 Debugging Complete Crossword Generation Process\n")

    # Create generator with no vector service to use static words
    generator = CrosswordGeneratorFixed(vector_service=None)

    # Override the word selection to use controlled test words
    test_words = [
        {"word": "MACHINE", "clue": "Device with moving parts"},
        {"word": "COMPUTER", "clue": "Electronic device"},
        {"word": "EXPERT", "clue": "Person with specialized knowledge"},
        {"word": "SCIENCE", "clue": "Systematic study"},
        {"word": "TECHNOLOGY", "clue": "Applied science"},
        {"word": "RESEARCH", "clue": "Systematic investigation"},
        {"word": "ANALYSIS", "clue": "Detailed examination"},
        {"word": "METHOD", "clue": "Systematic approach"},
    ]

    # Mock the word selection method
    async def mock_select_words(topics, difficulty, use_ai):
        return test_words

    generator._select_words = mock_select_words

    print("=" * 70)
    print("GENERATING COMPLETE CROSSWORD")
    print("=" * 70)

    try:
        result = await generator.generate_puzzle(["technology"], "medium", use_ai=False)

        if result:
            print("✅ Crossword generation successful!")

            # Analyze the complete result
            analyze_crossword_result(result)
        else:
            print("❌ Crossword generation failed - returned None")

    except Exception as e:
        # Top-level boundary of a debug script: report everything, hide nothing.
        print(f"❌ Crossword generation failed with error: {e}")
        import traceback
        traceback.print_exc()


def analyze_crossword_result(result: dict) -> None:
    """Print the puzzle's metadata, grid and clues, then run all checks.

    Args:
        result: Mapping read via the keys ``"metadata"`` (dict), ``"grid"``
            (list of rows) and ``"clues"`` (list of dicts). Missing keys are
            treated as empty.
    """
    print("\n" + "=" * 70)
    print("CROSSWORD RESULT ANALYSIS")
    print("=" * 70)

    # Print basic metadata
    metadata = result.get("metadata", {})
    print("Metadata:")
    for key, value in metadata.items():
        print(f" {key}: {value}")

    # Analyze the grid
    grid = result.get("grid", [])
    print(f"\nGrid dimensions: {len(grid)}x{len(grid[0]) if grid else 0}")
    print("\nGrid layout:")
    print_numbered_grid(grid)

    # Analyze placed words vs clues
    clues = result.get("clues", [])
    print(f"\nNumber of clues generated: {len(clues)}")
    print("\nClue analysis:")
    for index, clue in enumerate(clues, start=1):
        print(f" Clue {index}:")
        print(f" Number: {clue.get('number', 'MISSING')}")
        print(f" Word: {clue.get('word', 'MISSING')}")
        print(f" Direction: {clue.get('direction', 'MISSING')}")
        print(f" Position: {clue.get('position', 'MISSING')}")
        print(f" Text: {clue.get('text', 'MISSING')}")

    # Check for potential issues
    print("\n" + "=" * 70)
    print("ISSUE DETECTION")
    print("=" * 70)

    check_word_boundary_consistency(grid, clues)
    check_numbering_consistency(clues)
    check_grid_word_alignment(grid, clues)


def print_numbered_grid(grid: list) -> None:
    """Print the grid with column and row indices for analysis.

    Args:
        grid: List of rows; each row is a sequence of single-cell strings.
            An empty grid prints a placeholder line instead.
    """
    if not grid:
        print(" Empty grid")
        return

    # The header is padded by the width of a row label (" NN: ") so that each
    # two-character column number sits directly above its two-character cell.
    row_label_width = len(f" {0:2d}: ")
    column_numbers = "".join(f"{c:2d}" for c in range(len(grid[0])))
    print(" " * row_label_width + column_numbers)

    # Print rows with row numbers
    for r, row in enumerate(grid):
        cells = "".join(f" {cell}" for cell in row)
        print(f" {r:2d}: {cells}")


def _print_report(issues: list, failure_heading: str, success_message: str) -> None:
    """Print ``issues`` under ``failure_heading``, or ``success_message`` if none."""
    if not issues:
        print(success_message)
        return
    print(failure_heading)
    for issue in issues:
        print(f" {issue}")


def _is_grid_index(value) -> bool:
    """Return True when ``value`` is a non-negative integer."""
    return isinstance(value, int) and value >= 0


def check_word_boundary_consistency(grid: list, clues: list) -> None:
    """Check that each clue's word is what the grid holds at the clue's position.

    A clue with an empty word, position or direction is reported as
    incomplete. A position whose ``row``/``col`` is missing, negative or not
    an integer is reported as invalid rather than raising.
    """
    print("Checking word boundary consistency:")

    issues_found = []

    for clue in clues:
        word = clue.get("word", "")
        position = clue.get("position", {})
        direction = clue.get("direction", "")

        if not (word and position and direction):
            issues_found.append(f"Incomplete clue data: {clue}")
            continue

        row = position.get("row", -1)
        col = position.get("col", -1)

        if not (_is_grid_index(row) and _is_grid_index(col)):
            issues_found.append(f"Invalid position for word '{word}': {position}")
            continue

        # Extract the actual word from the grid
        grid_word = extract_word_from_grid(grid, row, col, direction, len(word))

        if grid_word != word:
            issues_found.append(
                f"Mismatch for '{word}' at ({row}, {col}) {direction}: grid shows '{grid_word}'"
            )

    _print_report(
        issues_found,
        " ❌ Issues found:",
        " ✅ All words match grid positions",
    )


def extract_word_from_grid(grid: list, row: int, col: int, direction: str,
                           expected_length: int) -> str:
    """Read ``expected_length`` cells from the grid starting at (row, col).

    Args:
        grid: List of rows of single-cell strings.
        row: Row index of the first cell.
        col: Column index of the first cell.
        direction: ``"across"`` reads left to right, ``"down"`` reads top to
            bottom. Any other value yields an empty string.
        expected_length: Number of cells to read.

    Returns:
        The concatenated cells; ``"OUT_OF_BOUNDS"`` when the start cell is
        outside the grid; or the cells read so far followed by
        ``"TRUNCATED"`` when the word runs off the grid edge.
    """
    # Explicit lower bounds: a negative index would otherwise wrap around and
    # silently read from the far side of the grid.
    if not 0 <= row < len(grid) or not 0 <= col < len(grid[row]):
        return "OUT_OF_BOUNDS"

    if direction == "across":  # horizontal
        d_row, d_col = 0, 1
    elif direction == "down":  # vertical
        d_row, d_col = 1, 0
    else:
        return ""

    letters = []
    for step in range(expected_length):
        r = row + step * d_row
        c = col + step * d_col
        if r >= len(grid) or c >= len(grid[r]):
            return "".join(letters) + "TRUNCATED"
        letters.append(grid[r][c])

    return "".join(letters)


def _start_cell(clue: dict):
    """Return the clue's start cell as a ``(row, col)`` tuple when available."""
    position = clue.get("position")
    if isinstance(position, dict):
        return (position.get("row"), position.get("col"))
    return position


def check_numbering_consistency(clues: list) -> None:
    """Check that clue numbers are valid, conflict-free and gap-free.

    Reported issues:

    * a clue whose ``number`` is missing or not an integer;
    * a number used by two clues with the same direction, or by clues that
      start in different cells (an across and a down clue starting in the
      same cell legitimately share one number);
    * numbers absent between the smallest and largest number in use.
    """
    print("\nChecking numbering consistency:")

    issues = []

    # Group clues by number, setting aside the ones with no usable number so
    # they neither crash min()/max() nor distort the expected range.
    clues_by_number = {}
    for clue in clues:
        number = clue.get("number")
        if isinstance(number, int) and not isinstance(number, bool):
            clues_by_number.setdefault(number, []).append(clue)
        else:
            issues.append(f"Clue without a valid number: {clue}")

    # Check for duplicate numbers
    for group in clues_by_number.values():
        directions = [clue.get("direction") for clue in group]
        cells = [_start_cell(clue) for clue in group]
        same_direction_twice = len(set(directions)) != len(directions)
        different_cells = any(cell != cells[0] for cell in cells)
        if same_direction_twice or different_cells:
            issues.append("Duplicate clue numbers found")
            break

    # Check for missing numbers in sequence
    if clues_by_number:
        present = set(clues_by_number)
        missing = set(range(min(present), max(present) + 1)) - present
        if missing:
            issues.append(f"Missing numbers: {sorted(missing)}")

    _print_report(
        issues,
        " ❌ Numbering issues:",
        " ✅ Numbering is consistent",
    )


def _alignment_issue(clue_words: dict, key: tuple, word: str, label: str):
    """Return a message if the grid run ``word`` at ``key`` disagrees with the clues.

    ``key`` is ``(row, col, direction)``; ``label`` is the orientation name
    used in the "unaccounted" message. Returns ``None`` when a clue at that
    key carries exactly this word.
    """
    row, col, _direction = key
    if key not in clue_words:
        return f"Unaccounted {label} sequence: '{word}' at ({row}, {col})"
    if clue_words[key] != word:
        return (
            f"Mismatch: clue says '{clue_words[key]}' but grid shows '{word}' "
            f"at ({row}, {col})"
        )
    return None


def check_grid_word_alignment(grid: list, clues: list) -> None:
    """Check that every letter run in the grid corresponds to a clue.

    Each horizontal or vertical run of two or more letters must start at the
    position of a clue with the matching direction and spell that clue's
    word; otherwise the run is an unintended extension or an orphan.
    """
    print("\nChecking grid word alignment:")

    # Find all letter sequences in the grid
    horizontal_sequences = find_horizontal_sequences(grid)
    vertical_sequences = find_vertical_sequences(grid)

    print(f" Found {len(horizontal_sequences)} horizontal sequences")
    print(f" Found {len(vertical_sequences)} vertical sequences")

    # Index clue words by (row, col, direction) for direct lookup
    clue_words = {}
    for clue in clues:
        pos = clue.get("position", {})
        key = (pos.get("row"), pos.get("col"), clue.get("direction"))
        clue_words[key] = clue.get("word", "")

    issues = []

    # Check horizontal sequences
    for row, start_col, word in horizontal_sequences:
        issue = _alignment_issue(clue_words, (row, start_col, "across"), word, "horizontal")
        if issue is not None:
            issues.append(issue)

    # Check vertical sequences
    for col, start_row, word in vertical_sequences:
        issue = _alignment_issue(clue_words, (start_row, col, "down"), word, "vertical")
        if issue is not None:
            issues.append(issue)

    _print_report(
        issues,
        " ❌ Alignment issues found:",
        " ✅ All words are properly aligned",
    )


def _letter_runs(cells):
    """Yield ``(start_index, word)`` for each run of letters longer than one.

    A run is a maximal stretch of consecutive cells that are not
    ``EMPTY_CELL``; its word is the concatenation of those cells.
    """
    start = None
    letters = []
    for index, cell in enumerate(cells):
        if cell != EMPTY_CELL:
            if start is None:
                start = index
            letters.append(cell)
            continue
        word = "".join(letters)
        if len(word) > 1:
            yield start, word
        start = None
        letters = []

    # Handle a run that reaches the end of the line
    word = "".join(letters)
    if len(word) > 1:
        yield start, word


def find_horizontal_sequences(grid: list) -> list:
    """Find all horizontal letter sequences of length > 1.

    Returns:
        A list of ``(row, start_col, word)`` tuples in row-major order.
    """
    sequences = []
    for r, row in enumerate(grid):
        for start_col, word in _letter_runs(row):
            sequences.append((r, start_col, word))
    return sequences


def find_vertical_sequences(grid: list) -> list:
    """Find all vertical letter sequences of length > 1.

    Returns:
        A list of ``(col, start_row, word)`` tuples in column-major order.
        An empty grid yields an empty list.
    """
    # Widest row decides the column count; cells missing from shorter rows
    # are treated as empty so a ragged grid cannot raise IndexError.
    width = max((len(row) for row in grid), default=0)

    sequences = []
    for c in range(width):
        column = [row[c] if c < len(row) else EMPTY_CELL for row in grid]
        for start_row, word in _letter_runs(column):
            sequences.append((c, start_row, word))
    return sequences


if __name__ == "__main__":
    asyncio.run(debug_complete_generation())