Spaces:
Sleeping
Sleeping
| import base64 | |
| import json | |
| import requests | |
| import re | |
| import sys | |
| import os | |
| from pathlib import Path | |
# ===== CONFIGURATION =====
# Image suggested at the interactive prompt; it is only offered as a default
# when this file actually exists on disk.
DEFAULT_IMAGE_PATH = "resources/downloads/photo_2025-12-13_09-18-38.jpg"
# Endpoint the base64-encoded image is POSTed to for OCR.
OCR_API_URL = "https://gaxyqcsvy2ii5nsxz74lgsj3ay0gljec.lambda-url.us-east-1.on.aws/"
# Remote English word list, and the local file it is stored in once downloaded.
DICTIONARY_URL = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
DICTIONARY_FILE = "words_alpha.txt"
# =========================
def image_to_base64_data_url(image_path: str) -> str:
    """Encode an image file as a ``data:`` URL with a base64 payload.

    Args:
        image_path: Location of the image on disk.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``. The MIME type
        is ``image/png`` for ``.png`` files and ``image/jpeg`` for everything
        else.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing regular file.
    """
    path = Path(image_path)
    # is_file() rather than exists(): a directory "exists" but cannot be read
    # as an image, and would otherwise surface as an unrelated OSError from
    # the read below instead of the documented FileNotFoundError.
    if not path.is_file():
        raise FileNotFoundError(f"Image not found: {image_path}")

    mime = "image/png" if path.suffix.lower() == ".png" else "image/jpeg"
    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    return f"data:{mime};base64,{encoded}"
def get_ocr_result(image_path: str):
    """Send the image to the OCR API and return the decoded response.

    Args:
        image_path: Path of the image to upload.

    Returns:
        The decoded JSON body when the response is valid JSON, otherwise
        ``{"text": <raw response text>}``. ``None`` when the image cannot be
        read or the HTTP request fails (the reason is printed).
    """
    print(f"[*] Sending image to OCR API: {image_path}")
    try:
        data_url = image_to_base64_data_url(image_path)
    except FileNotFoundError:
        print(f"[!] File not found: {image_path}")
        return None
    except OSError as e:
        # Unreadable file (permissions, path is a directory, ...): report it
        # the same way as a missing file instead of crashing the caller.
        print(f"[!] Could not read image {image_path}: {e}")
        return None

    payload = {"image": data_url}
    headers = {
        "Accept": "*/*",
        "Content-Type": "text/plain;charset=UTF-8",
        "Origin": "https://wordsearchonline.com",
        "Referer": "https://wordsearchonline.com/",
        "User-Agent": "Mozilla/5.0 (Linux; Android 10) Chrome/137 Mobile",
    }

    try:
        r = requests.post(
            OCR_API_URL,
            data=json.dumps(payload),
            headers=headers,
            timeout=30,
        )
        r.raise_for_status()
    except requests.RequestException as e:
        print(f"[!] OCR Request failed: {e}")
        return None

    # The body may be JSON or plain text. A JSON decode failure is a
    # ValueError subclass, so only that is treated as "plain text"; anything
    # else (e.g. KeyboardInterrupt) is no longer swallowed.
    try:
        return r.json()
    except ValueError:
        return {"text": r.text}
def parse_grid_from_ocr(ocr_text):
    """Extract the letter grid from OCR text.

    Lines are read from the top. Blank lines are skipped, and reading stops at
    the first line containing "words:" (case-insensitive). Every other line is
    reduced to its ASCII letters, upper-cased, and kept as a row when at least
    four letters remain (shorter leftovers are treated as noise).

    Args:
        ocr_text: Text returned by the OCR step. ``None`` or an empty string
            yields an empty grid.

    Returns:
        A list of rows, each row a list of single upper-case letters. Rows are
        not forced to the same length.
    """
    # A response without usable text must give "no grid", not an
    # AttributeError on None.
    if not ocr_text:
        return []

    non_letters = re.compile(r'[^a-zA-Z]')
    grid = []
    for line in ocr_text.split('\n'):
        stripped = line.strip()
        if not stripped:
            continue
        # Everything from the "words:" section onwards is not part of the grid.
        if "words:" in stripped.lower():
            break
        # OCR may space the letters out ("A B C"); keep the letters only.
        clean_row = non_letters.sub('', stripped).upper()
        if len(clean_row) >= 4:
            grid.append(list(clean_row))
    return grid
def load_dictionary():
    """Load the set of upper-case English words used to validate candidates.

    The word list is read from ``DICTIONARY_FILE``. When that file does not
    exist it is first downloaded from ``DICTIONARY_URL``.

    Returns:
        A set of upper-case words (the word list plus a few slang extras), or
        ``None`` when the list can neither be downloaded nor read. ``None``
        means "no dictionary": callers then skip validation.
    """
    path = Path(DICTIONARY_FILE)
    # Common slang/game words that might be missing from formal dictionaries.
    extra_words = {
        "KINDA", "GONNA", "WANNA", "GOTTA", "GIMME", "LEMME", "CAUSE",
        "DUNNO", "SORTA", "OUTTA", "INNIT", "YALL", "AINT",
    }

    if not path.exists():
        print(f"[*] Dictionary file not found. Downloading from {DICTIONARY_URL}...")
        # On any download failure return None so validation really is skipped,
        # as the messages say. Returning only the slang extras would validate
        # against a 13-word dictionary and reject almost every real answer.
        try:
            r = requests.get(DICTIONARY_URL, timeout=10)
            if r.status_code != 200:
                print(f"[!] Failed to download dictionary (Status: {r.status_code}). Validation will be skipped.")
                return None
            with open(path, "w", encoding="utf-8") as f:
                f.write(r.text)
            print("[*] Dictionary downloaded successfully.")
        except (requests.RequestException, OSError) as e:
            print(f"[!] Dictionary download error: {e}. Validation will be skipped.")
            return None

    try:
        with open(path, "r", encoding="utf-8") as f:
            words = {w.upper() for w in (line.strip() for line in f) if w}
    except (OSError, UnicodeDecodeError) as e:
        print(f"[!] Error reading dictionary: {e}")
        return None

    words.update(extra_words)
    return words
def _read_word(grid, row, col, d_row, d_col, length):
    """Return the `length` letters read from (row, col) along one direction, or None if the path leaves the grid."""
    last_row = row + (length - 1) * d_row
    if not 0 <= last_row < len(grid):
        return None
    letters = []
    for step in range(length):
        r = row + step * d_row
        c = col + step * d_col
        # Rows may differ in length, so the column is checked against the
        # row actually being visited at every step.
        if not 0 <= c < len(grid[r]):
            return None
        letters.append(grid[r][c])
    return "".join(letters)


def find_words_in_grid(grid, constraints, dictionary):
    """Find grid words matching each (start letter, length) constraint.

    For every constraint, each cell holding the start letter is used as an
    origin, and a string of the requested length is read in all 8 directions.
    A string is kept if it is in ``dictionary``; when ``dictionary`` is empty
    or ``None`` every string that fits in the grid is kept (noisy).

    Args:
        grid: List of rows, each a list of single upper-case letters. Rows may
            have different lengths.
        constraints: Iterable of ``(start_char, length)`` pairs. The start
            letter is matched case-insensitively against the upper-case grid.
        dictionary: Container of valid upper-case words, or a falsy value to
            disable validation.

    Returns:
        A dict mapping each constraint's index to an alphabetically sorted
        list of distinct words found (possibly empty). An empty grid yields an
        empty dict.
    """
    found_map = {}  # Key: index of constraint, value: list of words found
    if not grid:
        return found_map

    # 8 directions as (row_delta, col_delta).
    directions = (
        (0, 1), (0, -1),    # Right, Left
        (1, 0), (-1, 0),    # Down, Up
        (1, 1), (1, -1),    # Down-Right, Down-Left
        (-1, 1), (-1, -1),  # Up-Right, Up-Left
    )

    for idx, (start_char, length) in enumerate(constraints):
        start_char = start_char.upper()
        candidates = set()
        # A non-positive length cannot describe a word; without this guard it
        # would produce the empty string as a "match".
        if length >= 1:
            for r, row in enumerate(grid):
                for c, cell in enumerate(row):
                    if cell != start_char:
                        continue
                    for dr, dc in directions:
                        word = _read_word(grid, r, c, dr, dc, length)
                        if word is None:
                            continue
                        if not dictionary or word in dictionary:
                            candidates.add(word)
        # Sorted so the output order is stable between runs (set iteration
        # order is not).
        found_map[idx] = sorted(candidates)
    return found_map
def save_grid_to_file(grid, filename="grid.txt"):
    """Write the grid to ``filename``, one row per line with cells separated by a space.

    Failures are reported on stdout rather than raised.
    """
    try:
        with open(filename, "w") as out:
            out.writelines(" ".join(cells) + "\n" for cells in grid)
        print(f"[*] Grid saved to {filename}")
    except Exception as err:
        print(f"[!] Failed to save grid: {err}")
def load_grid_from_file(filename="grid.txt"):
    """Read a grid back from a text file.

    Each line becomes one row made of its alphanumeric characters,
    upper-cased; lines with no such characters are dropped. Returns the list
    of rows, or ``None`` (after printing the error) when reading fails.
    """
    try:
        with open(filename, "r") as src:
            cleaned = (
                [ch.upper() for ch in raw.strip() if ch.isalnum()]
                for raw in src
            )
            rows = [cells for cells in cleaned if cells]
        print(f"[*] Grid loaded from {filename}")
        return rows
    except Exception as err:
        print(f"[!] Failed to load grid: {err}")
        return None
def solve_challenge(image_path, clue_text):
    """Solve a word-search challenge without user interaction.

    Args:
        image_path: Path of the puzzle image sent to the OCR step.
        clue_text: Text containing clues in the "X--- (4)" format (one letter,
            one or more hyphens, the word length in parentheses).

    Returns:
        One of:
        * ``{"error": ...}`` when OCR fails or no grid is detected;
        * ``{"grid": ..., "solutions": [], "error": ...}`` when the clue text
          holds no recognisable clue;
        * ``{"grid": ..., "solutions": [...]}`` on success, where each solution
          is ``{"pattern": "X--- (4)", "found": [words]}``.
    """
    # 1. Image -> OCR
    ocr_result = get_ocr_result(image_path)
    if not ocr_result:
        return {"error": "OCR failed"}

    # The response may be a dict with a "text" entry or some other JSON value.
    # A missing or null text becomes "" so it ends up as "no grid" below
    # instead of raising inside the parser.
    text = ocr_result.get("text") if isinstance(ocr_result, dict) else ocr_result
    raw_text = "" if text is None else str(text)

    # 2. OCR -> Grid
    grid = parse_grid_from_ocr(raw_text)
    if not grid:
        return {"error": "No grid found in image"}

    # 3. Parse clues before touching the dictionary, so a request without
    #    clues does not trigger a dictionary load or download.
    pattern = re.compile(r'([A-Z])\-+\s*\((\d+)\)', re.IGNORECASE)
    parsed_constraints = [
        (letter, int(count)) for letter, count in pattern.findall(clue_text or "")
    ]
    if not parsed_constraints:
        return {
            "grid": grid,
            "solutions": [],
            "error": "No clues found in text",
        }

    # 4. Load dictionary (None means validation is skipped)
    dictionary = load_dictionary()

    # 5. Solve
    solutions_map = find_words_in_grid(grid, parsed_constraints, dictionary)

    results = [
        {
            "pattern": f"{char.upper()}{'-'*(length-1)} ({length})",
            "found": solutions_map.get(idx, []),
        }
        for idx, (char, length) in enumerate(parsed_constraints)
    ]
    return {
        "grid": grid,
        "solutions": results,
    }
def _ask_image_path():
    """Return an existing image path, taken from argv[1] or prompted for until valid."""
    image_path = sys.argv[1] if len(sys.argv) > 1 else None
    while not image_path or not Path(image_path).exists():
        if image_path:
            print(f"[!] File not found: {image_path}")
        # Suggest the default only if it exists.
        default_hint = f" (default: {DEFAULT_IMAGE_PATH})" if Path(DEFAULT_IMAGE_PATH).exists() else ""
        try:
            user_input = input(f"Enter image path{default_hint}: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nExiting.")
            sys.exit(0)
        if not user_input and Path(DEFAULT_IMAGE_PATH).exists():
            image_path = DEFAULT_IMAGE_PATH
        elif user_input:
            # Dragging a file into a terminal often wraps the path in quotes.
            image_path = user_input.strip('"\'')
        else:
            print("[!] Please enter a valid path.")
    return image_path


def _read_challenge_text():
    """Collect pasted lines from stdin until a blank line follows some content (or EOF/Ctrl-C)."""
    lines = []
    while True:
        try:
            line = input()
        except (EOFError, KeyboardInterrupt):
            break
        if line.strip():
            lines.append(line)
        elif lines:
            # Blank lines before any content are ignored; the first blank
            # line after content ends the input.
            break
    return "\n".join(lines)


def main():
    """Run the interactive solver.

    Steps: reuse a saved ``grid.txt`` if the user agrees, otherwise OCR an
    image into a grid and save it; load the dictionary; read the pasted
    challenge text; print the words found for every "X--- (4)" style pattern.
    Returns early, after printing the reason, when a step cannot be completed.
    """
    print("=== Word Search Solver ===")
    grid = None

    # Offer the previously saved grid, if any.
    if Path("grid.txt").exists():
        try:
            use_saved = input("[?] Found saved 'grid.txt'. Use it? (Y/n): ").strip().lower()
        except (KeyboardInterrupt, EOFError):
            # Same exit path as the image-path prompt, instead of a traceback.
            print("\nExiting.")
            sys.exit(0)
        if use_saved in ("", "y", "yes"):
            grid = load_grid_from_file("grid.txt")

    # No grid loaded (or the user declined): build one from an image.
    if not grid:
        # 1. Image path
        image_path = _ask_image_path()
        print(f"[*] Using image: {image_path}")

        # 2. Process image
        ocr_result = get_ocr_result(image_path)
        if not ocr_result:
            print("[!] Failed to get OCR result. Exiting.")
            return

        # A missing or null "text" entry counts as empty text.
        text = ocr_result.get("text") if isinstance(ocr_result, dict) else ocr_result
        raw_text = "" if text is None else str(text)
        if not raw_text:
            print("[!] OCR returned empty text.")
            return

        # 3. Parse grid
        grid = parse_grid_from_ocr(raw_text)
        if not grid:
            print("[!] No valid grid detected.")
            return
        save_grid_to_file(grid)

    print("\n[+] Current Grid:")
    for row in grid:
        print(" " + " ".join(row))
    print(f" (Size: {len(grid)}x{len(grid[0]) if grid else 0})")

    # 4. Load dictionary
    print("\n[*] Loading dictionary for validation...")
    dictionary = load_dictionary()
    if dictionary:
        print(f"[*] Dictionary loaded ({len(dictionary)} words).")
    else:
        print("[!] Warning: Dictionary not available. Results may contain invalid words.")

    # 5. User input for the challenge
    print("\n" + "="*40)
    print("PASTE THE CHALLENGE TEXT BELOW.")
    print("example: 'Find these words: O--- (4)'")
    print("Press Enter twice to finish input.")
    print("="*40)
    user_msg = _read_challenge_text()

    # 6. Parse constraints: one letter, hyphens, optional spaces, "(N)".
    pattern = re.compile(r'([A-Z])\-+\s*\((\d+)\)', re.IGNORECASE)
    constraints = pattern.findall(user_msg)
    if not constraints:
        print("[!] No constraints found in message. (Format: 'X--- (4)')")
        return
    print(f"\n[*] Found {len(constraints)} patterns to search.")

    # 7. Solve
    parsed_constraints = [(c[0], int(c[1])) for c in constraints]
    solutions = find_words_in_grid(grid, parsed_constraints, dictionary)

    # 8. Output
    print("\n" + "="*15 + " SOLUTIONS " + "="*15)
    for idx, (char, length) in enumerate(parsed_constraints):
        found = solutions.get(idx, [])
        pattern_str = f"{char.upper()}{'-'*(length-1)} ({length})"
        if found:
            print(f"{pattern_str} => {', '.join(found)}")
        else:
            print(f"{pattern_str} => [Not Found]")
    print("="*41)


if __name__ == "__main__":
    main()