Spaces:

AadityaPawarx1
/

Word-Grid-Solver

Sleeping

App Files Files Community

Word-Grid-Solver / solver.py

AadityaPawarx1

Upload 9 files

20397ff verified about 2 months ago

raw

history blame contribute delete

15.1 kB

	import base64
	import json
	import requests
	import re
	import sys
	import os
	from pathlib import Path

	# ===== CONFIGURATION =====
	# Default image path matching user's likely workflow.
	# main() offers it as the fallback when the user enters nothing and the file exists.
	DEFAULT_IMAGE_PATH = "resources/downloads/photo_2025-12-13_09-18-38.jpg"
	# Endpoint that get_ocr_result() POSTs the base64-encoded image to.
	OCR_API_URL = "https://gaxyqcsvy2ii5nsxz74lgsj3ay0gljec.lambda-url.us-east-1.on.aws/"
	# Word list that load_dictionary() downloads when DICTIONARY_FILE is missing.
	DICTIONARY_URL = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
	# Local cache of the word list; a relative path, so it resolves against the
	# current working directory.
	DICTIONARY_FILE = "words_alpha.txt"
	# =========================

	def image_to_base64_data_url(image_path: str) -> str:
	    """Encode an image file as a ``data:<mime>;base64,<payload>`` URL.

	    The MIME type is picked from the file extension (case-insensitive).
	    Extensions that are not recognised fall back to ``image/jpeg``.

	    Args:
	        image_path: Path to the image file on disk.

	    Returns:
	        The data URL string containing the base64-encoded file contents.

	    Raises:
	        FileNotFoundError: If ``image_path`` does not exist or is not a
	            regular file (e.g. it is a directory).
	    """
	    mime_by_suffix = {
	        ".png": "image/png",
	        ".jpg": "image/jpeg",
	        ".jpeg": "image/jpeg",
	        ".gif": "image/gif",
	        ".webp": "image/webp",
	        ".bmp": "image/bmp",
	    }

	    path = Path(image_path)
	    # is_file() also rejects directories, which would otherwise blow up in
	    # open() with an error type the caller does not handle.
	    if not path.is_file():
	        raise FileNotFoundError(f"Image not found: {image_path}")

	    mime = mime_by_suffix.get(path.suffix.lower(), "image/jpeg")
	    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
	    return f"data:{mime};base64,{encoded}"

	def get_ocr_result(image_path: str):
	    """Send the image to the OCR API and return the decoded response.

	    Args:
	        image_path: Path to the image file to upload.

	    Returns:
	        Whatever the response body decodes to as JSON; if the body is not
	        valid JSON, ``{"text": <raw body>}``. ``None`` when the image cannot
	        be read or the HTTP request fails (a message is printed in both
	        cases).
	    """
	    print(f"[*] Sending image to OCR API: {image_path}")

	    try:
	        data_url = image_to_base64_data_url(image_path)
	    except FileNotFoundError:
	        print(f"[!] File not found: {image_path}")
	        return None
	    except OSError as e:
	        # Unreadable file (permissions, I/O error): report instead of crashing.
	        print(f"[!] Could not read image: {e}")
	        return None

	    headers = {
	        # "*/*" is the standard "accept anything" media range.
	        "Accept": "*/*",
	        "Content-Type": "text/plain;charset=UTF-8",
	        "Origin": "https://wordsearchonline.com",
	        "Referer": "https://wordsearchonline.com/",
	        "User-Agent": "Mozilla/5.0 (Linux; Android 10) Chrome/137 Mobile",
	    }

	    try:
	        response = requests.post(
	            OCR_API_URL,
	            data=json.dumps({"image": data_url}),
	            headers=headers,
	            timeout=30,
	        )
	        response.raise_for_status()
	    except requests.RequestException as e:
	        print(f"[!] OCR Request failed: {e}")
	        return None

	    # The API may answer with JSON or with plain text. The JSON decode
	    # errors raised by requests are ValueError subclasses.
	    try:
	        return response.json()
	    except ValueError:
	        return {"text": response.text}

	def parse_grid_from_ocr(ocr_text):
	    """Extract the character grid from OCR text.

	    Lines are read from the top until one contains ``words:``
	    (case-insensitive). Each line is reduced to its ASCII letters, so
	    ``"A B C D"`` becomes ``"ABCD"``, and upper-cased. Rows with fewer than
	    four letters are treated as noise and dropped. Rows are not padded, so
	    the result may be jagged.

	    Args:
	        ocr_text: Raw OCR output. ``None`` or an empty string yields ``[]``.

	    Returns:
	        A list of rows, each a list of single upper-case letters.
	    """
	    # The OCR payload may carry a null/empty text field; treat it as "no grid".
	    if not ocr_text:
	        return []

	    grid = []
	    for line in ocr_text.split("\n"):
	        stripped = line.strip()
	        if not stripped:
	            continue

	        # Everything from the "words:" section onwards is clue text, not grid.
	        if "words:" in stripped.lower():
	            break

	        letters = re.sub(r"[^a-zA-Z]", "", stripped).upper()
	        if len(letters) >= 4:
	            grid.append(list(letters))

	    return grid

	def load_dictionary():
	    """Load the English word set used to validate candidate words.

	    The word list is read from ``DICTIONARY_FILE``; if that file is missing
	    it is first downloaded from ``DICTIONARY_URL``. A few slang/game words
	    are added on top of the word list.

	    Returns:
	        A set of upper-case words, or ``None`` when no usable word list is
	        available (download failed, file unreadable or empty). ``None``
	        tells the caller to skip validation, as the printed messages say.
	    """
	    # Common slang/game words that might be missing from formal dictionaries
	    extra_words = {
	        "KINDA", "GONNA", "WANNA", "GOTTA", "GIMME", "LEMME", "CAUSE",
	        "DUNNO", "SORTA", "OUTTA", "INNIT", "YALL", "AINT",
	    }
	    path = Path(DICTIONARY_FILE)

	    if not path.exists():
	        print(f"[*] Dictionary file not found. Downloading from {DICTIONARY_URL}...")
	        try:
	            r = requests.get(DICTIONARY_URL, timeout=10)
	        except requests.RequestException as e:
	            print(f"[!] Dictionary download error: {e}. Validation will be skipped.")
	            return None

	        if r.status_code != 200:
	            print(f"[!] Failed to download dictionary (Status: {r.status_code}). Validation will be skipped.")
	            # Returning only the handful of extra words here would make the
	            # solver reject almost every real word, so signal "no dictionary".
	            return None

	        try:
	            # Write to a temporary name and rename, so an interrupted write
	            # never leaves a truncated cache file for later runs.
	            tmp_path = path.with_name(path.name + ".part")
	            tmp_path.write_text(r.text, encoding="utf-8")
	            tmp_path.replace(path)
	        except OSError as e:
	            print(f"[!] Dictionary download error: {e}. Validation will be skipped.")
	            return None
	        print("[*] Dictionary downloaded successfully.")

	    try:
	        with open(path, "r", encoding="utf-8") as f:
	            words = {w.upper() for w in map(str.strip, f) if w}
	    except (OSError, UnicodeError) as e:
	        print(f"[!] Error reading dictionary: {e}")
	        return None

	    if not words:
	        # An empty cache file is as useless as a missing one.
	        print("[!] Error reading dictionary: file is empty")
	        return None

	    words.update(extra_words)
	    return words

	def _extract_word(grid, r, c, dr, dc, length):
	    """Read ``length`` cells from (r, c) stepping by (dr, dc); None if it leaves the grid."""
	    end_r = r + (length - 1) * dr
	    if not 0 <= end_r < len(grid):
	        return None

	    chars = []
	    for k in range(length):
	        row = grid[r + k * dr]
	        col = c + k * dc
	        # Rows may differ in length (jagged grid), so check each step
	        # against the row it actually lands on.
	        if not 0 <= col < len(row):
	            return None
	        chars.append(row[col])
	    return "".join(chars)


	def find_words_in_grid(grid, constraints, dictionary):
	    """Find straight-line words matching (start letter, length) constraints.

	    For every constraint, each cell equal to the start letter is used as an
	    origin and the grid is read in all eight directions for ``length``
	    cells. A candidate is kept when it is in ``dictionary``; when
	    ``dictionary`` is empty or ``None``, every candidate is kept (noisy).

	    Args:
	        grid: List of rows, each a list of single characters; rows may have
	            different lengths.
	        constraints: Iterable of ``(start_char, length)`` pairs. The start
	            letter is matched upper-cased against the grid cells.
	        dictionary: Container of valid words, or a falsy value to skip
	            validation.

	    Returns:
	        Dict mapping each constraint's index to an alphabetically sorted list
	        of distinct words found. An empty grid yields ``{}``; a non-positive
	        length yields an empty list for that constraint.
	    """
	    found_map = {}
	    if len(grid) == 0:
	        return found_map

	    # 8 directions as (row_delta, col_delta)
	    directions = (
	        (0, 1), (0, -1),    # Right, Left
	        (1, 0), (-1, 0),    # Down, Up
	        (1, 1), (1, -1),    # Down-Right, Down-Left
	        (-1, 1), (-1, -1),  # Up-Right, Up-Left
	    )

	    for idx, (start_char, length) in enumerate(constraints):
	        start_char = start_char.upper()
	        candidates = set()

	        # A word needs at least one letter; skip nonsensical lengths.
	        if length >= 1:
	            for r, row in enumerate(grid):
	                for c, cell in enumerate(row):
	                    if cell != start_char:
	                        continue
	                    for dr, dc in directions:
	                        word = _extract_word(grid, r, c, dr, dc, length)
	                        if word is None:
	                            continue
	                        if not dictionary or word in dictionary:
	                            candidates.add(word)

	        # Sorted so the output order is stable from run to run.
	        found_map[idx] = sorted(candidates)

	    return found_map

	def save_grid_to_file(grid, filename="grid.txt"):
	    """Save the grid to a text file, one row per line with cells space-separated.

	    This is best-effort: a failure is reported on stdout and not raised.

	    Args:
	        grid: List of rows, each an iterable of strings.
	        filename: Destination path; an existing file is overwritten.
	    """
	    try:
	        # Explicit encoding so the file does not depend on the platform default.
	        with open(filename, "w", encoding="utf-8") as f:
	            f.writelines(" ".join(row) + "\n" for row in grid)
	    except (OSError, TypeError, ValueError) as e:
	        print(f"[!] Failed to save grid: {e}")
	        return
	    print(f"[*] Grid saved to {filename}")

	def load_grid_from_file(filename="grid.txt"):
	    """Load a grid from a text file.

	    Each line becomes one row: alphanumeric characters are kept and
	    upper-cased, everything else (spaces, punctuation, newlines) is dropped.
	    Lines that end up empty are skipped.

	    Args:
	        filename: Path of the grid file to read.

	    Returns:
	        A list of rows (lists of characters), or ``None`` if the file cannot
	        be read; the failure is reported on stdout.
	    """
	    try:
	        # Explicit encoding so reading does not depend on the platform default.
	        with open(filename, "r", encoding="utf-8") as f:
	            rows = ([ch.upper() for ch in line if ch.isalnum()] for line in f)
	            grid = [row for row in rows if row]
	    except (OSError, ValueError) as e:
	        # ValueError also covers UnicodeDecodeError from a badly encoded file.
	        print(f"[!] Failed to load grid: {e}")
	        return None
	    print(f"[*] Grid loaded from {filename}")
	    return grid

	def solve_challenge(image_path, clue_text):
	    """Programmatic entry point: OCR an image and solve the given clues.

	    Args:
	        image_path: Path to the word-search image.
	        clue_text: Text containing clues in the ``X--- (4)`` format: one
	            letter, one or more hyphens, then the word length in parentheses.

	    Returns:
	        On success ``{"grid": ..., "solutions": [...]}``, where each solution
	        is ``{"pattern": "X--- (4)", "found": [words]}``.
	        ``{"error": ...}`` when OCR fails or no grid is detected; when a grid
	        is found but no clue is, the dict has ``grid``, an empty
	        ``solutions`` list and an ``error`` message.
	    """
	    # 1. Image -> OCR
	    ocr_result = get_ocr_result(image_path)
	    if not ocr_result:
	        return {"error": "OCR failed"}

	    # Extract text; the "text" field may be missing or null.
	    if isinstance(ocr_result, dict):
	        raw_text = ocr_result.get("text") or ""
	    else:
	        raw_text = ocr_result
	    if not isinstance(raw_text, str):
	        raw_text = str(raw_text)

	    # 2. OCR -> Grid
	    grid = parse_grid_from_ocr(raw_text)
	    if not grid:
	        return {"error": "No grid found in image"}

	    # 3. Parse clues before loading the dictionary, so a request without
	    #    clues does not trigger a dictionary load or download.
	    pattern = re.compile(r'([A-Z])\-+\s*\((\d+)\)', re.IGNORECASE)
	    constraints = pattern.findall(clue_text or "")
	    if not constraints:
	        return {
	            "grid": grid,
	            "solutions": [],
	            "error": "No clues found in text",
	        }

	    # 4. Load Dictionary
	    dictionary = load_dictionary()

	    # 5. Solve
	    parsed_constraints = [(char, int(length)) for char, length in constraints]
	    solutions_map = find_words_in_grid(grid, parsed_constraints, dictionary)

	    # Format results
	    results = [
	        {
	            "pattern": f"{char.upper()}{'-' * (length - 1)} ({length})",
	            "found": solutions_map.get(idx, []),
	        }
	        for idx, (char, length) in enumerate(parsed_constraints)
	    ]

	    return {
	        "grid": grid,
	        "solutions": results,
	    }

	def _prompt_for_image_path(image_path=None):
	    """Ask for an image path until it names an existing file; exit on Ctrl-C/EOF."""
	    while not image_path or not Path(image_path).exists():
	        if image_path:
	            print(f"[!] File not found: {image_path}")

	        # Suggest the default only if it exists
	        default_available = Path(DEFAULT_IMAGE_PATH).exists()
	        default_hint = f" (default: {DEFAULT_IMAGE_PATH})" if default_available else ""

	        try:
	            user_input = input(f"Enter image path{default_hint}: ").strip()
	        except (KeyboardInterrupt, EOFError):
	            print("\nExiting.")
	            sys.exit(0)

	        if user_input:
	            # Handle quotes in path if user drags and drops file
	            image_path = user_input.strip('"\'')
	        elif default_available:
	            image_path = DEFAULT_IMAGE_PATH
	        else:
	            print("[!] Please enter a valid path.")

	    return image_path


	def _read_challenge_text():
	    """Read pasted lines from stdin until a blank line follows some content (or EOF)."""
	    lines = []
	    while True:
	        try:
	            line = input()
	        except (EOFError, KeyboardInterrupt):
	            break
	        if line.strip():
	            lines.append(line)
	        elif lines:
	            # A blank line after at least one content line ends the input;
	            # leading blank lines are ignored.
	            break
	    return "\n".join(lines)


	def main():
	    """Interactive command-line entry point.

	    Steps: optionally reuse a saved ``grid.txt``; otherwise OCR an image
	    (path taken from ``sys.argv[1]`` or prompted for), parse the grid and
	    save it. Then load the dictionary, read the pasted challenge text, and
	    print the words found for every ``X--- (N)`` pattern.
	    """
	    print("=== Word Search Solver ===")

	    grid = None

	    # Check if grid.txt exists and ask user
	    if Path("grid.txt").exists():
	        try:
	            use_saved = input("[?] Found saved 'grid.txt'. Use it? (Y/n): ").strip().lower()
	        except (KeyboardInterrupt, EOFError):
	            print("\nExiting.")
	            sys.exit(0)
	        if use_saved in ("", "y", "yes"):
	            grid = load_grid_from_file("grid.txt")

	    # If no grid loaded (or user said no), proceed with Image Processing
	    if not grid:
	        # 1. Image Path
	        image_path = _prompt_for_image_path(sys.argv[1] if len(sys.argv) > 1 else None)
	        print(f"[*] Using image: {image_path}")

	        # 2. Process Image
	        ocr_result = get_ocr_result(image_path)
	        if not ocr_result:
	            print("[!] Failed to get OCR result. Exiting.")
	            return

	        # Extract text from JSON
	        if isinstance(ocr_result, dict):
	            raw_text = ocr_result.get("text", "")
	        else:
	            raw_text = str(ocr_result)

	        if not raw_text:
	            print("[!] OCR returned empty text.")
	            return

	        # 3. Parse Grid
	        grid = parse_grid_from_ocr(raw_text)

	        if grid:
	            save_grid_to_file(grid)

	    if not grid:
	        print("[!] No valid grid detected.")
	        return

	    print("\n[+] Current Grid:")
	    for row in grid:
	        print(" " + " ".join(row))
	    # Width is taken from the first row; later rows may differ.
	    print(f" (Size: {len(grid)}x{len(grid[0])})")

	    # 4. Load Dictionary
	    print("\n[*] Loading dictionary for validation...")
	    dictionary = load_dictionary()
	    if dictionary:
	        print(f"[*] Dictionary loaded ({len(dictionary)} words).")
	    else:
	        print("[!] Warning: Dictionary not available. Results may contain invalid words.")

	    # 5. User Input for Challenge
	    print("\n" + "=" * 40)
	    print("PASTE THE CHALLENGE TEXT BELOW.")
	    print("example: 'Find these words: O--- (4)'")
	    print("Press Enter twice to finish input.")
	    print("=" * 40)

	    user_msg = _read_challenge_text()

	    # 6. Parse Constraints
	    # Regex for "X--- (N)" format:
	    # one letter, hyphens, optional whitespace, parens with number
	    pattern = re.compile(r'([A-Z])\-+\s*\((\d+)\)', re.IGNORECASE)
	    constraints = pattern.findall(user_msg)

	    if not constraints:
	        print("[!] No constraints found in message. (Format: 'X--- (4)')")
	        return

	    print(f"\n[*] Found {len(constraints)} patterns to search.")

	    # 7. Solve
	    parsed_constraints = [(char, int(length)) for char, length in constraints]
	    solutions = find_words_in_grid(grid, parsed_constraints, dictionary)

	    # 8. Output (header is 15 + 11 + 15 = 41 chars wide, matching the footer)
	    print("\n" + "=" * 15 + " SOLUTIONS " + "=" * 15)
	    for idx, (char, length) in enumerate(parsed_constraints):
	        found = solutions.get(idx, [])
	        pattern_str = f"{char.upper()}{'-' * (length - 1)} ({length})"

	        if found:
	            print(f"{pattern_str} => {', '.join(found)}")
	        else:
	            print(f"{pattern_str} => [Not Found]")
	    print("=" * 41)

	# Run the interactive solver only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()