"""Word-list discovery and loading for the Battlewords Streamlit app."""
from __future__ import annotations

import os
import re
from importlib import resources
from typing import Dict, List, Optional

import streamlit as st

# Minimal built-ins used if the external file is missing or too small
FALLBACK_WORDS: Dict[int, List[str]] = {
    4: [
        "TREE", "BOAT", "WIND", "FROG", "LION",
        "MOON", "FORK", "GLOW", "GAME", "CODE",
        "DATA", "BLUE", "GOLD", "ROAD", "STAR",
    ],
    5: [
        "APPLE", "RIVER", "STONE", "PLANT", "MOUSE",
        "BOARD", "CHAIR", "SCALE", "SMILE", "CLOUD",
    ],
    6: [
        "ORANGE", "PYTHON", "STREAM", "MARKET", "FOREST",
        "THRIVE", "LOGGER", "BREATH", "DOMAIN", "GALAXY",
    ],
}

# Minimum number of words a length bucket must hold before the file's words
# are used for that length; shorter buckets fall back to FALLBACK_WORDS.
# NOTE(review): earlier comments cited a spec threshold of 500, but the value
# actually enforced is 25 -- confirm which one is intended.
MIN_REQUIRED = 25

# Word lengths the loader keeps; anything else in the file is ignored.
_WORD_LENGTHS = (4, 5, 6)

# Compiled once: a candidate (already upper-cased) must be purely A-Z.
_WORD_RE = re.compile(r"[A-Z]+")


def _words_dir() -> str:
    """Return the path of the ``words`` directory next to this module."""
    return os.path.join(os.path.dirname(__file__), "words")


def _read_text_from_disk(fname: str) -> str:
    """Read ``fname`` from the ``words`` directory as UTF-8 text."""
    with open(os.path.join(_words_dir(), fname), "r", encoding="utf-8") as f:
        return f.read()


def _read_default_text() -> Optional[str]:
    """Return the default word list text, or ``None`` if no source is readable."""
    # Prefer the on-disk default in the editable repo
    try:
        return _read_text_from_disk("wordlist.txt")
    except Exception:
        pass
    # Fallback to packaged data if available
    try:
        return (
            resources.files("battlewords.words")
            .joinpath("wordlist.txt")
            .read_text(encoding="utf-8")
        )
    except Exception:
        return None


def _parse_word_text(text: str) -> Dict[int, List[str]]:
    """Bucket the words in ``text`` by length, deduped in first-seen order.

    One word per line. Everything from the first ``#`` on a line is treated
    as a comment. Words are upper-cased; entries that are not purely A-Z or
    whose length is not in ``_WORD_LENGTHS`` are skipped.
    """
    buckets: Dict[int, List[str]] = {n: [] for n in _WORD_LENGTHS}
    seen = {n: set() for n in _WORD_LENGTHS}
    for raw in text.splitlines():
        # Dropping the comment tail first also handles blank and
        # comment-only lines: they reduce to "" and fail the regex below.
        word = raw.split("#", 1)[0].strip().upper()
        if not _WORD_RE.fullmatch(word):
            continue
        size = len(word)
        if size in buckets and word not in seen[size]:
            buckets[size].append(word)
            seen[size].add(word)
    return buckets


def get_wordlist_files() -> list[str]:
    """Return the sorted names of ``.txt`` files in the ``words`` directory.

    Returns an empty list when the directory does not exist. Only regular
    files are listed, so a sub-directory that happens to end in ``.txt`` is
    never offered as a selectable word list.
    """
    words_dir = _words_dir()
    if not os.path.isdir(words_dir):
        return []
    return sorted(
        name
        for name in os.listdir(words_dir)
        if name.lower().endswith(".txt")
        and os.path.isfile(os.path.join(words_dir, name))
    )


@st.cache_data(show_spinner=False)
def load_word_list(selected_file: Optional[str] = None) -> Dict[int, List[str]]:
    """
    Load a word list, filter to uppercase A–Z, lengths in {4,5,6}, and dedupe
    while preserving order.

    If `selected_file` is provided, load battlewords/words/<selected_file>
    (it must be one of the names returned by `get_wordlist_files()`).
    Otherwise, try the on-disk default battlewords/words/wordlist.txt; if
    unavailable, try the packaged resource.

    If fewer than `MIN_REQUIRED` entries exist for any required length, the
    built-in `FALLBACK_WORDS` are used for that length. If the file cannot be
    found or read at all, the built-ins are used for every length; this
    function does not raise for missing or unreadable files.

    As a best-effort side effect, `st.session_state.wordlist_source`,
    `.wordlist_selected` and `.word_counts` are updated to describe the
    result that was produced.

    NOTE: To ensure cache updates when the user picks a different file, always
    pass the `selected_file` argument from the UI/generator.

    Returns:
        A dict mapping each word length (4, 5, 6) to its list of words.
    """

    def _finalize(wbl: Dict[int, List[str]], source: str) -> Dict[int, List[str]]:
        # Best-effort bookkeeping: a failure to record the source must never
        # prevent the word list from being returned.
        # NOTE(review): this body only runs on a cache miss, so these values
        # presumably are not refreshed on cached calls -- confirm that is OK.
        try:
            st.session_state.wordlist_source = source
            st.session_state.wordlist_selected = selected_file or "wordlist.txt"
            st.session_state.word_counts = {k: len(v) for k, v in wbl.items()}
        except Exception:
            pass
        return wbl

    try:
        if selected_file:
            # Validate selection against available files to avoid bad paths
            if selected_file not in get_wordlist_files():
                raise FileNotFoundError(
                    f"Selected word list '{selected_file}' not found in words/ directory."
                )
            text = _read_text_from_disk(selected_file)
            source_label = f"file:{selected_file}"
        else:
            text = _read_default_text()
            if text is None:
                raise FileNotFoundError(
                    "No word list file found on disk or in packaged resources."
                )
            source_label = "default"
        words_by_len = _parse_word_text(text)
    except Exception:
        # Missing file or read/decode error: serve the built-ins. Copy the
        # lists so callers can never mutate the module-level constant.
        return _finalize(
            {n: list(words) for n, words in FALLBACK_WORDS.items()}, "fallback"
        )

    too_small = [n for n in _WORD_LENGTHS if len(words_by_len[n]) < MIN_REQUIRED]
    if not too_small:
        return _finalize(words_by_len, source_label)

    # Fall back per length: keep the file's words where there are enough,
    # substitute (a copy of) the built-ins where there are not.
    mixed: Dict[int, List[str]] = {
        n: list(FALLBACK_WORDS[n]) if n in too_small else words_by_len[n]
        for n in _WORD_LENGTHS
    }
    if len(too_small) == len(_WORD_LENGTHS):
        used_source = "fallback"
    else:
        used_source = f"{source_label}+fallback"
    return _finalize(mixed, used_source)