File size: 3,868 Bytes
def73e2
 
 
50f9808
 
def73e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50f9808
 
 
 
 
 
def73e2
 
50f9808
def73e2
50f9808
 
 
 
def73e2
 
50f9808
def73e2
 
 
 
 
 
 
50f9808
def73e2
 
 
 
 
50f9808
 
 
 
 
 
def73e2
50f9808
 
 
 
 
 
 
 
def73e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50f9808
def73e2
 
 
 
 
50f9808
 
 
def73e2
50f9808
def73e2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from __future__ import annotations

import re
import os
from typing import Dict, List, Optional

import streamlit as st
from importlib import resources


# Small built-in vocabulary keyed by word length (4, 5 and 6 letters).
# It stands in for any length whose external word list is missing or too small.
FALLBACK_WORDS: Dict[int, List[str]] = {
    4: [
        "TREE", "BOAT", "WIND", "FROG", "LION",
        "MOON", "FORK", "GLOW", "GAME", "CODE",
        "DATA", "BLUE", "GOLD", "ROAD", "STAR",
    ],
    5: [
        "APPLE", "RIVER", "STONE", "PLANT", "MOUSE",
        "BOARD", "CHAIR", "SCALE", "SMILE", "CLOUD",
    ],
    6: [
        "ORANGE", "PYTHON", "STREAM", "MARKET", "FOREST",
        "THRIVE", "LOGGER", "BREATH", "DOMAIN", "GALAXY",
    ],
}

def get_wordlist_files() -> list[str]:
    """Return the sorted names of the ``.txt`` word-list files in ``words/``.

    The ``words`` directory is looked up next to this module. Only regular
    files are reported, so a sub-directory whose name happens to end in
    ``.txt`` is not offered as a word list. The extension match is
    case-insensitive.

    Returns:
        File names (not full paths) in ascending order, or an empty list when
        the ``words`` directory does not exist.
    """
    words_dir = os.path.join(os.path.dirname(__file__), "words")
    try:
        with os.scandir(words_dir) as entries:
            return sorted(
                entry.name
                for entry in entries
                if entry.is_file() and entry.name.lower().endswith(".txt")
            )
    except (FileNotFoundError, NotADirectoryError):
        # Missing (or non-directory) ``words`` path: nothing to offer.
        return []

@st.cache_data(show_spinner=False)
def load_word_list(selected_file: Optional[str] = None) -> Dict[int, List[str]]:
    """
    Load the word list, grouped by word length (4, 5 and 6 letters).

    Source selection:
      * `selected_file` given: read `words/<selected_file>` from the directory
        next to this module.
      * otherwise: read the packaged resource `wordlist.txt` from
        `battlewords.words`.

    Parsing: one word per line. Blank lines are skipped and everything from
    the first `#` on a line is treated as a comment. Words are upper-cased,
    must consist only of A-Z, and duplicates are dropped while preserving
    first-seen order.

    Fallback: any length with fewer than 250 usable words is replaced by the
    built-in `FALLBACK_WORDS` entries for that length. If the source cannot be
    read at all, every length uses the built-ins.

    Side effects (best effort): stores `wordlist_source` ("file",
    "file+fallback" or "fallback"), `wordlist_selected` and `word_counts` in
    `st.session_state`.

    Returns:
        Mapping of word length (4, 5, 6) to the list of words of that length.
    """
    # NOTE(review): this function is wrapped in st.cache_data, so the body
    # (including the session-state bookkeeping below) presumably only runs on
    # a cache miss -- confirm that is acceptable for the UI that reads it.
    lengths = (4, 5, 6)
    # NOTE(review): the code has always enforced 250 although the docstring
    # used to say 500 -- confirm the intended value against the spec.
    min_words_per_length = 250
    word_pattern = re.compile(r"[A-Z]+")

    def _finalize(wbl: Dict[int, List[str]], source: str) -> Dict[int, List[str]]:
        # Record where the words came from. Deliberately best-effort: the
        # bookkeeping must never prevent the word list from being returned.
        try:
            st.session_state.wordlist_source = source
            st.session_state.wordlist_selected = selected_file or "wordlist.txt"
            st.session_state.word_counts = {k: len(v) for k, v in wbl.items()}
        except Exception:
            pass
        return wbl

    def _fallback(length: int) -> List[str]:
        # Hand out a copy so callers can never mutate the module-level
        # constant through the returned mapping.
        return list(FALLBACK_WORDS[length])

    def _read_source_text() -> str:
        if selected_file:
            # Prefer explicit selection from the words/ directory.
            words_dir = os.path.join(os.path.dirname(__file__), "words")
            path = os.path.join(words_dir, selected_file)
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        # No selection: use the packaged default wordlist.txt.
        return (
            resources.files("battlewords.words")
            .joinpath("wordlist.txt")
            .read_text(encoding="utf-8")
        )

    try:
        text = _read_source_text()
    except Exception:
        # Missing file, missing package, decode or read error: built-ins only.
        return _finalize({n: _fallback(n) for n in lengths}, "fallback")

    words_by_len: Dict[int, List[str]] = {n: [] for n in lengths}
    seen = {n: set() for n in lengths}
    for raw in text.splitlines():
        # Dropping everything from the first '#' covers both full-line and
        # trailing comments; blank results fail the pattern match below.
        word = raw.split("#", 1)[0].strip().upper()
        if not word_pattern.fullmatch(word):
            continue
        bucket = seen.get(len(word))
        if bucket is None or word in bucket:
            continue
        bucket.add(word)
        words_by_len[len(word)].append(word)

    sufficient = {n: len(words_by_len[n]) >= min_words_per_length for n in lengths}
    if all(sufficient.values()):
        return _finalize(words_by_len, "file")

    # Per spec: fall back for any length below the threshold.
    mixed: Dict[int, List[str]] = {
        n: words_by_len[n] if sufficient[n] else _fallback(n) for n in lengths
    }
    used_source = "file+fallback" if any(sufficient.values()) else "fallback"
    return _finalize(mixed, used_source)