Spaces:
Running
Running
Add word loader
Browse files- battlewords/word_loader.py +81 -0
battlewords/word_loader.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from typing import Dict, List
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from importlib import resources
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Small built-in vocabulary, keyed by word length. load_word_list() falls back
# to these lists when the packaged word file cannot be read or provides too
# few words for a given length.
FALLBACK_WORDS: Dict[int, List[str]] = {
    4: [
        "TREE", "BOAT", "WIND", "FROG", "LION",
        "MOON", "FORK", "GLOW", "GAME", "CODE",
        "DATA", "BLUE", "GOLD", "ROAD", "STAR",
    ],
    5: [
        "APPLE", "RIVER", "STONE", "PLANT", "MOUSE",
        "BOARD", "CHAIR", "SCALE", "SMILE", "CLOUD",
    ],
    6: [
        "ORANGE", "PYTHON", "STREAM", "MARKET", "FOREST",
        "THRIVE", "LOGGER", "BREATH", "DOMAIN", "GALAXY",
    ],
}
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@st.cache_data(show_spinner=False)
def load_word_list() -> Dict[int, List[str]]:
    """
    Load the game's word lists, keyed by word length (4, 5 and 6).

    Words are read from the packaged resource battlewords/words/wordlist.txt.
    Blank lines, lines starting with "#", and anything after a "#" on a line
    are ignored. A token is kept only if it consists solely of ASCII letters
    (any case) and has length 4, 5 or 6; kept words are upper-cased and
    de-duplicated while preserving file order.

    If the file yields fewer than 500 words for a length, the built-in
    FALLBACK_WORDS list is used for that length instead. If the resource
    cannot be read at all, the built-ins are used for every length.

    Side effects: best-effort writes of st.session_state.wordlist_source
    ("file", "file+fallback" or "fallback") and st.session_state.word_counts
    (length -> number of words returned).

    NOTE(review): this function is wrapped in st.cache_data, so the body --
    including the session_state writes -- presumably only runs on a cache
    miss; confirm nothing relies on those keys being set in every session.

    Returns:
        Dict mapping each length to its list of upper-case words. Lists
        sourced from FALLBACK_WORDS are copies, so callers may mutate the
        result without altering the module-level constant.
    """
    lengths = (4, 5, 6)
    min_words = 500  # per-length minimum before the file's words are trusted

    # Validate the raw token *before* upper-casing: str.upper() maps some
    # non-ASCII letters to ASCII ones (e.g. "ß" -> "SS"), which would
    # otherwise let non-ASCII input pass an A-Z check.
    ascii_word = re.compile(r"[A-Za-z]+")

    def _finalize(wbl: Dict[int, List[str]], source: str) -> Dict[int, List[str]]:
        # Status is informational only; never let a session_state problem
        # (presumably: running outside a Streamlit session) break loading.
        try:
            st.session_state.wordlist_source = source
            st.session_state.word_counts = {k: len(v) for k, v in wbl.items()}
        except Exception:
            pass
        return wbl

    # Only the resource read is guarded, so a missing package/file or a
    # decode error selects the built-ins without masking unrelated bugs.
    try:
        text = (
            resources.files("battlewords.words")
            .joinpath("wordlist.txt")
            .read_text(encoding="utf-8")
        )
    except Exception:
        builtin = {k: list(v) for k, v in FALLBACK_WORDS.items()}
        return _finalize(builtin, "fallback")

    words_by_len: Dict[int, List[str]] = {n: [] for n in lengths}
    seen: Dict[int, set] = {n: set() for n in lengths}
    for raw in text.splitlines():
        # Dropping everything from the first "#" handles both full-line and
        # trailing comments; blank results fail the pattern check below.
        token = raw.partition("#")[0].strip()
        if not ascii_word.fullmatch(token):
            continue
        word = token.upper()
        size = len(word)
        if size in seen and word not in seen[size]:
            seen[size].add(word)
            words_by_len[size].append(word)

    enough = {n: len(words_by_len[n]) >= min_words for n in lengths}
    if all(enough.values()):
        return _finalize(words_by_len, "file")

    # Per spec: use the built-ins for any length below the threshold.
    mixed: Dict[int, List[str]] = {
        n: words_by_len[n] if enough[n] else list(FALLBACK_WORDS[n])
        for n in lengths
    }
    source = "file+fallback" if any(enough.values()) else "fallback"
    return _finalize(mixed, source)
|