Surn committed on
Commit
def73e2
·
1 Parent(s): 032159d

Add word loader

Browse files
Files changed (1) hide show
  1. battlewords/word_loader.py +81 -0
battlewords/word_loader.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from typing import Dict, List
5
+
6
+ import streamlit as st
7
+ from importlib import resources
8
+
9
+
10
# Minimal built-in word lists, keyed by word length (4, 5 and 6 letters).
# load_word_list() substitutes these, per length, when the packaged word list
# is missing, unreadable, or has too few entries for that length.
# All entries are upper-case ASCII and match the length of their key.
FALLBACK_WORDS: Dict[int, List[str]] = {
    4: [
        "TREE", "BOAT", "WIND", "FROG", "LION", "MOON", "FORK", "GLOW", "GAME", "CODE",
        "DATA", "BLUE", "GOLD", "ROAD", "STAR",
    ],
    5: [
        "APPLE", "RIVER", "STONE", "PLANT", "MOUSE", "BOARD", "CHAIR", "SCALE", "SMILE", "CLOUD",
    ],
    6: [
        "ORANGE", "PYTHON", "STREAM", "MARKET", "FOREST", "THRIVE", "LOGGER", "BREATH", "DOMAIN", "GALAXY",
    ],
}
23
+
24
+
25
# Word lengths the game uses, and the minimum number of file-sourced words a
# length needs before the file is trusted for that length.
_WORD_LENGTHS = (4, 5, 6)
_MIN_WORDS_PER_LENGTH = 500

# ASCII letters only. The check runs BEFORE upper-casing so that characters
# such as "ß" (which upper-cases to "SS") cannot slip through as a different,
# valid-looking word.
_ASCII_WORD_RE = re.compile(r"[A-Za-z]+")


def _parse_wordlist(text: str) -> Dict[int, List[str]]:
    """Bucket the valid words found in *text* by length.

    One candidate word is read per line. Anything from a ``#`` to the end of
    the line is treated as a comment. Blank lines, lines that are not purely
    ASCII letters, and words whose length is not 4, 5 or 6 are skipped.
    Words are upper-cased and de-duplicated, keeping first-seen order.
    """
    buckets: Dict[int, List[str]] = {n: [] for n in _WORD_LENGTHS}
    seen = set()
    for raw in text.splitlines():
        # Handles full-line comments, inline comments and blank lines alike:
        # in each case the part before "#" is empty or the bare word.
        token = raw.partition("#")[0].strip()
        if not _ASCII_WORD_RE.fullmatch(token):
            continue
        word = token.upper()
        if len(word) in buckets and word not in seen:
            seen.add(word)
            buckets[len(word)].append(word)
    return buckets


def _record_wordlist_status(words: Dict[int, List[str]], source: str) -> Dict[int, List[str]]:
    """Best-effort: publish the source label and per-length counts, then return *words*."""
    try:
        st.session_state.wordlist_source = source
        st.session_state.word_counts = {k: len(v) for k, v in words.items()}
    except Exception:
        # Deliberately swallowed: status reporting must never prevent the
        # word list from being returned.
        pass
    return words


@st.cache_data(show_spinner=False)
def load_word_list() -> Dict[int, List[str]]:
    """
    Load the game's word lists, keyed by word length (4, 5 and 6).

    Words are read from the packaged resource ``battlewords/words/wordlist.txt``,
    restricted to ASCII letters, upper-cased, limited to lengths in {4, 5, 6},
    and de-duplicated while preserving order.

    For every length with fewer than 500 usable entries in the file, the
    built-in ``FALLBACK_WORDS`` list for that length is used instead. If the
    resource cannot be read at all, the built-ins are used for every length.

    Side effects (best-effort): sets ``st.session_state.wordlist_source`` to
    ``"file"``, ``"file+fallback"`` or ``"fallback"`` and
    ``st.session_state.word_counts`` to the per-length sizes of the result.

    NOTE(review): this function is wrapped in ``st.cache_data``, so the body
    presumably runs only on a cache miss and the session-state keys may not
    be set for callers served from the cache -- confirm against the callers.

    Returns:
        A dict mapping each of 4, 5 and 6 to a list of upper-case words.
    """
    # Keep the try body to the read alone so that a bug in parsing or
    # selection surfaces instead of being reported as "fallback".
    try:
        text = (
            resources.files("battlewords.words")
            .joinpath("wordlist.txt")
            .read_text(encoding="utf-8")
        )
    except Exception:
        # Missing package/file, decode error, or any other read failure.
        # Copy the lists so the result never aliases the module constant.
        builtin = {length: list(words) for length, words in FALLBACK_WORDS.items()}
        return _record_wordlist_status(builtin, "fallback")

    parsed = _parse_wordlist(text)
    enough = {n: len(parsed[n]) >= _MIN_WORDS_PER_LENGTH for n in _WORD_LENGTHS}

    # Per spec: fall back only for the lengths that are below the threshold.
    chosen: Dict[int, List[str]] = {
        n: parsed[n] if enough[n] else list(FALLBACK_WORDS[n]) for n in _WORD_LENGTHS
    }

    if all(enough.values()):
        source = "file"
    elif any(enough.values()):
        source = "file+fallback"
    else:
        source = "fallback"
    return _record_wordlist_status(chosen, source)