File size: 9,763 Bytes
1862200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4e73dc
1862200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e97c67c
1862200
 
 
 
 
 
 
 
e97c67c
1862200
 
 
 
 
 
 
 
 
 
e97c67c
 
1862200
 
 
 
 
 
e97c67c
1862200
 
 
 
 
 
 
 
e97c67c
1862200
 
 
 
 
 
 
 
e97c67c
1862200
f4e73dc
1862200
 
 
 
 
e97c67c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9f028e
 
 
 
e97c67c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
"""
src/utils/file_handler.py
-------------------------
Handles all file reading and writing for the generator.

Responsibilities:
    - save_lesson(lesson_dict)      : Save a generated lesson to data/generated/
                                      using lesson_id as the filename
    - load_lesson(lesson_id)        : Load a lesson JSON by ID
    - load_example_by_grade(grade_band) : Load a hand-crafted example lesson
                                      from data/examples/ by grade band
    - load_registry()               : Load the deduplication registry JSON
    - save_registry(registry)       : Write the registry dict back to disk
    - register_combo(theme, skill, grade_band, ela_domain, lesson_id) :
                                      Add a new theme x skill x grade x domain
                                      combo to the registry and save it
    - combo_exists(theme, skill, grade_band, ela_domain) : Check if a combo is
                                      already in the registry — returns True/False
    - get_covered_skills(grade_band, ela_domain) : List the skills already
                                      registered for a grade band + domain

Registry format (data/registry/registry.json):
    {
        "used_combinations": [
            {
                "theme": "Space Exploration",
                "skill": "retell a story in sequence",
                "grade_band": "K-2",
                "ela_domain": "Speaking",
                "lesson_id": "L-K2-SPK-001",
                "generated_at": "2024-01-15T10:30:00.123456+00:00"
            }
        ]
    }
"""

import json
from datetime import datetime, timezone
from pathlib import Path

# =============================================================================
# PATH CONSTANTS
# Every location below is derived from this file's own resolved path, so the
# module does not depend on the current working directory. pathlib.Path keeps
# the joins portable across Windows, Mac, and Linux.
# =============================================================================

# This file is src/utils/file_handler.py: parents[0] is src/utils,
# parents[1] is src, and parents[2] is the project root.
PROJECT_ROOT = Path(__file__).resolve().parents[2]

# All data directories live under <project root>/data.
_DATA_ROOT = PROJECT_ROOT / "data"

DATA_EXAMPLES = _DATA_ROOT / "examples"
DATA_GENERATED = _DATA_ROOT / "generated"
DATA_REGISTRY = _DATA_ROOT / "registry"
REGISTRY_FILE = DATA_REGISTRY / "registry.json"

# The single examples file that holds all 5 hand-crafted lessons
EXAMPLES_FILE = DATA_EXAMPLES / "bantrly_example_lessons.json"


# =============================================================================
# LESSON I/O
# =============================================================================

def save_lesson(lesson_dict: dict) -> Path:
    """
    Save a generated lesson to data/generated/<lesson_id>.json

    The lesson is serialized to a string before the target file is opened, so
    a lesson that cannot be encoded as JSON raises without truncating or
    half-writing a file that already exists for that lesson_id.

    Args:
        lesson_dict: The lesson as a plain Python dict (call lesson.to_dict())

    Returns:
        The Path where the file was saved.

    Raises:
        KeyError: If lesson_dict doesn't contain a 'lesson_id' field.
        TypeError: If lesson_dict holds a value json cannot serialize.
    """
    lesson_id = lesson_dict["lesson_id"]
    filepath = DATA_GENERATED / f"{lesson_id}.json"

    # Serialize up front: json.dump() streams into a file that open("w") has
    # already truncated, so a failure partway through would leave corrupt JSON
    # on disk and destroy any earlier version of the lesson.
    payload = json.dumps(lesson_dict, indent=2, ensure_ascii=False)

    DATA_GENERATED.mkdir(parents=True, exist_ok=True)
    filepath.write_text(payload, encoding="utf-8")

    print(f"[file_handler] Saved lesson β†’ {filepath}")
    return filepath


def load_lesson(lesson_id: str) -> dict:
    """
    Read back a generated lesson stored at data/generated/<lesson_id>.json

    Args:
        lesson_id: e.g. "L-K2-SPK-001"

    Returns:
        The parsed JSON content of the lesson file.

    Raises:
        FileNotFoundError: If no file exists for that lesson ID.
    """
    lesson_path = DATA_GENERATED / f"{lesson_id}.json"

    if lesson_path.exists():
        return json.loads(lesson_path.read_text(encoding="utf-8"))

    raise FileNotFoundError(
        f"[file_handler] No lesson found with ID '{lesson_id}' at {lesson_path}"
    )


# =============================================================================
# EXAMPLE LESSON LOADER
# Loads hand-crafted few-shot examples from data/examples/
# Selects the most relevant example by matching grade band.
# =============================================================================

# Few-shot example to use for each grade band: the key is the grade band
# string, the value is the lesson_id to look up in the examples file.
GRADE_BAND_TO_EXAMPLE_ID = {
    "K-2": "L-K2-SPK-001",
    "3-5": "L-35-SPK-005",  # Mission Brief: good general 3-5 example
    "6-8": "L-68-SPK-003",
    "9-12": "L-912-RDG-SPK-004",
}


def load_example_by_grade(grade_band: str) -> dict:
    """
    Return the hand-crafted example lesson registered for a grade band.

    The grade band is validated first, then the examples file is read and
    searched for the lesson_id mapped to that band in
    GRADE_BAND_TO_EXAMPLE_ID.

    Args:
        grade_band: One of "K-2", "3-5", "6-8", "9-12"

    Returns:
        The example lesson as a plain Python dict.

    Raises:
        ValueError: If the grade band is not a key of GRADE_BAND_TO_EXAMPLE_ID.
        FileNotFoundError: If the examples file doesn't exist, or if it
            contains no lesson with the mapped lesson_id.
    """
    target_id = GRADE_BAND_TO_EXAMPLE_ID.get(grade_band)
    if target_id is None:
        valid_bands = list(GRADE_BAND_TO_EXAMPLE_ID)
        raise ValueError(
            f"[file_handler] Invalid grade band '{grade_band}'. "
            f"Must be one of: {valid_bands}"
        )

    if not EXAMPLES_FILE.exists():
        raise FileNotFoundError(
            f"[file_handler] Examples file not found at {EXAMPLES_FILE}. "
            f"Make sure bantrly_example_lessons.json is in data/examples/"
        )

    catalogue = json.loads(EXAMPLES_FILE.read_text(encoding="utf-8"))
    candidates = catalogue["bantrly_example_lessons"]["lessons"]

    # First lesson whose id matches wins; later duplicates are never inspected.
    found = next(
        (item for item in candidates if item["lesson_id"] == target_id),
        None,
    )
    if found is not None:
        return found

    raise FileNotFoundError(
        f"[file_handler] Example lesson '{target_id}' not found in examples file."
    )


# =============================================================================
# DEDUPLICATION REGISTRY
# Tracks which theme x skill x grade_band combinations have been used.
# Prevents the generator from producing repetitive lessons.
# =============================================================================

def load_registry() -> dict:
    """
    Load the deduplication registry from data/registry/registry.json.

    A missing file, an empty (or whitespace-only) file, and a JSON object
    that has no "used_combinations" list all yield a usable empty list, so
    callers can always index registry["used_combinations"].

    Returns:
        A dict with a "used_combinations" list.

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON.
        ValueError: If the top-level JSON value is not an object.
    """
    if not REGISTRY_FILE.exists():
        return {"used_combinations": []}

    content = REGISTRY_FILE.read_text(encoding="utf-8").strip()
    if not content:
        return {"used_combinations": []}

    registry = json.loads(content)

    # Fail here with a clear message rather than letting callers hit an
    # unrelated TypeError when they index the result.
    if not isinstance(registry, dict):
        raise ValueError(
            f"[file_handler] Registry at {REGISTRY_FILE} must be a JSON object, "
            f"got {type(registry).__name__}"
        )

    # Honour the documented return shape even for a file such as "{}".
    if registry.get("used_combinations") is None:
        registry["used_combinations"] = []

    return registry


def save_registry(registry: dict) -> None:
    """
    Write the registry dict back to data/registry/registry.json.

    The registry is serialized before anything on disk is touched, written to
    a sibling temporary file, and then moved over the real file. A registry
    that cannot be serialized, or a failure while writing, therefore leaves
    the previous registry file intact instead of truncated.

    Args:
        registry: The full registry dict (with "used_combinations" list).

    Raises:
        TypeError: If the registry holds a value json cannot serialize.
    """
    # Serialize first so an encoding error cannot leave a half-written file.
    payload = json.dumps(registry, indent=2, ensure_ascii=False)

    DATA_REGISTRY.mkdir(parents=True, exist_ok=True)

    # Stage next to the target so the final replace stays within one directory.
    staging_file = REGISTRY_FILE.with_name(REGISTRY_FILE.name + ".tmp")
    staging_file.write_text(payload, encoding="utf-8")
    staging_file.replace(REGISTRY_FILE)


def combo_exists(theme: str, skill: str, grade_band: str, ela_domain: str) -> bool:
    """
    Report whether a theme x skill x grade_band x ela_domain combination is
    already present in the registry. All four fields are compared
    case-insensitively.

    Registry entries without an "ela_domain" key are treated as having an
    empty domain.

    Args:
        theme:      e.g. "Space Exploration"
        skill:      e.g. "retell a story in sequence"
        grade_band: e.g. "K-2"
        ela_domain: e.g. "Speaking"

    Returns:
        True if the combo already exists, False if it's new.
    """
    used = load_registry()["used_combinations"]

    # any() stops at the first matching entry; the chained "and" keeps the
    # field-by-field short-circuit order.
    return any(
        item["theme"].lower() == theme.lower()
        and item["skill"].lower() == skill.lower()
        and item["grade_band"].lower() == grade_band.lower()
        and item.get("ela_domain", "").lower() == ela_domain.lower()
        for item in used
    )


def register_combo(theme: str, skill: str, grade_band: str, ela_domain: str, lesson_id: str) -> None:
    """
    Record a newly used theme x skill x grade_band x ela_domain combination.

    Loads the registry, appends one entry stamped with the current UTC time
    in ISO 8601 form, and saves the registry back. Should be called
    immediately after a lesson is successfully generated.

    Args:
        theme:      e.g. "Space Exploration"
        skill:      e.g. "retell a story in sequence"
        grade_band: e.g. "K-2"
        ela_domain: e.g. "Speaking"
        lesson_id:  e.g. "L-K2-SPK-001"
    """
    registry = load_registry()

    registry["used_combinations"].append(
        {
            "theme": theme,
            "skill": skill,
            "grade_band": grade_band,
            "ela_domain": ela_domain,
            "lesson_id": lesson_id,
            "generated_at": datetime.now(timezone.utc).isoformat(),
        }
    )
    save_registry(registry)

    print(f"[file_handler] Registered combo β†’ {grade_band} | {theme} | {skill}")

def get_covered_skills(grade_band: str, ela_domain: str) -> list[str]:
    """
    Return skills already covered for a grade band + domain from the registry.
    Ordered oldest to newest (registry insertion order).

    Grade band and domain are both matched case-insensitively. Domains
    overlap as follows:
        - the combined reading-to-speaking domain also counts plain
          "Reading" and plain "Speaking" entries;
        - "Reading" or "Speaking" counts its own entries plus combined
          reading-to-speaking entries;
        - any other domain counts only exact (case-insensitive) matches.

    Registry entries without an "ela_domain" key are treated as having an
    empty domain.

    Args:
        grade_band: e.g. "K-2"
        ela_domain: e.g. "Speaking"

    Returns:
        List of skill strings already generated for this band + domain.
    """
    registry = load_registry()
    band_key = grade_band.lower()
    domain_key = ela_domain.lower()

    # NOTE(review): the arrow inside this literal looks mis-encoded. It is
    # kept exactly as found; confirm against the values callers pass and the
    # registry stores before changing it.
    combined_key = "reading β†’ speaking"

    # Choose the accepted domains from the lowercased key, so the overlap
    # rules apply whatever casing the caller used.
    if domain_key == combined_key:
        accepted = {combined_key, "reading", "speaking"}
    elif domain_key in ("reading", "speaking"):
        accepted = {domain_key, combined_key}
    else:
        accepted = {domain_key}

    return [
        entry["skill"]
        for entry in registry["used_combinations"]
        if entry["grade_band"].lower() == band_key
        and entry.get("ela_domain", "").lower() in accepted
    ]