Spaces:
Sleeping
Sleeping
File size: 9,763 Bytes
1862200 f4e73dc 1862200 e97c67c 1862200 e97c67c 1862200 e97c67c 1862200 e97c67c 1862200 e97c67c 1862200 e97c67c 1862200 f4e73dc 1862200 e97c67c d9f028e e97c67c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 | """
src/utils/file_handler.py
-------------------------
Handles all file reading and writing for the generator.
Responsibilities:
- save_lesson(lesson) : Save a generated lesson to data/generated/
using lesson_id as the filename
- load_lesson(lesson_id) : Load a lesson JSON by ID
- load_example_by_grade(grade_band) : Load a hand-crafted example lesson
from data/examples/ by grade band
- load_registry() : Load the deduplication registry JSON
- register_combo(theme, skill, grade_band, ela_domain, lesson_id) : Add a new
theme x skill x grade x domain combo to the registry and save it
- combo_exists(theme, skill, grade_band, ela_domain) : Check if a combo is
already in the registry — returns True/False
Registry format (data/registry/registry.json):
{
"used_combinations": [
{
"theme": "Space Exploration",
"skill": "retell a story in sequence",
"grade_band": "K-2",
"ela_domain": "Speaking",
"lesson_id": "L-K2-SPK-001",
"generated_at": "2024-01-15T10:30:00+00:00"
}
]
}
"""
import json
from datetime import datetime, timezone
from pathlib import Path
# =============================================================================
# PATH CONSTANTS
# All paths are relative to the project root.
# Using pathlib.Path so this works on Windows, Mac, and Linux equally.
# =============================================================================
# Project root: this file lives at src/utils/file_handler.py, so the root is
# the third ancestor of the resolved file path (index 2 of .parents).
PROJECT_ROOT = Path(__file__).resolve().parents[2]

# Data directories, all anchored at the project root.
DATA_EXAMPLES = PROJECT_ROOT.joinpath("data", "examples")
DATA_GENERATED = PROJECT_ROOT.joinpath("data", "generated")
DATA_REGISTRY = PROJECT_ROOT.joinpath("data", "registry")

# Deduplication registry file.
REGISTRY_FILE = DATA_REGISTRY.joinpath("registry.json")

# The single examples file that holds all the hand-crafted lessons.
EXAMPLES_FILE = DATA_EXAMPLES.joinpath("bantrly_example_lessons.json")
# =============================================================================
# LESSON I/O
# =============================================================================
def save_lesson(lesson_dict: dict) -> Path:
    """
    Write a generated lesson to data/generated/<lesson_id>.json.

    The output directory is created on demand. An existing file with the
    same lesson ID is overwritten.

    Args:
        lesson_dict: The lesson as a plain Python dict (call lesson.to_dict()).
            Its 'lesson_id' value becomes the file name.

    Returns:
        The Path where the file was saved.

    Raises:
        KeyError: If lesson_dict doesn't contain a 'lesson_id' field.
    """
    # Resolve the ID first so a lesson without one fails before any
    # directory or file is created.
    target = DATA_GENERATED / f"{lesson_dict['lesson_id']}.json"

    DATA_GENERATED.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        # ensure_ascii=False keeps non-ASCII lesson text readable in the file.
        json.dump(lesson_dict, handle, ensure_ascii=False, indent=2)

    print(f"[file_handler] Saved lesson β {target}")
    return target
def load_lesson(lesson_id: str) -> dict:
    """
    Read a previously generated lesson from data/generated/<lesson_id>.json.

    Args:
        lesson_id: e.g. "L-K2-SPK-001"

    Returns:
        The lesson as a plain Python dict (the parsed JSON content).

    Raises:
        FileNotFoundError: If no lesson with that ID exists.
    """
    source = DATA_GENERATED / f"{lesson_id}.json"

    if source.exists():
        with source.open("r", encoding="utf-8") as handle:
            return json.load(handle)

    # Nothing on disk for this ID: report both the ID and the path tried.
    raise FileNotFoundError(
        f"[file_handler] No lesson found with ID '{lesson_id}' at {source}"
    )
# =============================================================================
# EXAMPLE LESSON LOADER
# Loads hand-crafted few-shot examples from data/examples/
# Selects the most relevant example by matching grade band.
# =============================================================================
# Grade band -> lesson_id of the hand-crafted example used for that band.
# The keys double as the list of valid grade bands.
GRADE_BAND_TO_EXAMPLE_ID: dict[str, str] = {
    "K-2": "L-K2-SPK-001",
    "3-5": "L-35-SPK-005",  # Mission Brief — good general 3-5 example
    "6-8": "L-68-SPK-003",
    "9-12": "L-912-RDG-SPK-004",
}
def load_example_by_grade(grade_band: str) -> dict:
    """
    Return the hand-crafted example lesson registered for a grade band.

    The grade band is mapped to an example lesson_id through
    GRADE_BAND_TO_EXAMPLE_ID, and the lesson with that ID is looked up in the
    examples file.

    Args:
        grade_band: One of "K-2", "3-5", "6-8", "9-12"

    Returns:
        The example lesson as a plain Python dict.

    Raises:
        ValueError: If the grade band is not one of the valid options.
        FileNotFoundError: If the examples file doesn't exist, or if it does
            not contain the lesson mapped to this grade band.
    """
    if grade_band not in GRADE_BAND_TO_EXAMPLE_ID:
        raise ValueError(
            f"[file_handler] Invalid grade band '{grade_band}'. "
            f"Must be one of: {list(GRADE_BAND_TO_EXAMPLE_ID)}"
        )
    wanted_id = GRADE_BAND_TO_EXAMPLE_ID[grade_band]

    if not EXAMPLES_FILE.exists():
        raise FileNotFoundError(
            f"[file_handler] Examples file not found at {EXAMPLES_FILE}. "
            f"Make sure bantrly_example_lessons.json is in data/examples/"
        )

    with EXAMPLES_FILE.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)

    # All examples live under one top-level key in a "lessons" list.
    candidates = payload["bantrly_example_lessons"]["lessons"]
    found = next(
        (item for item in candidates if item["lesson_id"] == wanted_id),
        None,
    )
    if found is not None:
        return found

    raise FileNotFoundError(
        f"[file_handler] Example lesson '{wanted_id}' not found in examples file."
    )
# =============================================================================
# DEDUPLICATION REGISTRY
# Tracks which theme x skill x grade_band x ela_domain combinations have been used.
# Prevents the generator from producing repetitive lessons.
# =============================================================================
def load_registry() -> dict:
    """
    Load the deduplication registry from data/registry/registry.json.

    A missing or empty (whitespace-only) file yields a fresh empty registry.
    A registry object that lacks the "used_combinations" key (or holds null
    there) is normalised to carry an empty list, so callers can always index
    that key.

    Returns:
        A dict with a "used_combinations" list.

    Raises:
        json.JSONDecodeError: If the file holds text that is not valid JSON.
        ValueError: If the file holds valid JSON whose top level is not an
            object.
    """
    # Read directly rather than checking exists() first: this avoids a race
    # if the file disappears between the check and the open.
    try:
        raw = REGISTRY_FILE.read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        return {"used_combinations": []}

    if not raw:
        return {"used_combinations": []}

    registry = json.loads(raw)
    if not isinstance(registry, dict):
        # Fail here with a clear message instead of a TypeError further down
        # when a caller indexes the result.
        raise ValueError(
            f"[file_handler] Registry at {REGISTRY_FILE} must contain a JSON "
            f"object, found {type(registry).__name__}"
        )

    # Honour the documented contract for partially written registries.
    if registry.get("used_combinations") is None:
        registry["used_combinations"] = []
    return registry
def save_registry(registry: dict) -> None:
    """
    Persist the registry dict to data/registry/registry.json.

    The registry directory is created if needed and the file is overwritten
    with the JSON form of the given dict.

    Args:
        registry: The full registry dict (with "used_combinations" list).
    """
    DATA_REGISTRY.mkdir(parents=True, exist_ok=True)
    with REGISTRY_FILE.open("w", encoding="utf-8") as handle:
        json.dump(registry, handle, ensure_ascii=False, indent=2)
def combo_exists(theme: str, skill: str, grade_band: str, ela_domain: str) -> bool:
    """
    Report whether a theme x skill x grade_band x ela_domain combination is
    already recorded in the registry. All four fields are compared
    case-insensitively.

    Registry entries with no "ela_domain" field are treated as having an
    empty domain.

    Args:
        theme: e.g. "Space Exploration"
        skill: e.g. "retell a story in sequence"
        grade_band: e.g. "K-2"
        ela_domain: e.g. "Speaking"

    Returns:
        True if the combo already exists, False if it's new.
    """

    def _is_same_combo(entry: dict) -> bool:
        # Fields are checked one at a time, bailing out on the first mismatch.
        if entry["theme"].lower() != theme.lower():
            return False
        if entry["skill"].lower() != skill.lower():
            return False
        if entry["grade_band"].lower() != grade_band.lower():
            return False
        return entry.get("ela_domain", "").lower() == ela_domain.lower()

    return any(
        _is_same_combo(entry)
        for entry in load_registry()["used_combinations"]
    )
def register_combo(theme: str, skill: str, grade_band: str, ela_domain: str, lesson_id: str) -> None:
    """
    Record a theme x skill x grade_band x ela_domain combo in the registry.

    Loads the registry, appends one entry stamped with the current UTC time
    in ISO 8601 form, and writes the registry back. No duplicate check is
    made here.

    Args:
        theme: e.g. "Space Exploration"
        skill: e.g. "retell a story in sequence"
        grade_band: e.g. "K-2"
        ela_domain: e.g. "Speaking"
        lesson_id: e.g. "L-K2-SPK-001"
    """
    registry = load_registry()
    stamp = datetime.now(timezone.utc).isoformat()

    registry["used_combinations"].append(
        {
            "theme": theme,
            "skill": skill,
            "grade_band": grade_band,
            "ela_domain": ela_domain,
            "lesson_id": lesson_id,
            "generated_at": stamp,
        }
    )
    save_registry(registry)

    print(f"[file_handler] Registered combo β {grade_band} | {theme} | {skill}")
def get_covered_skills(grade_band: str, ela_domain: str) -> list[str]:
    """
    Return skills already covered for a grade band + domain from the registry.

    Results follow the order of the registry's "used_combinations" list.
    Grade band and domain are matched case-insensitively.

    Domain matching rules:
    - The combined Reading/Speaking domain also counts entries recorded as
      plain "Reading" or plain "Speaking".
    - "Reading" and "Speaking" each also count entries recorded under the
      combined domain.
    - Any other domain counts only entries recorded under that same domain.

    Registry entries with no "ela_domain" field are treated as having an
    empty domain.

    Args:
        grade_band: e.g. "K-2"
        ela_domain: e.g. "Speaking"

    Returns:
        List of skill strings already generated for this band + domain.
    """
    # NOTE(review): the separator character in the combined-domain label looks
    # mis-encoded in this copy of the file; confirm it matches the label that
    # callers pass and that register_combo stores.
    combined_key = "reading β speaking"

    # Pick the rule on the lowercased domain so "reading" and "Reading"
    # select the same rule, in line with the case-insensitive matching below.
    domain_key = ela_domain.lower()
    if domain_key == combined_key:
        accepted = {combined_key, "reading", "speaking"}
    elif domain_key in ("reading", "speaking"):
        accepted = {domain_key, combined_key}
    else:
        accepted = {domain_key}

    band_key = grade_band.lower()
    return [
        entry["skill"]
        for entry in load_registry()["used_combinations"]
        if entry["grade_band"].lower() == band_key
        and entry.get("ela_domain", "").lower() in accepted
    ]