File size: 1,372 Bytes
ed6bec6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# generator/validator.py
import json
from pathlib import Path
# Keys every JSONL record must carry at its top level; validate_jsonl reports
# any that are absent as "missing top-level keys".
REQUIRED_TOP_LEVEL = {"input_envelope", "output"}
# Keys that must be present inside each record's "output" value; absent ones
# are reported as "missing output keys".
REQUIRED_OUTPUT_KEYS = {"glyphic", "realized"}
def validate_jsonl(path: Path) -> None:
    """Check every non-blank line of a JSONL file and print a report.

    Each non-blank line must parse as a JSON object containing all keys in
    ``REQUIRED_TOP_LEVEL``. Its ``"output"`` value must be an object
    containing all keys in ``REQUIRED_OUTPUT_KEYS``, and its
    ``"input_envelope"`` value must be a string. Every problem is printed as
    ``[error] line N: ...`` where N is the 1-based physical line number
    (blank lines are counted for numbering but not as samples). A summary
    line with the sample and error totals is printed at the end.

    The error total counts problems, not bad records: a record that has the
    required top-level keys gets both the ``output`` check and the
    ``input_envelope`` check, so it can contribute two errors.

    Args:
        path: JSONL file to read; it is opened as UTF-8 text.

    Raises:
        OSError: Propagated from ``path.open`` if the file cannot be opened.
    """
    errors = 0
    total = 0
    with path.open("r", encoding="utf-8") as f:
        for line_num, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            total += 1

            try:
                obj = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"[error] line {line_num}: invalid JSON: {e}")
                errors += 1
                continue

            # json.loads also accepts lists, strings, numbers and null; those
            # have no .keys(), so reject them here instead of crashing below.
            if not isinstance(obj, dict):
                print(
                    f"[error] line {line_num}: "
                    "top-level JSON value must be an object"
                )
                errors += 1
                continue

            missing = REQUIRED_TOP_LEVEL - obj.keys()
            if missing:
                print(f"[error] line {line_num}: missing top-level keys: {missing}")
                errors += 1
                continue

            out = obj["output"]
            if not isinstance(out, dict):
                # Same reasoning as above: a non-object "output" cannot be
                # inspected for keys, so report it rather than raise.
                print(f"[error] line {line_num}: output must be an object")
                errors += 1
            else:
                missing_out = REQUIRED_OUTPUT_KEYS - out.keys()
                if missing_out:
                    print(
                        f"[error] line {line_num}: missing output keys: {missing_out}"
                    )
                    errors += 1

            # quick sanity checks (run even when the output check failed)
            if not isinstance(obj["input_envelope"], str):
                print(f"[error] line {line_num}: input_envelope must be a string")
                errors += 1

    print(f"Validated {total} samples, errors: {errors}")
|