# -*- coding: utf-8 -*-
"""
Created on Sun Aug 17 20:36:17 2025
@author: adetu
"""
import json, sys
from json import JSONDecodeError
from pathlib import Path
from jsonschema import Draft202012Validator as V
def load_schema(candidates=None):
    """Locate, clean up and parse the attack-plan JSON schema.

    The candidate paths are tried in order and the first one that is a
    regular file is used.  The file is decoded as UTF-8 (a leading BOM is
    dropped), lines starting with a Markdown code fence (three backticks)
    are blanked out, and the remainder is parsed as JSON.

    Args:
        candidates: Optional iterable of paths to try.  When omitted, the
            schema is looked up at ``schemas/attack_plan.schema.json`` and
            ``../schemas/attack_plan.schema.json``; both are relative
            paths, so they resolve against the current working directory.

    Returns:
        The parsed schema, exactly as produced by ``json.loads``.

    Raises:
        FileNotFoundError: If none of the candidates is an existing file.
        RuntimeError: If the chosen file contains nothing but whitespace
            and fence lines.
        json.JSONDecodeError: If the file is not valid JSON.  The error
            position and a few lines of context are printed before the
            exception is re-raised.
    """
    if candidates is None:
        candidates = (
            Path("schemas/attack_plan.schema.json"),
            Path("../schemas/attack_plan.schema.json"),
        )
    searched = [Path(c) for c in candidates]

    for p in searched:
        # is_file() rather than exists(): a directory with the same name
        # must not be picked up and then blow up in read_text().
        if not p.is_file():
            continue

        raw = p.read_text(encoding="utf-8-sig")  # utf-8-sig drops a BOM

        # Blank out (instead of deleting) accidental markdown fence lines so
        # that every remaining line keeps its original line number.
        lines = [
            "" if ln.strip().startswith("```") else ln
            for ln in raw.splitlines()
        ]
        joined = "\n".join(lines)
        text = joined.strip()
        if not text:
            raise RuntimeError(f"Schema file is empty: {p}")

        # strip() also removed leading blank lines; put the newlines back so
        # the decoder's line numbers match what an editor shows for the file.
        removed_prefix = joined[: len(joined) - len(joined.lstrip())]
        text = "\n" * removed_prefix.count("\n") + text

        try:
            schema = json.loads(text)
        except JSONDecodeError as e:
            # One line before the error, the error line, and one line after.
            ctx = text.splitlines()[max(e.lineno - 2, 0):e.lineno + 1]
            print(f"[schema] JSON error in {p}: {e.msg} at line {e.lineno}, col {e.colno}")
            print("Context:\n" + "\n".join(ctx))
            raise
        print(f"[schema] loaded {p.resolve()}")
        return schema

    # Name every path that was tried, not just the first one.
    raise FileNotFoundError(
        "Could not find schema at any of: " + ", ".join(str(p) for p in searched)
    )
def main():
    """Validate every record of a JSONL file against the attack-plan schema.

    The input path is taken from the first command-line argument and falls
    back to ``scripts/train_attackplan.jsonl`` when none is given.  Each
    non-blank line is parsed as JSON and checked with the validator built
    from ``load_schema()``.  For every bad line a report is printed (either
    the JSON parse error with a snippet, or up to eight schema violations),
    followed by a final ``[done]`` summary with the valid/invalid counts.

    Raises:
        Whatever ``load_schema()`` raises, plus ``OSError`` /
        ``UnicodeDecodeError`` if the input file cannot be opened or decoded.
    """
    schema = load_schema()
    validator = V(schema)
    src = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("scripts/train_attackplan.jsonl")

    valid = invalid = 0
    # utf-8-sig: tolerate a BOM at the start of the data file, the same way
    # the schema loader does; otherwise record 1 fails to parse.
    with src.open(encoding="utf-8-sig") as fh:
        # Iterating the file splits on real newlines only.  str.splitlines()
        # would also break on U+2028/U+2029/U+0085, which may legally appear
        # unescaped inside a JSON string and would cut a valid record in two.
        for i, line in enumerate(fh, 1):
            line = line.rstrip("\n")
            if not line.strip():  # skip blanks
                continue

            try:
                obj = json.loads(line)
            except JSONDecodeError as e:
                print(f"[line {i}] not JSON: {e.msg} at {e.lineno}:{e.colno}")
                print(" snippet:", line[:200])
                invalid += 1
                continue

            # Sort for a stable, readable report: by location, then message.
            errs = sorted(validator.iter_errors(obj), key=lambda e: (list(e.path), e.message))
            if errs:
                invalid += 1
                print(f"[line {i}] INVALID:")
                for e in errs[:8]:  # cap the noise per record
                    print(" -", e.message, "at", list(e.path))
            else:
                valid += 1

    print(f"[done] {valid} valid, {invalid} invalid")
# Script entry point: run the validation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|