"""
Validate a JSON Lines file against the attack_plan JSON Schema.

Created on Sun Aug 17 20:36:17 2025

@author: adetu
"""
|
| |
|
import json
import sys
from json import JSONDecodeError
from pathlib import Path

from jsonschema import Draft202012Validator as V
|
| |
|
def load_schema():
    """Locate, clean and parse the attack-plan JSON Schema.

    Two locations are probed, both relative to the current working
    directory, and the first one that is a regular file is used:

    * ``schemas/attack_plan.schema.json``
    * ``../schemas/attack_plan.schema.json``

    The file is decoded as ``utf-8-sig`` so a leading byte-order mark is
    dropped. Every line whose stripped text starts with a Markdown code
    fence (three backticks) is removed before parsing, so a schema that was
    pasted together with its fence markers still loads.

    Returns:
        The value produced by ``json.loads`` for the cleaned file content.

    Raises:
        RuntimeError: The file contains nothing once fences and surrounding
            whitespace are removed.
        json.JSONDecodeError: The cleaned content is not valid JSON. The
            position and the neighbouring lines are printed before the
            exception is re-raised.
        FileNotFoundError: Neither candidate path is an existing file.
    """
    candidates = [
        Path("schemas/attack_plan.schema.json"),
        Path("../schemas/attack_plan.schema.json"),
    ]

    for path in candidates:
        # is_file() rather than exists(): a directory that happens to carry
        # this name must be skipped, not handed to read_text().
        if not path.is_file():
            continue

        text = path.read_text(encoding="utf-8-sig")

        # Split on "\n" only. str.splitlines() also breaks on U+2028, U+2029
        # and U+0085, which may legally appear unescaped inside JSON strings;
        # re-joining with "\n" would then corrupt an otherwise valid schema.
        kept = [ln for ln in text.split("\n") if not ln.strip().startswith("```")]
        text = "\n".join(kept).strip()
        if not text:
            raise RuntimeError(f"Schema file is empty: {path}")

        try:
            schema = json.loads(text)
        except JSONDecodeError as e:
            # e.lineno is 1-based and counts "\n" only, so slice the same
            # "\n"-split view: the line before, the offending line, the next.
            ctx = text.split("\n")[max(e.lineno - 2, 0):e.lineno + 1]
            print(f"[schema] JSON error in {path}: {e.msg} at line {e.lineno}, col {e.colno}")
            print("Context:\n" + "\n".join(ctx))
            raise

        print(f"[schema] loaded {path.resolve()}")
        return schema

    # Name every path that was tried and the directory they were resolved
    # against, so a wrong working directory is immediately visible.
    tried = ", ".join(p.as_posix() for p in candidates)
    raise FileNotFoundError(
        f"Could not find schema; tried {tried} (relative to {Path.cwd()})"
    )
|
| |
|
def main():
    """Validate each record of a JSON Lines file against the loaded schema.

    The input path is ``sys.argv[1]`` when an argument is given, otherwise
    ``scripts/train_attackplan.jsonl``. Lines that are empty or whitespace
    only are skipped. A line that is not parseable JSON, or that produces at
    least one schema error, counts as invalid; at most the first eight
    schema errors of a record are printed, ordered by instance path and then
    message. A ``[done]`` summary with both counters is printed at the end.

    Raises:
        Whatever ``load_schema`` raises, plus the usual ``OSError`` /
        ``UnicodeDecodeError`` if the input file cannot be opened or decoded.
    """
    schema = load_schema()
    validator = V(schema)

    src = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("scripts/train_attackplan.jsonl")

    valid = invalid = 0
    # Iterate the file object instead of str.splitlines(): text-mode iteration
    # breaks only on real newlines, whereas splitlines() also breaks on
    # U+2028/U+2029/U+0085, which may appear unescaped inside a JSON string
    # and would cut a valid record in two. utf-8-sig matches how the schema
    # is read and keeps a leading BOM from invalidating the first record.
    with src.open(encoding="utf-8-sig") as fh:
        for lineno, line in enumerate(fh, 1):
            line = line.rstrip("\n")
            if not line.strip():
                continue

            try:
                obj = json.loads(line)
            except JSONDecodeError as e:
                print(f"[line {lineno}] not JSON: {e.msg} at {e.lineno}:{e.colno}")
                print(" snippet:", line[:200])
                invalid += 1
                continue

            errs = sorted(validator.iter_errors(obj), key=lambda e: (list(e.path), e.message))
            if not errs:
                valid += 1
                continue

            invalid += 1
            print(f"[line {lineno}] INVALID:")
            # Cap the report so one badly broken record cannot flood the output.
            for e in errs[:8]:
                print(" -", e.message, "at", list(e.path))

    print(f"[done] {valid} valid, {invalid} invalid")
|
| |
|
# Run the validation only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
| |
|