File size: 2,303 Bytes
fba140f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# -*- coding: utf-8 -*-
"""

Created on Sun Aug 17 20:36:17 2025



@author: adetu

"""

import json, sys
from json import JSONDecodeError
from pathlib import Path
from jsonschema import Draft202012Validator as V

def load_schema():
    """Locate, clean up, and parse the attack-plan JSON Schema file.

    The candidate paths are tried in order, relative to the current working
    directory; the first one that is a regular file is used. The file is
    decoded as UTF-8 (a leading BOM is tolerated) and every line that starts
    with a markdown code fence is blanked out before parsing.

    Returns:
        The parsed JSON document, exactly as ``json.loads`` produces it.

    Raises:
        RuntimeError: the file holds nothing but whitespace and fences.
        JSONDecodeError: the file is not valid JSON. The position and a short
            excerpt are printed first, then the error is re-raised.
        FileNotFoundError: none of the candidate paths is a regular file.
    """
    candidates = [
        Path("schemas/attack_plan.schema.json"),
        Path("../schemas/attack_plan.schema.json"),
    ]
    for p in candidates:
        # is_file() rather than exists(): a directory of that name must be
        # skipped, not opened and crashed on.
        if not p.is_file():
            continue
        raw = p.read_text(encoding="utf-8-sig")  # handle BOM
        # Blank out (instead of dropping) accidental markdown fences so the
        # line numbers in a JSON error still match the file on disk. Split on
        # "\n" only: str.splitlines() would also break on U+2028/U+2029,
        # which are legal inside JSON strings.
        lines = [
            "" if ln.strip().startswith("```") else ln
            for ln in raw.split("\n")
        ]
        # Only trailing whitespace is trimmed; trimming the front would shift
        # the reported line numbers.
        raw = "\n".join(lines).rstrip()
        if not raw.strip():
            raise RuntimeError(f"Schema file is empty: {p}")
        try:
            schema = json.loads(raw)
        except JSONDecodeError as e:
            # One line before and one line after the offending line.
            ctx = lines[max(e.lineno - 2, 0):e.lineno + 1]
            print(f"[schema] JSON error in {p}: {e.msg} at line {e.lineno}, col {e.colno}")
            print("Context:\n" + "\n".join(ctx))
            raise
        print(f"[schema] loaded {p.resolve()}")
        return schema
    tried = ", ".join(str(p) for p in candidates)
    raise FileNotFoundError(
        f"Could not find schema; tried {tried} (relative to {Path.cwd()})"
    )

def main():
    """Validate every record of a JSONL file against the loaded schema.

    The input path is taken from ``sys.argv[1]`` and falls back to
    ``scripts/train_attackplan.jsonl``. Each non-blank line is parsed as JSON
    and checked with the validator. Lines that are not JSON, or that violate
    the schema, are reported on stdout (at most eight schema errors per
    line), followed by a final valid/invalid tally.
    """
    schema = load_schema()
    validator = V(schema)

    if len(sys.argv) > 1:
        src = Path(sys.argv[1])
    else:
        src = Path("scripts/train_attackplan.jsonl")

    valid = invalid = 0
    # utf-8-sig matches the schema loader: a leading BOM would otherwise make
    # the first record fail to parse. Iterating the file splits records on
    # real newlines only; str.splitlines() would also break on U+2028/U+2029,
    # which may appear unescaped inside JSON strings.
    with src.open(encoding="utf-8-sig") as fh:
        for i, line in enumerate(fh, 1):
            line = line.rstrip("\n")
            if not line.strip():  # skip blanks
                continue
            try:
                obj = json.loads(line)
            except JSONDecodeError as e:
                print(f"[line {i}] not JSON: {e.msg} at {e.lineno}:{e.colno}")
                print(" snippet:", line[:200])
                invalid += 1
                continue

            # Sort so the report order is stable from run to run.
            errs = sorted(
                validator.iter_errors(obj),
                key=lambda e: (list(e.path), e.message),
            )
            if errs:
                invalid += 1
                print(f"[line {i}] INVALID:")
                for e in errs[:8]:
                    print("  -", e.message, "at", list(e.path))
            else:
                valid += 1
    print(f"[done] {valid} valid, {invalid} invalid")

if __name__ == "__main__":
    main()