File size: 2,412 Bytes
59446fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from __future__ import annotations

import json
import sys
from pathlib import Path

# Directory one level above the folder that contains this script; also used as
# the base for the golden-case data file path.
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT/src at the front of the import search path. This must run before the
# import below (presumably so `packetcourt` resolves to the in-tree copy
# without being installed -- confirm against the repository layout).
sys.path.insert(0, str(ROOT / "src"))

# Deliberately placed after the sys.path tweak above; do not hoist to the top.
from packetcourt import audit_packet


def load_cases(path: Path) -> list[dict]:
    """Load golden cases from a JSON Lines file.

    Every non-blank line of *path* is parsed as one JSON document; blank and
    whitespace-only lines are skipped.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's locale encoding. Records are separated on newlines only:
    ``str.splitlines()`` would also break on characters such as U+2028 or
    U+0085, which may appear unescaped inside a JSON string and would cut a
    valid record in half.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        The parsed records, in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Iterating a text-mode file yields newline-terminated lines only, unlike
    # str.splitlines(), which recognises many extra Unicode line boundaries.
    with path.open(encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]


def evaluate_case(case: dict) -> tuple[int, int, list[str]]:
    """Audit one golden case and score the result against its expectations.

    ``case`` must supply ``front_text``, ``back_text`` and ``expected_claims``.
    The keys ``expected_verdicts``, ``expected_gap_terms``,
    ``expected_best_before``, ``expected_after_opening`` and
    ``expected_sugar_teaspoons`` are optional; each one present adds checks.

    Returns:
        ``(passed, total, failures)`` where ``failures`` lists the label of
        every check that did not pass, in the order the checks were made.
    """
    audit = audit_packet(case["front_text"], case["back_text"])
    outcomes: list[tuple[bool, str]] = []

    # The extracted claims must match the expected set exactly.
    found_claims = {item.claim for item in audit.claims}
    outcomes.append((found_claims == set(case["expected_claims"]), "claim set"))

    # One check per expected verdict; a claim that was not found yields None.
    verdict_by_claim = {item.claim: item.verdict.value for item in audit.claims}
    outcomes.extend(
        (verdict_by_claim.get(name) == wanted, f"{name} verdict")
        for name, wanted in case.get("expected_verdicts", {}).items()
    )

    # Flatten all persuasion-gap findings into one lowercase haystack so the
    # expected terms can be matched case-insensitively.
    gap_fragments = [
        f"{finding.headline} {finding.front_impression} {finding.quiet_context}"
        for finding in audit.persuasion_gap
    ]
    searchable_gap = " ".join(gap_fragments).lower()
    outcomes.extend(
        (needle.lower() in searchable_gap, f"gap contains {needle}")
        for needle in case.get("expected_gap_terms", [])
    )

    # Optional exact-match checks. The getters are lazy so an audit attribute
    # is only read when the case actually asks for that check.
    optional_checks = (
        ("expected_best_before", lambda: audit.expiry.best_before, "best-before date"),
        (
            "expected_after_opening",
            lambda: audit.expiry.after_opening_instruction,
            "after-opening instruction",
        ),
        (
            "expected_sugar_teaspoons",
            lambda: audit.whole_packet.sugar_teaspoons,
            "sugar teaspoons",
        ),
    )
    for key, read_actual, label in optional_checks:
        if key in case:
            outcomes.append((read_actual() == case[key], label))

    failed_labels = [label for ok, label in outcomes if not ok]
    total = len(outcomes)
    return total - len(failed_labels), total, failed_labels


def main() -> int:
    """Run every golden case, print a per-case report, and return an exit code.

    Cases are read from ``data/golden_cases.jsonl`` under ``ROOT``. Each case
    prints a PASS/FAIL line with its check tally, followed by one indented
    line per failed check, and a summary line closes the report.

    Returns:
        ``0`` when every check passed, ``1`` otherwise.
    """
    golden_path = ROOT / "data" / "golden_cases.jsonl"
    cases = load_cases(golden_path)

    checks_passed = 0
    checks_total = 0
    for case in cases:
        ok_count, check_count, failed_labels = evaluate_case(case)
        checks_passed += ok_count
        checks_total += check_count

        status = "FAIL" if failed_labels else "PASS"
        print(f"{status} {case['id']} {case['title']}: {ok_count}/{check_count}")
        for label in failed_labels:
            print(f"  - {label}")

    print(f"\nPacketCourt golden evaluation: {checks_passed}/{checks_total} checks passed across {len(cases)} cases.")
    # Non-zero exit status signals at least one failed check.
    return int(checks_passed != checks_total)


# Script entry point: propagate main()'s result as the process exit status.
if __name__ == "__main__":
    sys.exit(main())