Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import sys | |
| from pathlib import Path | |
| ROOT = Path(__file__).resolve().parents[1] | |
| sys.path.insert(0, str(ROOT / "src")) | |
| from packetcourt import audit_packet | |
def load_cases(path: Path) -> list[dict]:
    """Load golden cases from a JSON Lines file.

    Every non-blank line of *path* is parsed as one JSON document; blank and
    whitespace-only lines are skipped.

    Args:
        path: Location of the JSONL file to read.

    Returns:
        The parsed documents, in file order.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the file path and 1-based line number.
    """
    # Decode explicitly as UTF-8 instead of the platform default so that
    # non-ASCII case text reads the same on every machine.
    text = path.read_text(encoding="utf-8")

    cases: list[dict] = []
    # Split on "\n" only. str.splitlines() also breaks on U+2028, U+2029 and
    # U+0085, which may appear unescaped inside JSON strings and would cut a
    # record in half. Text-mode reading has already normalised "\r\n".
    for lineno, raw_line in enumerate(text.split("\n"), start=1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            cases.append(json.loads(line))
        except json.JSONDecodeError as err:
            # Re-raise the same exception type, adding where the bad record is.
            raise json.JSONDecodeError(
                f"{path}:{lineno}: {err.msg}", err.doc, err.pos
            ) from err
    return cases
def evaluate_case(case: dict) -> tuple[int, int, list[str]]:
    """Audit one golden case and score the audit against its expectations.

    ``audit_packet`` is run on the case's ``front_text`` and ``back_text``.
    The claim-set comparison always runs. The verdict, gap-term, best-before,
    after-opening and sugar-teaspoon comparisons run only for the expectation
    keys that are present in *case*.

    Args:
        case: One golden-case record. ``front_text``, ``back_text`` and
            ``expected_claims`` are read unconditionally; the other
            ``expected_*`` keys are optional.

    Returns:
        ``(passed, total, failures)``: how many checks passed, how many were
        performed, and the labels of the failed checks in the order they ran.

    Raises:
        KeyError: If ``front_text``, ``back_text`` or ``expected_claims`` is
            missing from *case*.
    """
    report = audit_packet(case["front_text"], case["back_text"])
    failed: list[str] = []
    performed = 0

    def record(ok: bool, label: str) -> None:
        """Count one check and keep its label when it did not pass."""
        nonlocal performed
        performed += 1
        if not ok:
            failed.append(label)

    # The set of detected claims must match the expected set exactly.
    found_claims = {item.claim for item in report.claims}
    record(found_claims == set(case["expected_claims"]), "claim set")

    # Per-claim verdicts; a claim the audit did not produce compares as None.
    verdict_by_claim = {item.claim: item.verdict.value for item in report.claims}
    for name, wanted in case.get("expected_verdicts", {}).items():
        record(verdict_by_claim.get(name) == wanted, f"{name} verdict")

    # Gap terms are matched case-insensitively against the combined text of
    # all persuasion-gap findings.
    fragments = [
        f"{finding.headline} {finding.front_impression} {finding.quiet_context}"
        for finding in report.persuasion_gap
    ]
    haystack = " ".join(fragments).lower()
    for term in case.get("expected_gap_terms", []):
        record(term.lower() in haystack, f"gap contains {term}")

    # Optional exact-value checks. The audit attribute is read lazily, only
    # when the case actually carries the matching expectation key.
    optional_checks = (
        (
            "expected_best_before",
            "best-before date",
            lambda: report.expiry.best_before,
        ),
        (
            "expected_after_opening",
            "after-opening instruction",
            lambda: report.expiry.after_opening_instruction,
        ),
        (
            "expected_sugar_teaspoons",
            "sugar teaspoons",
            lambda: report.whole_packet.sugar_teaspoons,
        ),
    )
    for key, label, read_actual in optional_checks:
        if key in case:
            record(read_actual() == case[key], label)

    return performed - len(failed), performed, failed
def main() -> int:
    """Run every golden case and print a per-case and overall report.

    Cases are loaded from ``data/golden_cases.jsonl`` under ``ROOT``. For each
    case one status line is printed, followed by one line per failed check,
    and a summary line is printed at the end.

    Returns:
        ``0`` when every check of every case passed, otherwise ``1``.
    """
    golden = load_cases(ROOT / "data" / "golden_cases.jsonl")
    checks_ok = 0
    checks_run = 0

    for entry in golden:
        ok_count, run_count, problems = evaluate_case(entry)
        checks_ok += ok_count
        checks_run += run_count

        status = "FAIL" if problems else "PASS"
        print(f"{status} {entry['id']} {entry['title']}: {ok_count}/{run_count}")
        for problem in problems:
            print(f" - {problem}")

    summary = (
        f"\nPacketCourt golden evaluation: {checks_ok}/{checks_run} "
        f"checks passed across {len(golden)} cases."
    )
    print(summary)

    # The return value is used as the process exit status by the caller.
    return 1 if checks_ok != checks_run else 0
# Script entry point: run the evaluation and use its result as the exit status.
if __name__ == "__main__":
    sys.exit(main())