|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json |
|
|
import os |
|
|
from typing import Dict, Any |
|
|
|
|
|
from engine.parser_rules import parse_text_rules |
|
|
from engine.parser_ext import parse_text_extended |
|
|
|
|
|
|
|
|
|
|
|
# Location of the gold test cases (a JSON file). The path is relative, so it
# is resolved against the process's current working directory.
GOLD_PATH = "training/gold_tests.json" |
|
|
|
|
|
|
|
|
def evaluate_single_test(test: Dict[str, Any]) -> Dict[str, Any]:
    """
    Evaluate one gold test with rules + extended parsers.

    The rule-based parser output is the base; every field the extended
    parser reports with a value other than "Unknown" overrides it. Each
    expected field is then compared case-insensitively with the merged
    output (a field missing from the merged output counts as "Unknown").

    Args:
        test: Gold test case. Reads "input" (text to parse, default "")
            and "expected" (mapping of field name to expected value;
            missing or null is treated as empty).

    Returns:
        Dict with:
            "correct":  number of scored fields that matched,
            "total":    number of scored fields,
            "accuracy": correct / total, or 0 when nothing was scored,
            "wrong":    {field: {"expected": ..., "got": ...}} per mismatch,
            "merged":   the merged parser output.

        A field whose expected value is exactly "Unknown" and which also
        comes out as "Unknown" earns no credit and is left out of "total",
        so a fully correct parse always reports accuracy 1.0 and
        total == correct + len(wrong).
    """
    text = test.get("input", "")
    # `or {}` also covers an explicit null "expected" entry in the gold file.
    expected = test.get("expected") or {}

    rule_out = parse_text_rules(text).get("parsed_fields", {})
    ext_out = parse_text_extended(text).get("parsed_fields", {})

    # Extended results win unless they carry no information ("Unknown").
    merged = dict(rule_out)
    merged.update({k: v for k, v in ext_out.items() if v != "Unknown"})

    correct = 0
    unscored = 0
    wrong = {}

    for field, exp_val in expected.items():
        got = merged.get(field, "Unknown")
        # str() keeps non-string values (numbers, booleans, None) from
        # raising AttributeError on .lower(); plain strings compare exactly
        # as before.
        if str(got).lower() != str(exp_val).lower():
            wrong[field] = {"expected": exp_val, "got": got}
        elif exp_val == "Unknown":
            # Trivial agreement: no credit, and not counted against the
            # parser either.
            unscored += 1
        else:
            correct += 1

    total = len(expected) - unscored

    return {
        "correct": correct,
        "total": total,
        "accuracy": correct / total if total else 0,
        "wrong": wrong,
        "merged": merged,
    }
|
|
|
|
|
|
|
|
def run_parser_eval(mode: str = "rules_extended") -> Dict[str, Any]:
    """
    Evaluate ALL gold tests using rules + extended parsing only.

    Loads the gold tests from GOLD_PATH, scores each one with
    evaluate_single_test, and aggregates the per-field counts.

    Args:
        mode: Not consulted by this function; the reported "mode" is always
            "rules+extended". Kept so existing callers keep working.

    Returns:
        {"error": <message>} when GOLD_PATH does not exist. Otherwise a
        summary dict with:
            "mode":             always "rules+extended",
            "tests":            number of gold tests loaded,
            "total_correct":    sum of per-test "correct",
            "total_fields":     sum of per-test "total",
            "overall_accuracy": total_correct / total_fields, or 0 when no
                                fields were scored,
            "wrong_cases":      one entry per test with at least one
                                mismatch ("name", "wrong", "parsed",
                                "expected").
    """
    if not os.path.exists(GOLD_PATH):
        return {"error": f"Gold test file not found at {GOLD_PATH}"}

    with open(GOLD_PATH, "r", encoding="utf-8") as f:
        gold = json.load(f)

    wrong_cases = []
    total_correct = 0
    total_fields = 0

    for test in gold:
        out = evaluate_single_test(test)

        total_correct += out["correct"]
        total_fields += out["total"]

        # Only tests with at least one mismatching field are reported.
        if out["wrong"]:
            wrong_cases.append({
                "name": test.get("name", "Unnamed"),
                "wrong": out["wrong"],
                "parsed": out["merged"],
                "expected": test.get("expected", {}),
            })

    return {
        "mode": "rules+extended",
        "tests": len(gold),
        "total_correct": total_correct,
        "total_fields": total_fields,
        "overall_accuracy": total_correct / total_fields if total_fields else 0,
        "wrong_cases": wrong_cases,
    }
|
|
|