# BactKing / training/parser_eval.py
# EphAsad's picture
# Upload 23 files
# 1168cd6 verified
# training/parser_eval.py
# ------------------------------------------------------------
# Parser Evaluation (Stage 10A)
#
# This version ONLY evaluates:
# - Rule parser
# - Extended parser
#
# The LLM parser is intentionally disabled at this stage
# because alias maps and schema are not trained yet.
#
# This makes Stage 10A FAST and stable (< 3 seconds).
# ------------------------------------------------------------
import json
import os
from typing import Dict, Any
from engine.parser_rules import parse_text_rules
from engine.parser_ext import parse_text_extended
# Path to the gold tests (a JSON file). The path is relative, so it is
# resolved against the current working directory of the running process.
GOLD_PATH = "training/gold_tests.json"
def evaluate_single_test(test: Dict[str, Any]) -> Dict[str, Any]:
    """
    Evaluate one gold test with rules + extended parsers.

    Args:
        test: One gold-test record. ``test["input"]`` is the text to parse
            (defaults to ``""``) and ``test["expected"]`` maps field names
            to expected values (defaults to ``{}``).

    Returns:
        A dict with:

        - ``"correct"``: number of scored fields whose parsed value matched.
        - ``"total"``: number of scored fields. A field that is expected to
          be "Unknown" and is parsed as "Unknown" is neutral and is not
          scored, so it does not drag the accuracy down.
        - ``"accuracy"``: ``correct / total`` (0 when nothing was scored).
        - ``"wrong"``: ``{field: {"expected": ..., "got": ...}}`` for every
          mismatching field.
        - ``"merged"``: the merged parser output that was scored.
    """
    text = test.get("input", "")
    expected = test.get("expected", {})

    # Run deterministic parsers
    rule_out = parse_text_rules(text).get("parsed_fields", {})
    ext_out = parse_text_extended(text).get("parsed_fields", {})

    # Merge rule + extended (extended overwrites rules), except that an
    # extended "Unknown" never erases a value the rule parser produced.
    merged = dict(rule_out)
    merged.update({k: v for k, v in ext_out.items() if v != "Unknown"})

    total = 0
    correct = 0
    wrong = {}
    for field, exp_val in expected.items():
        got = merged.get(field, "Unknown")

        # Compare case-insensitively as text. str() keeps non-string values
        # (numbers, booleans, None, lists from the JSON gold file or from a
        # parser) from raising AttributeError on .lower().
        exp_norm = str(exp_val).lower()
        got_norm = str(got).lower()

        if got_norm != exp_norm:
            wrong[field] = {"expected": exp_val, "got": got}
            total += 1
        elif exp_norm != "unknown":
            correct += 1
            total += 1
        # else: expected Unknown and got Unknown -> neutral, not scored.

    return {
        "correct": correct,
        "total": total,
        "accuracy": correct / total if total else 0,
        "wrong": wrong,
        "merged": merged,
    }
def run_parser_eval(mode: str = "rules_extended") -> Dict[str, Any]:
    """
    Evaluate ALL gold tests using rules + extended parsing only.

    Args:
        mode: Currently unused by this function; the returned summary always
            reports ``"rules+extended"`` regardless of the value passed.

    Returns:
        ``{"error": ...}`` when the gold test file does not exist. Otherwise
        a summary dict with:

        - ``"mode"``: always ``"rules+extended"``.
        - ``"tests"``: number of gold tests loaded.
        - ``"total_correct"`` / ``"total_fields"``: sums of the per-test
          ``"correct"`` and ``"total"`` counts.
        - ``"overall_accuracy"``: ``total_correct / total_fields`` (0 when
          there are no fields).
        - ``"wrong_cases"``: one entry per test with at least one wrong
          field, holding its name, the wrong fields, the parsed output and
          the expected values.
    """
    if not os.path.exists(GOLD_PATH):
        return {"error": f"Gold test file not found at {GOLD_PATH}"}

    with open(GOLD_PATH, "r", encoding="utf-8") as f:
        gold = json.load(f)

    wrong_cases = []
    total_correct = 0
    total_fields = 0

    for test in gold:
        out = evaluate_single_test(test)
        total_correct += out["correct"]
        total_fields += out["total"]

        # Only tests with at least one mismatch are reported in detail.
        if out["wrong"]:
            wrong_cases.append({
                "name": test.get("name", "Unnamed"),
                "wrong": out["wrong"],
                "parsed": out["merged"],
                "expected": test.get("expected", {}),
            })

    return {
        "mode": "rules+extended",
        "tests": len(gold),
        "total_correct": total_correct,
        "total_fields": total_fields,
        "overall_accuracy": total_correct / total_fields if total_fields else 0,
        "wrong_cases": wrong_cases,
    }