# ocr / spacyNER /training /evaluate.py
# Hanz Pillerva
# new up
# 486ed05
# training/evaluate.py
# ============================================================
# STEP 4 of 4 — EVALUATE THE TRAINED MODEL
# ============================================================
# Uses CIVIL REGISTRY dev.spacy only (not merged_dev.spacy)
# so scores reflect actual civil label performance.
# ============================================================
import spacy, subprocess, sys, json
from pathlib import Path
# Location of the trained pipeline that the __main__ entry point loads.
# The path is relative, so it resolves against the current working directory.
MODEL_PATH = "./models/civil_registry_model/model-best"
# ── Visual test cases (civil registry forms) ───────────────
# Each case is a dict with three keys:
#   "form"     - display name printed as the heading for the case
#   "text"     - the raw form text that is passed to the model
#   "expected" - labels that must appear among the predicted entities;
#                any other predicted label is reported as "extra"
TEST = [
    {
        "form": "Form 102 — Birth Certificate",
        "text": (
            "Registry No.: 2024-001\n"
            "1. NAME (First): Ana (Middle): Garcia (Last): Reyes\n"
            "2. SEX: Female\n"
            "3. DATE OF BIRTH: August 21, 1995\n"
            "4. PLACE OF BIRTH: Pasig City\n"
            "7. MAIDEN NAME (First): Gloria (Middle): Santos (Last): Garcia\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Ramon (Middle): Cruz (Last): Reyes\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected": [
            "F102_CHILD_FIRST", "F102_CHILD_MIDDLE", "F102_CHILD_LAST",
            "F102_SEX", "F102_DATE_OF_BIRTH", "F102_PLACE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
    },
    {
        "form": "Form 103 — Death Certificate",
        "text": (
            "1. NAME (First): Fernando (Middle): Santos (Last): Cruz\n"
            "2. SEX: Male\n"
            "4. AGE: 70\n"
            "5. PLACE OF DEATH: PGH Manila\n"
            "6. DATE OF DEATH: March 3, 2023\n"
            "Immediate cause: Renal Failure"
        ),
        "expected": [
            "F103_DECEASED_FIRST", "F103_DECEASED_MIDDLE", "F103_DECEASED_LAST",
            "F103_SEX", "F103_AGE", "F103_PLACE_OF_DEATH",
            "F103_DATE_OF_DEATH", "F103_CAUSE_IMMEDIATE",
        ],
    },
    {
        "form": "Form 97 — Marriage Certificate",
        "text": (
            "Registry No.: 2021-MC-088\n"
            "MC Date of Registration: December 15, 2021\n"
            "Husband (First): Miguel\n"
            "Husband (Middle): Santos\n"
            "Husband (Last): dela Cruz\n"
            "Husband Age: 30\n"
            "Husband Citizenship: Filipino\n"
            "Husband Father (First): Fernando\n"
            "Husband Father (Middle): Reyes\n"
            "Husband Father (Last): Mendoza\n"
            "Husband Father Citizenship: Filipino\n"
            "Husband Mother (First): Rosario\n"
            "Husband Mother (Middle): Lim\n"
            "Husband Mother (Last): Santos\n"
            "Husband Mother Citizenship: Filipino\n"
            "Wife (First): Sofia\n"
            "Wife (Middle): Tan\n"
            "Wife (Last): Lim\n"
            "Wife Age: 27\n"
            "Wife Citizenship: Filipino\n"
            "Wife Father (First): Antonio\n"
            "Wife Father (Middle): Cruz\n"
            "Wife Father (Last): Tan\n"
            "Wife Father Citizenship: Filipino\n"
            "Wife Mother (First): Shirley\n"
            "Wife Mother (Middle): Go\n"
            "Wife Mother (Last): Reyes\n"
            "Wife Mother Citizenship: Filipino\n"
            "MC Date of Marriage: December 12, 2021\n"
            "MC Place of Marriage: Taguig City"
        ),
        "expected": [
            "F97_REGISTRY_NO", "F97_DATE_OF_REGISTRATION",
            "F97_HUSBAND_FIRST", "F97_HUSBAND_MIDDLE", "F97_HUSBAND_LAST",
            "F97_HUSBAND_AGE", "F97_HUSBAND_CITIZENSHIP",
            "F97_HUSBAND_FATHER_FIRST", "F97_HUSBAND_MOTHER_FIRST",
            "F97_WIFE_FIRST", "F97_WIFE_MIDDLE", "F97_WIFE_LAST",
            "F97_WIFE_AGE", "F97_WIFE_CITIZENSHIP",
            "F97_WIFE_FATHER_FIRST", "F97_WIFE_MOTHER_FIRST",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
    },
    {
        "form": "Form 90 — Marriage License (Groom + Bride)",
        "text": (
            "Registry No.: 2024-BC-001\n"
            "ML Date of Registration: January 10, 2024\n"
            "GROOM\n"
            "Groom (First): Jose\n"
            "Groom (Middle): Santos\n"
            "Groom (Last): Ramos\n"
            "Groom Date of Birth: March 15, 1995\n"
            "Groom Age: 39\n"
            "Groom Place of Birth: Manila\n"
            "Groom Sex: Male\n"
            "Groom Citizenship: Filipino\n"
            "Groom Father (First): Pedro\n"
            "Groom Father (Middle): dela Cruz\n"
            "Groom Father (Last): Villanueva\n"
            "Groom Father Citizenship: Filipino\n"
            "Groom Mother (First): Lourdes\n"
            "Groom Mother (Middle): Reyes\n"
            "Groom Mother (Last): Bautista\n"
            "Groom Mother Citizenship: Filipino\n"
            "BRIDE\n"
            "Bride (First): Maria\n"
            "Bride (Middle): Garcia\n"
            "Bride (Last): Torres\n"
            "Bride Date of Birth: August 3, 1995\n"
            "Bride Age: 35\n"
            "Bride Place of Birth: Quezon City\n"
            "Bride Sex: Female\n"
            "Bride Citizenship: Filipino\n"
            "Bride Father (First): Eduardo\n"
            "Bride Father (Middle): Mendoza\n"
            "Bride Father (Last): Aquino\n"
            "Bride Father Citizenship: Filipino\n"
            "Bride Mother (First): Gloria\n"
            "Bride Mother (Middle): Santos\n"
            "Bride Mother (Last): Lopez\n"
            "Bride Mother Citizenship: Filipino"
        ),
        "expected": [
            "F90_REGISTRY_NO", "F90_DATE_OF_REGISTRATION",
            "F90_GROOM_FIRST", "F90_GROOM_LAST",
            "F90_GROOM_DATE_OF_BIRTH", "F90_GROOM_AGE",
            "F90_GROOM_PLACE_OF_BIRTH", "F90_GROOM_SEX", "F90_GROOM_CITIZENSHIP",
            "F90_GROOM_FATHER_FIRST", "F90_GROOM_MOTHER_FIRST",
            "F90_BRIDE_FIRST", "F90_BRIDE_LAST",
            "F90_BRIDE_DATE_OF_BIRTH", "F90_BRIDE_AGE",
            "F90_BRIDE_PLACE_OF_BIRTH", "F90_BRIDE_SEX", "F90_BRIDE_CITIZENSHIP",
            "F90_BRIDE_FATHER_FIRST", "F90_BRIDE_MOTHER_FIRST",
        ],
    },
]
def visual_test(nlp, cases=None):
    """Run the model on sample forms and report which expected labels it finds.

    Each case's text is passed through ``nlp``. A label listed under the
    case's ``"expected"`` key counts as found when at least one predicted
    entity carries that label. For every form the function prints one line
    per expected label, up to five unexpected ("extra") labels, and a score
    bar; an overall score and grade are printed at the end.

    Args:
        nlp: Callable that takes a string and returns an object whose ``ents``
            attribute yields items with ``label_`` and ``text`` attributes
            (the ``__main__`` block passes a pipeline from ``spacy.load``).
        cases: Optional iterable of dicts with ``"form"``, ``"text"`` and
            ``"expected"`` keys. Defaults to the module-level ``TEST`` list.

    Returns:
        float: Percentage (0-100) of expected labels found across all cases,
        or ``0.0`` when there is nothing to check.
    """
    if cases is None:
        cases = TEST

    print("=" * 62)
    print(" VISUAL TEST — Does the model find the right labels?")
    print("=" * 62)

    total_correct = 0
    total_expected = 0
    for case in cases:
        expected = case["expected"]
        expected_set = set(expected)  # O(1) membership for the "extra" filter

        doc = nlp(case["text"])
        # One entry per label; if a label is predicted several times the
        # text of the last such entity is the one displayed.
        found = {ent.label_: ent.text for ent in doc.ents}
        extra = [(label, text) for label, text in found.items()
                 if label not in expected_set]

        print(f"\n {case['form']}")
        print(f" {'─'*56}")

        correct = 0
        for label in expected:
            if label in found:
                print(f" ✅ {label:<35} = '{found[label]}'")
                correct += 1
            else:
                print(f" ❌ {label:<35} ← NOT FOUND")

        if extra:
            print(f" {'·'*56}")
            # Cap the listing so a noisy model does not flood the report.
            for label, text in extra[:5]:
                print(f" ⚠️ {label:<35} = '{text}' (extra)")

        pct = _percent(correct, len(expected))
        grade = "GOOD" if pct >= 70 else "PARTIAL" if pct >= 40 else "POOR"
        print(f"\n [{_score_bar(pct)}] {pct:.0f}% {grade} ({correct}/{len(expected)})")

        total_correct += correct
        total_expected += len(expected)

    overall = _percent(total_correct, total_expected)
    print(f"\n{'=' * 62}")
    print(f" OVERALL: [{_score_bar(overall)}] {overall:.0f}% ({total_correct}/{total_expected})")
    if overall >= 70:
        print(" Grade: ✅ GOOD — model is working well")
    elif overall >= 40:
        print(" Grade: ⚠️ PARTIAL — needs more training examples")
    else:
        print(" Grade: ❌ POOR — check training pipeline")
    print(f"{'=' * 62}")
    return overall


def _percent(hits, total):
    """Return ``hits / total`` as a percentage; 0.0 when ``total`` is 0."""
    return hits / total * 100 if total else 0.0


def _score_bar(pct):
    """Render a 0-100 percentage as a 20-character block bar (5% per cell)."""
    filled = int(pct / 5)
    return "█" * filled + "░" * (20 - filled)
def spacy_eval(model_path):
    """Run ``python -m spacy evaluate`` on the civil-only dev set and print per-label scores.

    The evaluation runs in a subprocess using the current interpreter and
    writes its JSON output to ``data/training/eval_results.json``. When the
    subprocess succeeds, the ``ents_per_type`` section of that file is split
    into labels that start with ``FORM_`` and all other labels, and both
    groups are printed.

    Nothing is evaluated when ``data/training/dev.spacy`` is missing, and the
    results file is not read when the subprocess exits with a non-zero code
    (a file left over from an earlier run would otherwise be reported as if
    it were fresh).

    Args:
        model_path: Path (``str`` or ``Path``) of the trained pipeline; it is
            passed to the spaCy CLI as a string.

    Returns:
        None. All output goes to stdout.
    """
    dev = Path("data/training/dev.spacy")
    if not dev.exists():
        print("\n ⚠️ dev.spacy not found — skipping spaCy eval")
        print(" → Run: python training/prepare_data.py")
        return

    results_path = "data/training/eval_results.json"

    print(f"\n{'=' * 62}")
    print(" spaCy OFFICIAL EVAL — civil registry labels only")
    print(f" Dev file: {dev} (civil registry, NOT merged)")
    print(f"{'=' * 62}\n")
    # The child writes straight to the inherited stdout; flush first so the
    # header above cannot appear after the child's output when piped.
    sys.stdout.flush()

    result = subprocess.run([
        sys.executable, "-m", "spacy", "evaluate",
        str(model_path), str(dev),
        "--output", results_path,
    ])
    if result.returncode != 0:
        print(f"\n ❌ spaCy evaluate failed (exit code {result.returncode})"
              f" — not reading {results_path}")
        return

    results_file = Path(results_path)
    if not results_file.exists():
        return

    data = json.loads(results_file.read_text(encoding="utf-8"))
    # "ents_per_type" may be absent or null; treat both as "no labels".
    _print_per_type_scores(data.get("ents_per_type") or {})


def _print_per_type_scores(per_type):
    """Print the per-label table for non-``FORM_`` labels and the ``FORM_`` average F."""
    civil = {label: scores for label, scores in per_type.items()
             if not label.startswith("FORM_")}
    funsd = {label: scores for label, scores in per_type.items()
             if label.startswith("FORM_")}

    # NOTE(review): values are printed exactly as stored. If the JSON holds
    # fractions in [0, 1] rather than percentages, the table and the "%"
    # suffix below understate the scores by 100x — confirm against a real
    # eval_results.json before changing the scaling.
    if civil:
        print("\n CIVIL REGISTRY LABELS (what matters):")
        print(f" {'Label':<35} {'P':>6} {'R':>6} {'F':>6}")
        print(f" {'─'*57}")
        any_nonzero = False
        for label, scores in sorted(civil.items()):
            f = scores.get("f", 0)
            p = scores.get("p", 0)
            r = scores.get("r", 0)
            flag = "" if f > 0 else " ← ❌ 0%"
            if f > 0:
                any_nonzero = True
            print(f" {label:<35} {p:>6.1f} {r:>6.1f} {f:>6.1f}{flag}")
        if not any_nonzero:
            print("\n ❌ ALL civil labels are 0% — Phase 2 fine-tuning needed")
            print(" → Run: python training/train.py (two-phase training)")

    if funsd:
        avg_f = sum(scores.get("f", 0) for scores in funsd.values()) / len(funsd)
        print(f"\n FUNSD LABELS (background learning): avg F={avg_f:.1f}%")
if __name__ == "__main__":
    # Script entry point: load the trained pipeline, run the visual label
    # check, then run the spaCy CLI evaluation on the dev set.
    model_path = Path(MODEL_PATH)
    if not model_path.exists():
        print(f"❌ Model not found: {model_path}")
        print(" Run: python training/train.py")
        sys.exit(1)

    print(f"\n Loading model: {model_path}\n")
    nlp = spacy.load(str(model_path))

    visual_test(nlp)
    spacy_eval(model_path)

    # spacy_eval returns nothing, so only claim the results were saved when
    # the file is actually on disk (it is skipped when dev.spacy is missing).
    if Path("data/training/eval_results.json").exists():
        print("\n Results saved → data/training/eval_results.json")