|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| import sys
|
| import os
|
| import time
|
| import argparse
|
| from collections import defaultdict
|
| from pathlib import Path
|
| from datetime import datetime
|
|
|
|
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
| from spacyNER.extractor import CivilRegistryNER
|
| from spacyNER.autofill import AutoFillEngine
|
| from spacyNER.models import Form1A, Form2A, Form3A
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Labelled sample inputs for Form 1A (birth certificate) extraction.
#
# Each case is a dict with:
#   id              - "<form>-<seq>"; the part before "-" selects the extractor.
#   desc            - human-readable description printed in the reports.
#   text            - simulated OCR text handed to the extractor.
#   expected_labels - NER label keys that should be present in the raw
#                     extraction result (all carry the "F102_" prefix).
#   expected_values - expected filled-form field values.
#                     NOTE(review): not read by the accuracy or
#                     confusion-matrix code; confirm where it is consumed.
#
# The "desc" strings previously contained a mis-decoded dash ("β"); they now
# use a real em dash.
FORM_1A_TESTS = [
    {
        "id": "1A-001",
        "desc": "Standard birth certificate — complete fields",
        "text": (
            "1. NAME (First): Juan (Middle): dela Cruz (Last): Santos\n"
            "2. SEX: Male\n"
            "3. DATE OF BIRTH: March 15, 1990\n"
            "4. PLACE OF BIRTH: Makati City\n"
            "7. MAIDEN NAME (First): Maria (Middle): Reyes (Last): dela Cruz\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Pedro (Middle): Cruz (Last): Santos\n"
            "15. CITIZENSHIP: Filipino\n"
            "20a. DATE: June 10, 1985\n"
            "20b. PLACE: Manila"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_CHILD_MIDDLE", "F102_CHILD_LAST",
            "F102_SEX", "F102_DATE_OF_BIRTH", "F102_PLACE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_MOTHER_CITIZENSHIP",
            "F102_FATHER_FIRST", "F102_FATHER_CITIZENSHIP",
        ],
        "expected_values": {
            "name_of_child": "Juan dela Cruz Santos",
            "sex": "Male",
            "name_of_mother": "Maria Reyes dela Cruz",
            "name_of_father": "Pedro Cruz Santos",
        },
    },
    {
        "id": "1A-002",
        "desc": "Birth certificate — female child, twin birth",
        "text": (
            "1. NAME (First): Ana (Middle): Garcia (Last): Reyes\n"
            "2. SEX: Female\n"
            "3. DATE OF BIRTH: August 21, 1995\n"
            "4. PLACE OF BIRTH: Pasig City\n"
            "5a. TYPE OF BIRTH: Twin\n"
            "7. MAIDEN NAME (First): Gloria (Middle): Santos (Last): Garcia\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Ramon (Middle): Cruz (Last): Reyes\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_SEX", "F102_DATE_OF_BIRTH",
            "F102_PLACE_OF_BIRTH", "F102_TYPE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "name_of_child": "Ana Garcia Reyes",
            "sex": "Female",
            "type_of_birth": "Twin",
        },
    },
    {
        "id": "1A-003",
        "desc": "Birth certificate — no middle name (mother)",
        "text": (
            "1. NAME (First): Carlo (Middle): Santos (Last): Lim\n"
            "2. SEX: Male\n"
            "3. DATE OF BIRTH: December 1, 2010\n"
            "4. PLACE OF BIRTH: Cebu City\n"
            "7. MAIDEN NAME (First): Rosa (Middle): (Last): Santos\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Bernard (Middle): Cruz (Last): Lim\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_SEX", "F102_DATE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "name_of_child": "Carlo Santos Lim",
        },
    },
    {
        "id": "1A-004",
        "desc": "Birth certificate — hyphenated last name",
        "text": (
            "1. NAME (First): Sofia (Middle): Mendoza (Last): Santos-Cruz\n"
            "2. SEX: Female\n"
            "3. DATE OF BIRTH: November 30, 2005\n"
            "4. PLACE OF BIRTH: Quezon City\n"
            "7. MAIDEN NAME (First): Carmen (Middle): Uy (Last): Mendoza\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Roberto (Middle): Cruz (Last): Santos-Cruz\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_CHILD_LAST", "F102_SEX",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "name_of_child": "Sofia Mendoza Santos-Cruz",
        },
    },
    {
        "id": "1A-005",
        "desc": "Birth certificate — with registry number",
        "text": (
            "Registry No.: 2024-001\n"
            "1. NAME (First): Liza (Middle): Ramos (Last): Delos Santos\n"
            "2. SEX: Female\n"
            "3. DATE OF BIRTH: July 7, 1988\n"
            "4. PLACE OF BIRTH: Davao City\n"
            "7. MAIDEN NAME (First): Perla (Middle): Aquino (Last): Ramos\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Manuel (Middle): Santos (Last): Delos Santos\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_REGISTRY_NO", "F102_CHILD_FIRST", "F102_SEX",
            "F102_DATE_OF_BIRTH", "F102_PLACE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "registry_number": "2024-001",
        },
    },
]
|
|
|
|
|
# Labelled sample inputs for Form 2A (death certificate) extraction.
#
# Each case is a dict with the keys "id", "desc", "text", "expected_labels"
# (raw NER label keys, all prefixed "F103_") and "expected_values" (expected
# filled-form field values).
# NOTE(review): "expected_values" is not read by the accuracy or
# confusion-matrix code; confirm where it is consumed.
#
# The "desc" strings previously contained a mis-decoded dash ("β"); they now
# use a real em dash.
FORM_2A_TESTS = [
    {
        "id": "2A-001",
        "desc": "Death certificate — complete fields with all causes",
        "text": (
            "1. NAME (First): Fernando (Middle): Santos (Last): Cruz\n"
            "2. SEX: Male\n"
            "4. AGE: 70\n"
            "5. PLACE OF DEATH: PGH Manila\n"
            "6. DATE OF DEATH: March 3, 2023\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Widowed\n"
            "10. OCCUPATION: Retired Teacher\n"
            "Immediate cause: Renal Failure\n"
            "Antecedent cause: Chronic Kidney Disease\n"
            "Underlying cause: Diabetes Mellitus"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_DECEASED_MIDDLE", "F103_DECEASED_LAST",
            "F103_SEX", "F103_AGE", "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_CIVIL_STATUS", "F103_OCCUPATION",
            "F103_CAUSE_IMMEDIATE", "F103_CAUSE_ANTECEDENT", "F103_CAUSE_UNDERLYING",
        ],
        "expected_values": {
            "name_of_deceased": "Fernando Santos Cruz",
            "age": "70",
            "civil_status": "Widowed",
            "cause_immediate": "Renal Failure",
        },
    },
    {
        "id": "2A-002",
        "desc": "Death certificate — female, elderly, natural cause",
        "text": (
            "1. NAME (First): Josefa (Middle): dela Paz (Last): Gonzales\n"
            "2. SEX: Female\n"
            "3. RELIGION: Roman Catholic\n"
            "4. AGE: 91\n"
            "5. PLACE OF DEATH: Batangas City\n"
            "6. DATE OF DEATH: December 31, 2021\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Widowed\n"
            "Immediate cause: Old Age"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_SEX", "F103_RELIGION",
            "F103_AGE", "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_CIVIL_STATUS", "F103_CAUSE_IMMEDIATE",
        ],
        "expected_values": {
            "name_of_deceased": "Josefa dela Paz Gonzales",
            "religion": "Roman Catholic",
        },
    },
    {
        "id": "2A-003",
        "desc": "Death certificate — with residence field",
        "text": (
            "1. NAME (First): Benjamin (Middle): Ocampo (Last): Velasquez\n"
            "2. SEX: Male\n"
            "4. AGE: 48\n"
            "5. PLACE OF DEATH: Makati Medical Center\n"
            "6. DATE OF DEATH: May 20, 2018\n"
            "7. CITIZENSHIP: Filipino\n"
            "8. RESIDENCE: 12 Ayala Avenue, Makati City\n"
            "9. CIVIL STATUS: Married\n"
            "10. OCCUPATION: Accountant\n"
            "Immediate cause: Myocardial Infarction"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_SEX", "F103_AGE",
            "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_RESIDENCE", "F103_CIVIL_STATUS",
            "F103_OCCUPATION", "F103_CAUSE_IMMEDIATE",
        ],
        "expected_values": {
            "name_of_deceased": "Benjamin Ocampo Velasquez",
            "occupation": "Accountant",
        },
    },
    {
        "id": "2A-004",
        "desc": "Death certificate — young adult, only immediate cause",
        "text": (
            "1. NAME (First): Cristina (Middle): Evangelista (Last): Sy\n"
            "2. SEX: Female\n"
            "4. AGE: 29\n"
            "5. PLACE OF DEATH: Philippine General Hospital\n"
            "6. DATE OF DEATH: June 6, 2016\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Single\n"
            "Immediate cause: Dengue Hemorrhagic Fever"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_SEX", "F103_AGE",
            "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_CIVIL_STATUS", "F103_CAUSE_IMMEDIATE",
        ],
        "expected_values": {
            "name_of_deceased": "Cristina Evangelista Sy",
            "age": "29",
        },
    },
    {
        "id": "2A-005",
        "desc": "Death certificate — all three causes of death",
        "text": (
            "1. NAME (First): Ernesto (Middle): Macapagal (Last): Villafuerte\n"
            "2. SEX: Male\n"
            "4. AGE: 77\n"
            "5. PLACE OF DEATH: Veterans Memorial Medical Center\n"
            "6. DATE OF DEATH: November 11, 2017\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Married\n"
            "Immediate cause: Multi-Organ Failure\n"
            "Antecedent cause: Septicemia\n"
            "Underlying cause: Pneumonia"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_AGE", "F103_DATE_OF_DEATH",
            "F103_CAUSE_IMMEDIATE", "F103_CAUSE_ANTECEDENT", "F103_CAUSE_UNDERLYING",
        ],
        "expected_values": {
            "cause_immediate": "Multi-Organ Failure",
            "cause_antecedent": "Septicemia",
            "cause_underlying": "Pneumonia",
        },
    },
]
|
|
|
|
|
# Labelled sample inputs for Form 3A (marriage certificate) extraction.
#
# Each case is a dict with the keys "id", "desc", "text", "expected_labels"
# (raw NER label keys, all prefixed "F97_") and "expected_values" (expected
# filled-form field values).
# NOTE(review): "expected_values" is not read by the accuracy or
# confusion-matrix code; confirm where it is consumed.
# NOTE(review): 3A-001 lists F97_HUSBAND_CIVIL_STATUS but no wife
# counterpart although the text contains "Wife CIVIL STATUS" — confirm
# whether that omission is intentional.
#
# The "desc" strings previously contained a mis-decoded dash ("β"); they now
# use a real em dash.
FORM_3A_TESTS = [
    {
        "id": "3A-001",
        "desc": "Marriage certificate — complete husband and wife",
        "text": (
            "Husband (First): Jose (Middle): Cruz (Last): Ramos\n"
            "Husband AGE: 28\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Husband CIVIL STATUS: Single\n"
            "Wife (First): Elena (Middle): Bautista (Last): Torres\n"
            "Wife AGE: 25\n"
            "Wife CITIZENSHIP: Filipino\n"
            "Wife CIVIL STATUS: Single\n"
            "16. DATE OF MARRIAGE: February 14, 2022\n"
            "15. PLACE OF MARRIAGE: Makati City Hall"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_MIDDLE", "F97_HUSBAND_LAST",
            "F97_HUSBAND_AGE", "F97_HUSBAND_CITIZENSHIP", "F97_HUSBAND_CIVIL_STATUS",
            "F97_WIFE_FIRST", "F97_WIFE_MIDDLE", "F97_WIFE_LAST",
            "F97_WIFE_AGE", "F97_WIFE_CITIZENSHIP",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Jose Cruz Ramos",
            "wife_name": "Elena Bautista Torres",
            "date_of_marriage": "February 14, 2022",
            "place_of_marriage": "Makati City Hall",
        },
    },
    {
        "id": "3A-002",
        "desc": "Marriage certificate — with parents names",
        "text": (
            "Husband (First): Ricardo (Middle): dela Torre (Last): Magsaysay\n"
            "Husband AGE: 35\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Husband NAME OF FATHER (First): Alfredo (Middle): Cruz (Last): Magsaysay\n"
            "Husband NAME OF MOTHER (First): Florencia (Middle): dela (Last): Torre\n"
            "Wife (First): Consuelo (Middle): Reyes (Last): Pascual\n"
            "Wife AGE: 30\n"
            "Wife CITIZENSHIP: Filipino\n"
            "DATE OF MARRIAGE: October 4, 2019\n"
            "PLACE OF MARRIAGE: Quezon City"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_AGE", "F97_HUSBAND_CITIZENSHIP",
            "F97_HUSBAND_FATHER_FIRST", "F97_HUSBAND_MOTHER_FIRST",
            "F97_WIFE_FIRST", "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Ricardo dela Torre Magsaysay",
            "wife_name": "Consuelo Reyes Pascual",
        },
    },
    {
        "id": "3A-003",
        "desc": "Marriage certificate — with place of birth",
        "text": (
            "Husband (First): Marco (Middle): Villanueva (Last): Concepcion\n"
            "Husband PLACE OF BIRTH: Iloilo City\n"
            "Husband AGE: 26\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Wife (First): Patricia (Middle): Guevara (Last): Luna\n"
            "Wife PLACE OF BIRTH: Cebu City\n"
            "Wife AGE: 24\n"
            "Wife CITIZENSHIP: Filipino\n"
            "DATE OF MARRIAGE: June 21, 2023\n"
            "PLACE OF MARRIAGE: Iloilo City Hall"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_PLACE_BIRTH", "F97_HUSBAND_AGE",
            "F97_WIFE_FIRST", "F97_WIFE_PLACE_BIRTH", "F97_WIFE_AGE",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Marco Villanueva Concepcion",
            "wife_name": "Patricia Guevara Luna",
        },
    },
    {
        "id": "3A-004",
        "desc": "Marriage certificate — with religion",
        "text": (
            "HUSBAND NAME (First): Albert (Middle): Garcia (Last): Santos\n"
            "HUSBAND AGE: 40\n"
            "HUSBAND CITIZENSHIP: Filipino\n"
            "HUSBAND RELIGION: Roman Catholic\n"
            "WIFE NAME (First): Rowena (Middle): Alvarez (Last): Reyes\n"
            "WIFE AGE: 36\n"
            "WIFE CITIZENSHIP: Filipino\n"
            "WIFE RELIGION: Roman Catholic\n"
            "DATE OF MARRIAGE: March 14, 2010\n"
            "PLACE OF MARRIAGE: Victory Christian Center, Pasig"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_AGE", "F97_HUSBAND_RELIGION",
            "F97_WIFE_FIRST", "F97_WIFE_AGE", "F97_WIFE_RELIGION",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Albert Garcia Santos",
        },
    },
    {
        "id": "3A-005",
        "desc": "Marriage certificate — with date of birth",
        "text": (
            "Husband (First): Miguel (Middle): Santos (Last): dela Cruz\n"
            "Husband DATE OF BIRTH: June 15, 1990\n"
            "Husband AGE: 31\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Wife (First): Sofia (Middle): Tan (Last): Lim\n"
            "Wife DATE OF BIRTH: March 20, 1993\n"
            "Wife AGE: 28\n"
            "Wife CITIZENSHIP: Filipino\n"
            "16. DATE OF MARRIAGE: December 12, 2021\n"
            "15. PLACE OF MARRIAGE: Taguig City"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_DOB", "F97_HUSBAND_AGE",
            "F97_WIFE_FIRST", "F97_WIFE_DOB", "F97_WIFE_AGE",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Miguel Santos dela Cruz",
            "wife_name": "Sofia Tan Lim",
        },
    },
]
|
|
|
|
|
# Black-box robustness cases: odd or degenerate OCR text fed through the full
# fill pipeline.
#
# Each case is a dict with:
#   id           - unique case identifier ("BB-NNN").
#   desc         - human-readable description printed in the report.
#   form         - which form pipeline to run: "1A", "2A" or "3A".
#   text         - the input text.
#   expect_crash - True if an exception is the expected outcome.
#   expect_empty - True if the resulting dict is expected to have no fields.
#
# The "desc" strings previously contained a mis-decoded dash ("β"); they now
# use a real em dash.
BLACK_BOX_TESTS = [
    {
        "id": "BB-001",
        "desc": "Empty input — should not crash",
        "form": "1A",
        "text": "",
        "expect_crash": False,
        "expect_empty": True,
    },
    {
        "id": "BB-002",
        "desc": "Whitespace only — should not crash",
        "form": "1A",
        "text": " \n\n\t ",
        "expect_crash": False,
        "expect_empty": True,
    },
    {
        "id": "BB-003",
        "desc": "Garbled OCR output — should not crash",
        "form": "2A",
        "text": "1. N4ME (F1rst): J@an (M1ddle): d3la Cr!z (L@st): $antos\n2. SEX: M@le",
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-004",
        "desc": "Partial form — only name fields present",
        "form": "1A",
        "text": "1. NAME (First): Maria (Middle): Santos (Last): Reyes",
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-005",
        "desc": "Very long OCR text — should not crash",
        "form": "2A",
        "text": "1. NAME (First): Carlos (Last): Cruz\n" * 50,
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-006",
        "desc": "Missing colon separators — OCR formatting issue",
        "form": "1A",
        "text": "NAME First Juan Middle dela Cruz Last Santos\nSEX Male\nDATE OF BIRTH March 15 1990",
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-007",
        "desc": "Numbers only — no recognizable form content",
        "form": "3A",
        "text": "123456789 0987654321 11111 22222 33333",
        "expect_crash": False,
        "expect_empty": True,
    },
    {
        "id": "BB-008",
        "desc": "Valid Form 3A input — pipeline completes",
        "form": "3A",
        "text": (
            "Husband (First): Patrick (Middle): Sy (Last): Chua\n"
            "Wife (First): Christine (Middle): Lim (Last): Go\n"
            "DATE OF MARRIAGE: July 7, 2023\n"
            "PLACE OF MARRIAGE: Binondo Church, Manila"
        ),
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-009",
        "desc": "Mixed language (Filipino/English) — common in real forms",
        "form": "1A",
        "text": (
            "1. PANGALAN (First): Jose (Middle): dela Cruz (Last): Reyes\n"
            "2. SEX: Lalaki\n"
            "3. DATE OF BIRTH: Enero 5, 2000\n"
            "4. PLACE OF BIRTH: Lungsod ng Maynila"
        ),
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-010",
        "desc": "Special characters in name — OCR artifact",
        "form": "2A",
        "text": (
            "1. NAME (First): Fe|ipe (Middle): San+os (Last): Cr-uz\n"
            "2. SEX: Male\n"
            "4. AGE: 55\n"
            "6. DATE OF DEATH: May 1, 2020"
        ),
        "expect_crash": False,
        "expect_empty": False,
    },
]
|
|
|
# Every labelled form case in one list, in 1A, 2A, 3A order.
ALL_FORM_TESTS = [*FORM_1A_TESTS, *FORM_2A_TESTS, *FORM_3A_TESTS]
|
|
|
|
|
|
|
|
|
|
|
|
|
def separator(char="═", width=65):
    """Return a horizontal rule: ``char`` repeated ``width`` times.

    The default character was stored as a mis-decoded byte sequence ("β");
    it is restored here to a box-drawing double line.

    Args:
        char: String to repeat (normally a single character).
        width: Number of repetitions.

    Returns:
        The repeated string; empty when ``width`` is 0 or negative.
    """
    return char * width
|
|
|
def header(title):
    """Print a top-level section banner.

    Emits a blank line, a rule from ``separator()``, the title indented by
    one space, and a closing rule.
    """
    rule = separator()
    for line in (f"\n{rule}", f" {title}", rule):
        print(line)
|
|
|
def subheader(title):
    """Print a sub-section banner framed by two 60-character light rules.

    The rule character was stored as a mis-decoded byte sequence ("β"); it is
    restored here to the box-drawing light horizontal line.
    """
    rule = "─" * 60
    print(f"\n {rule}")
    print(f" {title}")
    print(f" {rule}")
|
|
|
|
|
def run_extraction(extractor, filler, form_type, text):
    """Fill the form selected by ``form_type`` from ``text``.

    Args:
        extractor: Unused here; kept so all runners share one signature.
        filler: Object exposing ``fill_form_1a``, ``fill_form_2a`` and
            ``fill_form_3a``.
        form_type: "1A", "2A" or "3A".
        text: Input text passed to the fill method.

    Returns:
        Whatever the selected fill method returns, or ``None`` when
        ``form_type`` is not one of the three known codes.
    """
    fill_methods = (
        ("1A", "fill_form_1a"),
        ("2A", "fill_form_2a"),
        ("3A", "fill_form_3a"),
    )
    for code, method_name in fill_methods:
        if form_type == code:
            # Look the method up lazily so only the selected one is touched.
            return getattr(filler, method_name)(text)
    return None
|
|
|
|
|
def get_extracted_labels(extractor, form_type, text):
    """Run the raw extractor that matches ``form_type`` on ``text``.

    ``form_type`` may be a form code ("1A", "2A", "3A") or any value whose
    string form contains the matching label prefix ("F102", "F103", "F97").

    Returns:
        The selected extractor method's result (callers in this file read
        its ``.keys()``), or an empty dict when nothing matches.
    """
    routes = (
        ("1A", "F102", "extract_form_102"),
        ("2A", "F103", "extract_form_103"),
        ("3A", "F97", "extract_form_97"),
    )
    for code, prefix, method_name in routes:
        if form_type == code or prefix in str(form_type):
            return getattr(extractor, method_name)(text)
    return {}
|
|
|
|
|
def infer_form_type(labels):
    """Return the form code implied by the first recognizable label.

    Labels are scanned in order; the first one starting with "F102", "F103"
    or "F97" decides the result ("1A", "2A" or "3A"). Falls back to "1A"
    when no label carries a known prefix.
    """
    prefix_to_form = (("F102", "1A"), ("F103", "2A"), ("F97", "3A"))
    matches = (
        form
        for label in labels
        for prefix, form in prefix_to_form
        if label.startswith(prefix)
    )
    return next(matches, "1A")
|
|
|
|
|
|
|
|
|
|
|
|
|
def _accuracy_status(pct):
    """Map an accuracy percentage to the status icon shown in the report."""
    if pct >= 70:
        return "✅"
    if pct >= 50:
        return "⚠️ "
    return "❌"


def run_accuracy_testing(extractor, filler):
    """Report how many expected labels the raw extractor finds per form.

    For every case in the three form test sets the raw extraction is run and
    each entry of ``expected_labels`` is checked against the keys of the
    result. Per-case scores, a per-form table and an overall figure are
    printed.

    The status icons and rule characters were stored as mis-decoded bytes
    (one literal was even split across two lines); they are restored here.

    Args:
        extractor: Object exposing the ``extract_form_*`` methods.
        filler: Unused; kept so all runners share one signature.

    Returns:
        Overall accuracy as a percentage (0 when there are no expected
        labels at all).
    """
    header("1. ACCURACY TESTING")
    print(" Measures: how many expected labels were correctly extracted")
    print(" Formula: Accuracy = Correct / Total Expected × 100%\n")

    suites = [
        (FORM_1A_TESTS, "Form 1A (Birth)"),
        (FORM_2A_TESTS, "Form 2A (Death)"),
        (FORM_3A_TESTS, "Form 3A (Marriage)"),
    ]
    results = {
        form_name: {"correct": 0, "total": 0, "tests": 0}
        for _, form_name in suites
    }

    for test_set, form_name in suites:
        subheader(f"Accuracy — {form_name}")

        for test in test_set:
            # The id prefix ("1A", "2A", "3A") selects the extractor.
            form_type = test["id"].split("-")[0]
            data = get_extracted_labels(extractor, form_type, test["text"])
            found_labels = set(data.keys())

            expected = test["expected_labels"]
            missing = [label for label in expected if label not in found_labels]
            total = len(expected)
            correct = total - len(missing)

            pct = (correct / total * 100) if total > 0 else 0
            status = _accuracy_status(pct)

            print(f" {status} [{test['id']}] {test['desc']}")
            print(f" Score: {correct}/{total} ({pct:.1f}%)")
            if missing:
                # Show at most three missing labels to keep the line short.
                print(f" Missing: {', '.join(missing[:3])}"
                      + ("..." if len(missing) > 3 else ""))

            form_result = results[form_name]
            form_result["correct"] += correct
            form_result["total"] += total
            form_result["tests"] += 1

    subheader("Accuracy Summary")
    rule = f" {'─'*30} {'─'*8} {'─'*7} {'─'*10}"
    print(f" {'Form':<30} {'Correct':>8} {'Total':>7} {'Accuracy':>10}")
    print(rule)

    total_correct = 0
    total_labels = 0
    for form_name, r in results.items():
        pct = (r["correct"] / r["total"] * 100) if r["total"] > 0 else 0
        mark = _accuracy_status(pct)
        print(f" {mark} {form_name:<28} {r['correct']:>8} {r['total']:>7} {pct:>9.1f}%")
        total_correct += r["correct"]
        total_labels += r["total"]

    print(rule)
    overall = (total_correct / total_labels * 100) if total_labels > 0 else 0
    print(f" {'OVERALL':<30} {total_correct:>8} {total_labels:>7} {overall:>9.1f}%")

    return overall
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_black_box_testing(extractor, filler):
    """Run every case in ``BLACK_BOX_TESTS`` through the fill pipeline.

    A case fails when it crashes unexpectedly, survives although a crash was
    expected, or takes longer than 5 seconds in the fill step. A mismatch
    against ``expect_empty`` is only reported as a note and does not fail
    the case.

    Changes from the previous version:
      * icons, dashes and arrows that were stored as mis-decoded bytes (one
        literal was split across two lines) are restored;
      * the supplied ``filler`` converts the form to a dict instead of
        re-importing ``AutoFillEngine`` and building a new engine per case;
      * the interval is measured with ``time.perf_counter``.

    Args:
        extractor: Passed through to ``run_extraction``.
        filler: Object exposing the ``fill_form_*`` methods and ``to_dict``.

    Returns:
        Tuple ``(passed, total)``.
    """
    header("2. BLACK BOX TESTING")
    print(" Tests system behavior from external perspective.")
    print(" No knowledge of internals — only input → output.\n")
    print(" Test categories:")
    print(" • Valid inputs (normal use)")
    print(" • Invalid / edge case inputs (empty, garbled, partial)")
    print(" • Boundary inputs (very long, special chars, mixed language)\n")

    passed = 0
    failed = 0
    errors = []

    for test in BLACK_BOX_TESTS:
        test_passed = True
        notes = []

        try:
            start = time.perf_counter()
            form_obj = run_extraction(extractor, filler, test["form"], test["text"])
            elapsed = time.perf_counter() - start

            if test["expect_crash"]:
                test_passed = False
                notes.append("Expected crash but system survived")

            result = filler.to_dict(form_obj)
            is_empty = len(result) == 0

            # Emptiness mismatches are informational only.
            if test["expect_empty"] and not is_empty:
                notes.append(f"Expected empty output but got {len(result)} fields")

            if not test["expect_empty"] and is_empty and test["id"] != "BB-007":
                notes.append("Expected some output but got nothing")

            if elapsed > 5.0:
                test_passed = False
                notes.append(f"Too slow: {elapsed:.2f}s (limit: 5s)")

            status_icon = "✅" if test_passed else "❌"
            timing = f"{elapsed*1000:.0f}ms"

            print(f" {status_icon} [{test['id']}] {test['desc']}")
            print(f" Fields found: {len(result)} | Time: {timing}")
            for note in notes:
                print(f" ℹ️ {note}")

        except Exception as e:
            if test["expect_crash"]:
                print(f" ✅ [{test['id']}] {test['desc']}")
                print(f" Crashed as expected: {type(e).__name__}")
            else:
                # Unexpected crash: record it and count the case as failed.
                errors.append(f"[{test['id']}] {type(e).__name__}: {e}")
                print(f" ❌ [{test['id']}] {test['desc']}")
                print(f" CRASH: {type(e).__name__}: {e}")
                failed += 1
                continue

        if test_passed:
            passed += 1
        else:
            failed += 1

    subheader("Black Box Summary")
    total = passed + failed
    pct = (passed / total * 100) if total > 0 else 0
    print(f" Passed: {passed}/{total} ({pct:.1f}%)")
    if errors:
        print(f" Crashes detected: {len(errors)}")
        for error in errors:
            print(f" → {error}")
    else:
        print(" ✅ No crashes detected — system is stable")

    return passed, total
|
|
|
|
|
|
|
|
|
|
|
|
|
def _precision_recall_f1(tp, fp, fn):
    """Return ``(precision, recall, f1)`` for the counts; 0.0 where undefined."""
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    denom = precision + recall
    f1 = (2 * precision * recall / denom) if denom > 0 else 0.0
    return precision, recall, f1


def run_confusion_matrix(extractor):
    """Print per-label TP/FP/FN with precision, recall and F1 for each form.

    For every case in ``ALL_FORM_TESTS``:
      * an expected label that was extracted counts as TP, otherwise FN;
      * an extracted label that was not expected in this case counts as FP,
        but only if some case expects that label — labels no case ever
        expects are ignored.

    Changes from the previous version: the mis-decoded icons and rule
    characters are restored, the set of labels expected anywhere is built
    once instead of rescanning all cases per found label, and the three
    copies of the precision/recall/F1 arithmetic share one helper.

    Args:
        extractor: Object exposing the ``extract_form_*`` methods.

    Returns:
        Overall F1 score across all forms (0.0 when nothing was scored).
    """
    header("3. CONFUSION MATRIX")
    print(" Per-label: True Positive (TP), False Positive (FP),")
    print(" False Negative (FN), Precision, Recall, F1-Score\n")

    label_stats = defaultdict(lambda: {"TP": 0, "FP": 0, "FN": 0})

    # Labels expected by at least one case; only these can score as FP.
    known_labels = {
        label for case in ALL_FORM_TESTS for label in case["expected_labels"]
    }

    for test in ALL_FORM_TESTS:
        form_type = test["id"].split("-")[0]
        data = get_extracted_labels(extractor, form_type, test["text"])
        found_labels = set(data.keys())
        expected_labels = set(test["expected_labels"])

        for label in expected_labels:
            outcome = "TP" if label in found_labels else "FN"
            label_stats[label][outcome] += 1

        for label in found_labels - expected_labels:
            if label in known_labels:
                label_stats[label]["FP"] += 1

    form_groups = [
        ("Form 1A (Birth Certificate)", "F102"),
        ("Form 2A (Death Certificate)", "F103"),
        ("Form 3A (Marriage Certificate)", "F97"),
    ]

    overall_tp = overall_fp = overall_fn = 0
    table_rule = f" {'─'*40} {'─'*4} {'─'*4} {'─'*4} {'─'*10} {'─'*8} {'─'*8}"

    for form_name, prefix in form_groups:
        subheader(f"Confusion Matrix — {form_name}")
        form_labels = {k: v for k, v in label_stats.items() if k.startswith(prefix)}

        if not form_labels:
            print(" ⚠️ No test results for this form yet.")
            continue

        print(f" {'Label':<40} {'TP':>4} {'FP':>4} {'FN':>4} {'Precision':>10} {'Recall':>8} {'F1':>8}")
        print(table_rule)

        form_tp = form_fp = form_fn = 0
        tag = prefix + "_"

        for label, stats in sorted(form_labels.items()):
            tp = stats["TP"]
            fp = stats["FP"]
            fn = stats["FN"]

            precision, recall, f1 = _precision_recall_f1(tp, fp, fn)

            perf = "✅" if f1 >= 0.7 else ("⚠️ " if f1 >= 0.5 else "❌")
            # Strip only the leading "<prefix>_" for display.
            short_label = label[len(tag):] if label.startswith(tag) else label
            print(f" {perf} {short_label:<38} {tp:>4} {fp:>4} {fn:>4} "
                  f"{precision:>9.2f} {recall:>7.2f} {f1:>7.2f}")

            form_tp += tp
            form_fp += fp
            form_fn += fn

        form_prec, form_rec, form_f1 = _precision_recall_f1(form_tp, form_fp, form_fn)

        print(table_rule)
        print(f" {' FORM TOTAL':<40} {form_tp:>4} {form_fp:>4} {form_fn:>4} "
              f"{form_prec:>9.2f} {form_rec:>7.2f} {form_f1:>7.2f}")

        overall_tp += form_tp
        overall_fp += form_fp
        overall_fn += form_fn

    subheader("Overall Confusion Matrix Summary")
    overall_prec, overall_rec, overall_f1 = _precision_recall_f1(
        overall_tp, overall_fp, overall_fn
    )

    print(f" {'Metric':<25} {'Value':>10}")
    print(f" {'─'*25} {'─'*10}")
    print(f" {'True Positives (TP)':<25} {overall_tp:>10}")
    print(f" {'False Positives (FP)':<25} {overall_fp:>10}")
    print(f" {'False Negatives (FN)':<25} {overall_fn:>10}")
    print(f" {'Precision':<25} {overall_prec:>9.2f}")
    print(f" {'Recall':<25} {overall_rec:>9.2f}")
    print(f" {'F1-Score':<25} {overall_f1:>9.2f}")

    return overall_f1
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_reliability_testing(extractor, filler):
    """Run the reliability checks and print a pass/fail report.

    Four groups of checks are executed:
      4.1 Availability    - both objects exist and expose the expected methods.
      4.2 Fault tolerance - ``fill_form_1a`` does not raise on bad input.
      4.3 Maturity        - five ``extract_form_102`` runs on the same text
                            yield the same set of label keys.
      4.4 Recoverability  - valid input is still processed after empty or
                            garbage input.

    Changes from the previous version: mis-decoded icons, bullets and dashes
    (several literals were split across two lines) are restored, and the
    unused ``recovery_tests`` list and other unused locals are removed.

    Args:
        extractor: Object exposing the ``extract_form_*`` methods.
        filler: Object exposing the ``fill_form_*`` methods.

    Returns:
        Tuple ``(passed, total)`` over all individual checks.
    """
    header("4. ISO 25010 — RELIABILITY TESTING")
    print(" ISO 25010 Reliability sub-characteristics:")
    print(" • Maturity — consistent results on repeated runs")
    print(" • Fault Tolerance — handles bad/missing input without crashing")
    print(" • Recoverability — recovers from error states")
    print(" • Availability — model loads and responds correctly\n")

    passed = 0
    total = 0

    subheader("4.1 Availability — Model Load & Response")
    availability_tests = [
        ("Model loaded successfully", extractor is not None),
        ("AutoFillEngine initialized", filler is not None),
        ("fill_form_1a() is callable", callable(getattr(filler, "fill_form_1a", None))),
        ("fill_form_2a() is callable", callable(getattr(filler, "fill_form_2a", None))),
        ("fill_form_3a() is callable", callable(getattr(filler, "fill_form_3a", None))),
        ("extract_form_102() is callable", callable(getattr(extractor, "extract_form_102", None))),
        ("extract_form_103() is callable", callable(getattr(extractor, "extract_form_103", None))),
        ("extract_form_97() is callable", callable(getattr(extractor, "extract_form_97", None))),
    ]
    for desc, condition in availability_tests:
        total += 1
        if condition:
            passed += 1
            print(f" ✅ {desc}")
        else:
            print(f" ❌ {desc}")

    subheader("4.2 Fault Tolerance — Bad Input Handling")
    fault_inputs = [
        ("Empty string", ""),
        ("None-like whitespace", " \n "),
        ("Random symbols", "@#$%^&*()_+{}|:<>?"),
        ("Very long input", "NAME: Juan Santos\n" * 200),
        ("Binary-like text", "\x00\x01\x02 NAME First Juan"),
        ("Only numbers", "123 456 789 000 111 222"),
        ("Repeated newlines", "\n\n\n\n\n"),
    ]
    for desc, bad_input in fault_inputs:
        total += 1
        try:
            filler.fill_form_1a(bad_input)
        except Exception as e:
            print(f" ❌ {desc} — CRASH: {type(e).__name__}: {e}")
        else:
            passed += 1
            print(f" ✅ {desc} — handled gracefully")

    subheader("4.3 Maturity — Consistency on Repeated Runs")
    test_text = (
        "1. NAME (First): Juan (Middle): dela Cruz (Last): Santos\n"
        "2. SEX: Male\n"
        "3. DATE OF BIRTH: March 15, 1990\n"
        "4. PLACE OF BIRTH: Makati City"
    )

    num_runs = 5
    # Distinct key sets across runs; exactly one means the runs agree.
    distinct_key_sets = {
        frozenset(extractor.extract_form_102(test_text).keys())
        for _ in range(num_runs)
    }

    total += 1
    if len(distinct_key_sets) == 1:
        passed += 1
        print(f" ✅ {num_runs} repeated runs — identical results (consistent)")
    else:
        print(f" ❌ {num_runs} repeated runs — inconsistent results")

    subheader("4.4 Recoverability — System Continues After Errors")

    total += 1
    try:
        filler.fill_form_1a("")
        filler.fill_form_2a("")
        filler.fill_form_1a(
            "1. NAME (First): Test (Last): User\n2. SEX: Male"
        )
    except Exception as e:
        print(f" ❌ System did not recover: {e}")
    else:
        passed += 1
        print(" ✅ System recovers after empty input — continues processing")

    total += 1
    try:
        for _ in range(3):
            filler.fill_form_2a("GARBAGE INPUT @#$%")
        filler.fill_form_2a(
            "1. NAME (First): Carlos (Last): Cruz\n4. AGE: 65"
        )
    except Exception as e:
        print(f" ❌ System failed after bad inputs: {e}")
    else:
        passed += 1
        print(" ✅ System processes valid input after multiple bad inputs")

    subheader("ISO 25010 Reliability Summary")
    pct = (passed / total * 100) if total > 0 else 0
    print(f" Passed: {passed}/{total} ({pct:.1f}%)")
    if pct >= 90:
        print(" ✅ RELIABILITY: EXCELLENT — meets ISO 25010 standard")
    elif pct >= 75:
        print(" ⚠️ RELIABILITY: ACCEPTABLE — minor issues found")
    else:
        print(" ❌ RELIABILITY: NEEDS IMPROVEMENT")

    return passed, total
|
|
|
|
|
|
|
|
|
|
|
|
|
def _run_usability_checks(checks, timed=False):
    """Run ``(description, callable)`` checks and print one line per result.

    A check passes when its callable returns a truthy value. A falsy return
    value or a raised exception is reported as a failure, so a single broken
    check never aborts the remaining ones.

    Args:
        checks: Iterable of ``(description, zero-argument callable)`` pairs.
        timed: When true, append the elapsed milliseconds to passing lines.

    Returns:
        A ``(passed, total)`` tuple for the supplied checks.
    """
    passed = 0
    total = 0
    for desc, check in checks:
        total += 1
        try:
            # perf_counter is monotonic, unlike time.time(), so the reported
            # duration cannot go negative if the system clock is adjusted.
            started = time.perf_counter()
            outcome = check()
            elapsed_ms = (time.perf_counter() - started) * 1000
        except Exception as exc:  # a crashing check counts as a failed check
            print(f" ❌ {desc} — {type(exc).__name__}: {exc}")
            continue
        if not outcome:
            print(f" ❌ {desc}")
            continue
        passed += 1
        suffix = f" ({elapsed_ms:.0f}ms)" if timed else ""
        print(f" ✅ {desc}{suffix}")
    return passed, total


def run_usability_testing(extractor, filler):
    """Run the ISO 25010 usability checks and print a graded summary.

    Four groups of checks are executed against ``filler``: learnability
    (output format consistency), operability (end-to-end pipeline),
    accessibility (output readability) and user error protection.

    Args:
        extractor: Not used by this function; accepted so the signature
            matches the call made from ``main``.
        filler: Object providing ``fill_form_1a``, ``fill_form_2a``,
            ``fill_form_3a`` and ``to_dict``.

    Returns:
        A ``(passed, total)`` tuple counting the checks across all groups.
    """
    header("5. ISO 25010 — USABILITY TESTING")
    print(" ISO 25010 Usability sub-characteristics:")
    print(" • Learnability — consistent, predictable output format")
    print(" • Operability — pipeline runs end-to-end without manual steps")
    print(" • Accessibility — output is readable and usable by calling code")
    print(" • User error protection — handles mistakes without data corruption\n")

    sample_text_102 = (
        "1. NAME (First): Juan (Middle): dela Cruz (Last): Santos\n"
        "2. SEX: Male\n"
        "3. DATE OF BIRTH: March 15, 1990\n"
        "4. PLACE OF BIRTH: Makati City\n"
        "7. MAIDEN NAME (First): Maria (Middle): Reyes (Last): dela Cruz\n"
        "8. CITIZENSHIP: Filipino\n"
        "14. NAME (First): Pedro (Middle): Cruz (Last): Santos"
    )

    sample_text_103 = (
        "1. NAME (First): Carlos (Middle): Reyes (Last): Mendoza\n"
        "2. SEX: Male\n4. AGE: 65\n"
        "5. PLACE OF DEATH: Manila\n"
        "6. DATE OF DEATH: January 1, 2020\n"
        "Immediate cause: Heart Attack"
    )

    sample_text_97 = (
        "Husband (First): Jose (Middle): Cruz (Last): Ramos\n"
        "Wife (First): Elena (Middle): Bautista (Last): Torres\n"
        "DATE OF MARRIAGE: February 14, 2022\n"
        "PLACE OF MARRIAGE: Manila City Hall"
    )

    passed = 0
    total = 0

    # ── 5.1 Learnability ────────────────────────────────────────────────
    subheader("5.1 Learnability — Output Format Consistency")
    learn_tests = [
        ("Form1A has name_of_child field",
         lambda: hasattr(filler.fill_form_1a(sample_text_102), "name_of_child")),
        ("Form1A name_of_child is string or None",
         lambda: isinstance(filler.fill_form_1a(sample_text_102).name_of_child,
                            (str, type(None)))),
        ("Form2A has name_of_deceased field",
         lambda: hasattr(filler.fill_form_2a(sample_text_103), "name_of_deceased")),
        ("Form3A has husband and wife fields",
         lambda: hasattr(filler.fill_form_3a(sample_text_97), "husband") and
                 hasattr(filler.fill_form_3a(sample_text_97), "wife")),
        ("to_dict() returns a dictionary",
         lambda: isinstance(filler.to_dict(filler.fill_form_1a(sample_text_102)), dict)),
        ("Same input always gives same output type",
         # Exact class identity is intended here, so compare with ``is``.
         lambda: type(filler.fill_form_1a(sample_text_102))
                 is type(filler.fill_form_1a(sample_text_102))),
        ("Form1A output is a Form1A instance",
         lambda: isinstance(filler.fill_form_1a(sample_text_102), Form1A)),
        ("Form2A output is a Form2A instance",
         lambda: isinstance(filler.fill_form_2a(sample_text_103), Form2A)),
        ("Form3A output is a Form3A instance",
         lambda: isinstance(filler.fill_form_3a(sample_text_97), Form3A)),
    ]
    group_passed, group_total = _run_usability_checks(learn_tests)
    passed += group_passed
    total += group_total

    # ── 5.2 Operability ─────────────────────────────────────────────────
    subheader("5.2 Operability — End-to-End Pipeline")
    operability_tests = [
        ("Form 1A pipeline completes (text → Form1A object)",
         lambda: filler.fill_form_1a(sample_text_102) is not None),
        ("Form 2A pipeline completes (text → Form2A object)",
         lambda: filler.fill_form_2a(sample_text_103) is not None),
        ("Form 3A pipeline completes (text → Form3A object)",
         lambda: filler.fill_form_3a(sample_text_97) is not None),
        ("to_dict() converts Form1A without errors",
         lambda: filler.to_dict(filler.fill_form_1a(sample_text_102)) is not None),
        ("to_dict() converts Form2A without errors",
         lambda: filler.to_dict(filler.fill_form_2a(sample_text_103)) is not None),
        ("to_dict() converts Form3A without errors",
         lambda: filler.to_dict(filler.fill_form_3a(sample_text_97)) is not None),
        ("Pipeline handles empty text without crash",
         lambda: filler.fill_form_1a("") is not None),
        ("Pipeline handles all 3 forms in sequence",
         # A list (not a generator) so all three forms run even if an
         # earlier one returns None.
         lambda: all([
             filler.fill_form_1a(sample_text_102) is not None,
             filler.fill_form_2a(sample_text_103) is not None,
             filler.fill_form_3a(sample_text_97) is not None,
         ])),
    ]
    group_passed, group_total = _run_usability_checks(operability_tests, timed=True)
    passed += group_passed
    total += group_total

    # ── 5.3 Accessibility ───────────────────────────────────────────────
    subheader("5.3 Accessibility — Output Readability")
    form_1a = filler.fill_form_1a(sample_text_102)
    form_3a = filler.fill_form_3a(sample_text_97)
    dict_1a = filler.to_dict(form_1a)

    accessibility_tests = [
        ("Form1A dict keys are human-readable strings",
         lambda: all(isinstance(k, str) for k in dict_1a)),
        ("Form1A dict values are strings or None",
         lambda: all(isinstance(v, (str, type(None))) for v in dict_1a.values())),
        ("Form3A.husband is accessible as attribute",
         lambda: form_3a.husband is not None),
        ("Form3A.wife is accessible as attribute",
         lambda: form_3a.wife is not None),
        ("Form3A.husband.name is string or None",
         lambda: isinstance(form_3a.husband.name, (str, type(None)))),
        ("Name fields use First Middle Last order",
         # Name parts must be joined by single spaces. Rejecting *any* space
         # would fail every multi-part name such as "Juan dela Cruz Santos",
         # so only doubled spaces (a missing part) are treated as an error.
         lambda: "  " not in (form_1a.name_of_child or "")),
        ("Empty form produces empty dict (no None values in dict)",
         lambda: all(v is not None
                     for v in filler.to_dict(filler.fill_form_1a("")).values())),
    ]
    group_passed, group_total = _run_usability_checks(accessibility_tests)
    passed += group_passed
    total += group_total

    # ── 5.4 User error protection ───────────────────────────────────────
    subheader("5.4 User Error Protection — Input Mistakes")
    error_protection_tests = [
        ("Calling wrong form type does not corrupt other forms",
         lambda: (filler.fill_form_1a(sample_text_103) is not None and
                  filler.fill_form_1a(sample_text_102) is not None)),
        ("Processing bad input does not affect next call",
         lambda: (filler.fill_form_1a("GARBAGE") is not None and
                  filler.fill_form_1a(sample_text_102) is not None)),
        ("Multiple calls do not accumulate state errors",
         lambda: len([filler.fill_form_2a(sample_text_103) for _ in range(5)]) == 5),
    ]
    group_passed, group_total = _run_usability_checks(error_protection_tests)
    passed += group_passed
    total += group_total

    # ── Summary ─────────────────────────────────────────────────────────
    subheader("ISO 25010 Usability Summary")
    pct = (passed / total * 100) if total > 0 else 0
    print(f" Passed: {passed}/{total} ({pct:.1f}%)")
    if pct >= 90:
        print(" ✅ USABILITY: EXCELLENT — meets ISO 25010 standard")
    elif pct >= 75:
        print(" ⚠️ USABILITY: ACCEPTABLE — minor issues found")
    else:
        print(" ❌ USABILITY: NEEDS IMPROVEMENT")

    return passed, total
|
|
|
|
|
|
|
|
|
|
|
|
|
def print_final_report(model_path, accuracy, bb_pass, bb_total,
                       f1_score, rel_pass, rel_total,
                       usa_pass, usa_total, total_time):
    """Print the consolidated score table for the five test categories.

    Args:
        model_path: Model path shown in the report heading.
        accuracy: Accuracy score, already expressed as a percentage.
        bb_pass, bb_total: Passed / total counts for black box testing.
        f1_score: F1 score as a fraction; it is multiplied by 100 for display.
        rel_pass, rel_total: Passed / total counts for reliability testing.
        usa_pass, usa_total: Passed / total counts for usability testing.
        total_time: Duration of the run in seconds.

    The overall score is the unweighted mean of the five percentages; a
    value of 75 or more is reported as a pass.
    """
    def percent(passed, total):
        # A category with no checks scores 0 instead of dividing by zero.
        return (passed / total * 100) if total > 0 else 0

    def grade(pct):
        if pct >= 90:
            return "EXCELLENT ✅"
        if pct >= 75:
            return "GOOD ✅"
        if pct >= 60:
            return "ACCEPTABLE ⚠️ "
        return "NEEDS IMPROVEMENT ❌"

    header("FINAL TEST REPORT")
    print(f" Model: {model_path}")
    print(f" Date/Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f" Duration: {total_time:.2f} seconds\n")

    rows = [
        ("1. Accuracy Testing", accuracy),
        ("2. Black Box Testing", percent(bb_pass, bb_total)),
        ("3. Confusion Matrix (F1)", f1_score * 100),
        ("4. ISO 25010 Reliability", percent(rel_pass, rel_total)),
        ("5. ISO 25010 Usability", percent(usa_pass, usa_total)),
    ]
    rule = f" {'─' * 35} {'─' * 12} {'─' * 20}"

    print(f" {'Test':<35} {'Score':>12} {'Grade'}")
    print(rule)
    for label, pct in rows:
        print(f" {label:<35} {pct:>10.1f}% {grade(pct)}")

    overall = sum(pct for _, pct in rows) / len(rows)
    print(rule)
    print(f" {'OVERALL SYSTEM SCORE':<35} {overall:>10.1f}% {grade(overall)}")

    print(f"\n {'═' * 60}")
    if overall >= 75:
        print(" ✅ SYSTEM PASSES all testing objectives")
    else:
        print(" ⚠️ SYSTEM NEEDS IMPROVEMENT in some areas")
        print(" → Add more annotated training examples")
        print(" → Re-run training and evaluate again")
    print(f" {'═' * 60}")
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Parse the command line, load the model and run all five test stages.

    The ``--model`` option selects the spaCy model path. If the extractor or
    auto-fill engine cannot be constructed, an error and a hint are printed
    and the process exits with status 1. Otherwise the accuracy, black box,
    confusion matrix, reliability and usability stages run in that order and
    their results are passed to ``print_final_report`` together with the
    elapsed time.
    """
    parser = argparse.ArgumentParser(
        description="Civil Registry NER — Complete Test Suite"
    )
    parser.add_argument(
        "--model",
        default="./models/civil_registry_model/model-best",
        help="Path to spaCy model (default: trained model)",
    )
    args = parser.parse_args()

    print(separator("═"))
    print(" CIVIL REGISTRY NER — COMPLETE TEST SUITE")
    print(" ISO 25010 Compliance Testing")
    print(separator("═"))
    print(f"\n Model: {args.model}")
    print(f" Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    print(" Loading model...")
    try:
        extractor = CivilRegistryNER(model_path=args.model)
        filler = AutoFillEngine(extractor)
        print(f" ✅ Model loaded: {args.model}\n")
    except Exception as e:
        # Top-level boundary: any load failure ends the run with a hint
        # rather than a traceback.
        print(f" ❌ Could not load model: {e}")
        print(" → Try: python testing/test_suite.py --model en_core_web_sm")
        sys.exit(1)

    # Only the test stages are timed; model loading is excluded.
    start_time = time.time()

    accuracy = run_accuracy_testing(extractor, filler)
    bb_pass, bb_total = run_black_box_testing(extractor, filler)
    f1_score = run_confusion_matrix(extractor)
    rel_pass, rel_total = run_reliability_testing(extractor, filler)
    usa_pass, usa_total = run_usability_testing(extractor, filler)

    total_time = time.time() - start_time

    print_final_report(
        args.model, accuracy,
        bb_pass, bb_total,
        f1_score,
        rel_pass, rel_total,
        usa_pass, usa_total,
        total_time,
    )
|
|
|
|
|
# Script entry point: run the suite only on direct execution, not on import.
if __name__ == "__main__":
    main()
|
|
|