# ============================================================
# testing/test_suite.py
# ------------------------------------------------------------
# COMPLETE TEST SUITE for Civil Registry NER System
#
# Covers ALL thesis testing requirements:
#
# 1. ACCURACY TESTING
# - Per-label accuracy (precision, recall, F1)
# - Per-form accuracy (Form 1A, 2A, 3A)
# - Overall system accuracy %
#
# 2. BLACK BOX TESTING
# - Input/output tests (no knowledge of internals)
# - Valid input tests
# - Invalid / edge case input tests
# - Boundary tests (empty, partial, garbled OCR)
#
# 3. CONFUSION MATRIX
# - Per-label true positive / false positive / false negative
# - Visual confusion matrix table
# - Per-form confusion matrix
#
# 4. ISO 25010 RELIABILITY TESTING
# - Fault tolerance (bad input, missing fields)
# - Recoverability (system doesn't crash on errors)
# - Maturity (consistent results on repeated runs)
# - Availability (model loads successfully)
#
# 5. ISO 25010 USABILITY TESTING
# - Learnability (consistent output format)
# - Operability (pipeline runs end-to-end)
# - Accessibility (output readable as dict/dataclass)
# - Error handling (clear messages on failure)
#
# How to run:
# python testing/test_suite.py
# python testing/test_suite.py --model ./models/civil_registry_model/model-best
# python testing/test_suite.py --model en_core_web_sm (baseline before training)
# ============================================================
import sys
import os
import time
import argparse
from collections import defaultdict
from pathlib import Path
from datetime import datetime
# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from spacyNER.extractor import CivilRegistryNER
from spacyNER.autofill import AutoFillEngine
from spacyNER.models import Form1A, Form2A, Form3A
# ──────────────────────────────────────────────────────────
# TEST DATA
# Each test case has: input text, expected labels, form type
# These simulate real CRNN+CTC OCR output from scanned forms.
# ──────────────────────────────────────────────────────────
# ── Form 1A Test Cases ─────────────────────────────────────
# Labelled OCR-style inputs for Form 1A (birth certificate).
# Each case is a dict with:
#   id              - "<form>-<nnn>"; the part before "-" is used as the form type
#   desc            - human-readable description printed in reports
#   text            - simulated OCR text fed to the extractor
#   expected_labels - label keys the extractor is expected to return
#   expected_values - expected field values (not consumed by the runners
#                     visible in this part of the file)
# NOTE(review): the dash in each "desc" was reconstructed from mis-encoded
# text; confirm the original glyph.
FORM_1A_TESTS = [
    {
        "id": "1A-001",
        "desc": "Standard birth certificate — complete fields",
        "text": (
            "1. NAME (First): Juan (Middle): dela Cruz (Last): Santos\n"
            "2. SEX: Male\n"
            "3. DATE OF BIRTH: March 15, 1990\n"
            "4. PLACE OF BIRTH: Makati City\n"
            "7. MAIDEN NAME (First): Maria (Middle): Reyes (Last): dela Cruz\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Pedro (Middle): Cruz (Last): Santos\n"
            "15. CITIZENSHIP: Filipino\n"
            "20a. DATE: June 10, 1985\n"
            "20b. PLACE: Manila"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_CHILD_MIDDLE", "F102_CHILD_LAST",
            "F102_SEX", "F102_DATE_OF_BIRTH", "F102_PLACE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_MOTHER_CITIZENSHIP",
            "F102_FATHER_FIRST", "F102_FATHER_CITIZENSHIP",
        ],
        "expected_values": {
            "name_of_child": "Juan dela Cruz Santos",
            "sex": "Male",
            "name_of_mother": "Maria Reyes dela Cruz",
            "name_of_father": "Pedro Cruz Santos",
        }
    },
    {
        "id": "1A-002",
        "desc": "Birth certificate — female child, twin birth",
        "text": (
            "1. NAME (First): Ana (Middle): Garcia (Last): Reyes\n"
            "2. SEX: Female\n"
            "3. DATE OF BIRTH: August 21, 1995\n"
            "4. PLACE OF BIRTH: Pasig City\n"
            "5a. TYPE OF BIRTH: Twin\n"
            "7. MAIDEN NAME (First): Gloria (Middle): Santos (Last): Garcia\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Ramon (Middle): Cruz (Last): Reyes\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_SEX", "F102_DATE_OF_BIRTH",
            "F102_PLACE_OF_BIRTH", "F102_TYPE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "name_of_child": "Ana Garcia Reyes",
            "sex": "Female",
            "type_of_birth": "Twin",
        }
    },
    {
        "id": "1A-003",
        "desc": "Birth certificate — no middle name (mother)",
        "text": (
            "1. NAME (First): Carlo (Middle): Santos (Last): Lim\n"
            "2. SEX: Male\n"
            "3. DATE OF BIRTH: December 1, 2010\n"
            "4. PLACE OF BIRTH: Cebu City\n"
            "7. MAIDEN NAME (First): Rosa (Middle): (Last): Santos\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Bernard (Middle): Cruz (Last): Lim\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_SEX", "F102_DATE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "name_of_child": "Carlo Santos Lim",
        }
    },
    {
        "id": "1A-004",
        "desc": "Birth certificate — hyphenated last name",
        "text": (
            "1. NAME (First): Sofia (Middle): Mendoza (Last): Santos-Cruz\n"
            "2. SEX: Female\n"
            "3. DATE OF BIRTH: November 30, 2005\n"
            "4. PLACE OF BIRTH: Quezon City\n"
            "7. MAIDEN NAME (First): Carmen (Middle): Uy (Last): Mendoza\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Roberto (Middle): Cruz (Last): Santos-Cruz\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_CHILD_FIRST", "F102_CHILD_LAST", "F102_SEX",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "name_of_child": "Sofia Mendoza Santos-Cruz",
        }
    },
    {
        "id": "1A-005",
        "desc": "Birth certificate — with registry number",
        "text": (
            "Registry No.: 2024-001\n"
            "1. NAME (First): Liza (Middle): Ramos (Last): Delos Santos\n"
            "2. SEX: Female\n"
            "3. DATE OF BIRTH: July 7, 1988\n"
            "4. PLACE OF BIRTH: Davao City\n"
            "7. MAIDEN NAME (First): Perla (Middle): Aquino (Last): Ramos\n"
            "8. CITIZENSHIP: Filipino\n"
            "14. NAME (First): Manuel (Middle): Santos (Last): Delos Santos\n"
            "15. CITIZENSHIP: Filipino"
        ),
        "expected_labels": [
            "F102_REGISTRY_NO", "F102_CHILD_FIRST", "F102_SEX",
            "F102_DATE_OF_BIRTH", "F102_PLACE_OF_BIRTH",
            "F102_MOTHER_FIRST", "F102_FATHER_FIRST",
        ],
        "expected_values": {
            "registry_number": "2024-001",
        }
    },
]
# ── Form 2A Test Cases ─────────────────────────────────────
# Labelled OCR-style inputs for Form 2A (death certificate).
# Same per-case schema as the other form suites: id, desc, text,
# expected_labels (label keys the extractor should return) and
# expected_values (expected field values).
# NOTE(review): the dash in each "desc" was reconstructed from mis-encoded
# text; confirm the original glyph.
FORM_2A_TESTS = [
    {
        "id": "2A-001",
        "desc": "Death certificate — complete fields with all causes",
        "text": (
            "1. NAME (First): Fernando (Middle): Santos (Last): Cruz\n"
            "2. SEX: Male\n"
            "4. AGE: 70\n"
            "5. PLACE OF DEATH: PGH Manila\n"
            "6. DATE OF DEATH: March 3, 2023\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Widowed\n"
            "10. OCCUPATION: Retired Teacher\n"
            "Immediate cause: Renal Failure\n"
            "Antecedent cause: Chronic Kidney Disease\n"
            "Underlying cause: Diabetes Mellitus"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_DECEASED_MIDDLE", "F103_DECEASED_LAST",
            "F103_SEX", "F103_AGE", "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_CIVIL_STATUS", "F103_OCCUPATION",
            "F103_CAUSE_IMMEDIATE", "F103_CAUSE_ANTECEDENT", "F103_CAUSE_UNDERLYING",
        ],
        "expected_values": {
            "name_of_deceased": "Fernando Santos Cruz",
            "age": "70",
            "civil_status": "Widowed",
            "cause_immediate": "Renal Failure",
        }
    },
    {
        "id": "2A-002",
        "desc": "Death certificate — female, elderly, natural cause",
        "text": (
            "1. NAME (First): Josefa (Middle): dela Paz (Last): Gonzales\n"
            "2. SEX: Female\n"
            "3. RELIGION: Roman Catholic\n"
            "4. AGE: 91\n"
            "5. PLACE OF DEATH: Batangas City\n"
            "6. DATE OF DEATH: December 31, 2021\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Widowed\n"
            "Immediate cause: Old Age"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_SEX", "F103_RELIGION",
            "F103_AGE", "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_CIVIL_STATUS", "F103_CAUSE_IMMEDIATE",
        ],
        "expected_values": {
            "name_of_deceased": "Josefa dela Paz Gonzales",
            "religion": "Roman Catholic",
        }
    },
    {
        "id": "2A-003",
        "desc": "Death certificate — with residence field",
        "text": (
            "1. NAME (First): Benjamin (Middle): Ocampo (Last): Velasquez\n"
            "2. SEX: Male\n"
            "4. AGE: 48\n"
            "5. PLACE OF DEATH: Makati Medical Center\n"
            "6. DATE OF DEATH: May 20, 2018\n"
            "7. CITIZENSHIP: Filipino\n"
            "8. RESIDENCE: 12 Ayala Avenue, Makati City\n"
            "9. CIVIL STATUS: Married\n"
            "10. OCCUPATION: Accountant\n"
            "Immediate cause: Myocardial Infarction"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_SEX", "F103_AGE",
            "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_RESIDENCE", "F103_CIVIL_STATUS",
            "F103_OCCUPATION", "F103_CAUSE_IMMEDIATE",
        ],
        "expected_values": {
            "name_of_deceased": "Benjamin Ocampo Velasquez",
            "occupation": "Accountant",
        }
    },
    {
        "id": "2A-004",
        "desc": "Death certificate — young adult, only immediate cause",
        "text": (
            "1. NAME (First): Cristina (Middle): Evangelista (Last): Sy\n"
            "2. SEX: Female\n"
            "4. AGE: 29\n"
            "5. PLACE OF DEATH: Philippine General Hospital\n"
            "6. DATE OF DEATH: June 6, 2016\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Single\n"
            "Immediate cause: Dengue Hemorrhagic Fever"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_SEX", "F103_AGE",
            "F103_PLACE_OF_DEATH", "F103_DATE_OF_DEATH",
            "F103_CITIZENSHIP", "F103_CIVIL_STATUS", "F103_CAUSE_IMMEDIATE",
        ],
        "expected_values": {
            "name_of_deceased": "Cristina Evangelista Sy",
            "age": "29",
        }
    },
    {
        "id": "2A-005",
        "desc": "Death certificate — all three causes of death",
        "text": (
            "1. NAME (First): Ernesto (Middle): Macapagal (Last): Villafuerte\n"
            "2. SEX: Male\n"
            "4. AGE: 77\n"
            "5. PLACE OF DEATH: Veterans Memorial Medical Center\n"
            "6. DATE OF DEATH: November 11, 2017\n"
            "7. CITIZENSHIP: Filipino\n"
            "9. CIVIL STATUS: Married\n"
            "Immediate cause: Multi-Organ Failure\n"
            "Antecedent cause: Septicemia\n"
            "Underlying cause: Pneumonia"
        ),
        "expected_labels": [
            "F103_DECEASED_FIRST", "F103_AGE", "F103_DATE_OF_DEATH",
            "F103_CAUSE_IMMEDIATE", "F103_CAUSE_ANTECEDENT", "F103_CAUSE_UNDERLYING",
        ],
        "expected_values": {
            "cause_immediate": "Multi-Organ Failure",
            "cause_antecedent": "Septicemia",
            "cause_underlying": "Pneumonia",
        }
    },
]
# ── Form 3A Test Cases ─────────────────────────────────────
# Labelled OCR-style inputs for Form 3A (marriage certificate).
# Same per-case schema as the other form suites: id, desc, text,
# expected_labels (label keys the extractor should return) and
# expected_values (expected field values).
# NOTE(review): the dash in each "desc" was reconstructed from mis-encoded
# text; confirm the original glyph.
FORM_3A_TESTS = [
    {
        "id": "3A-001",
        "desc": "Marriage certificate — complete husband and wife",
        "text": (
            "Husband (First): Jose (Middle): Cruz (Last): Ramos\n"
            "Husband AGE: 28\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Husband CIVIL STATUS: Single\n"
            "Wife (First): Elena (Middle): Bautista (Last): Torres\n"
            "Wife AGE: 25\n"
            "Wife CITIZENSHIP: Filipino\n"
            "Wife CIVIL STATUS: Single\n"
            "16. DATE OF MARRIAGE: February 14, 2022\n"
            "15. PLACE OF MARRIAGE: Makati City Hall"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_MIDDLE", "F97_HUSBAND_LAST",
            "F97_HUSBAND_AGE", "F97_HUSBAND_CITIZENSHIP", "F97_HUSBAND_CIVIL_STATUS",
            "F97_WIFE_FIRST", "F97_WIFE_MIDDLE", "F97_WIFE_LAST",
            "F97_WIFE_AGE", "F97_WIFE_CITIZENSHIP",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Jose Cruz Ramos",
            "wife_name": "Elena Bautista Torres",
            "date_of_marriage": "February 14, 2022",
            "place_of_marriage": "Makati City Hall",
        }
    },
    {
        "id": "3A-002",
        "desc": "Marriage certificate — with parents names",
        "text": (
            "Husband (First): Ricardo (Middle): dela Torre (Last): Magsaysay\n"
            "Husband AGE: 35\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Husband NAME OF FATHER (First): Alfredo (Middle): Cruz (Last): Magsaysay\n"
            "Husband NAME OF MOTHER (First): Florencia (Middle): dela (Last): Torre\n"
            "Wife (First): Consuelo (Middle): Reyes (Last): Pascual\n"
            "Wife AGE: 30\n"
            "Wife CITIZENSHIP: Filipino\n"
            "DATE OF MARRIAGE: October 4, 2019\n"
            "PLACE OF MARRIAGE: Quezon City"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_AGE", "F97_HUSBAND_CITIZENSHIP",
            "F97_HUSBAND_FATHER_FIRST", "F97_HUSBAND_MOTHER_FIRST",
            "F97_WIFE_FIRST", "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Ricardo dela Torre Magsaysay",
            "wife_name": "Consuelo Reyes Pascual",
        }
    },
    {
        "id": "3A-003",
        "desc": "Marriage certificate — with place of birth",
        "text": (
            "Husband (First): Marco (Middle): Villanueva (Last): Concepcion\n"
            "Husband PLACE OF BIRTH: Iloilo City\n"
            "Husband AGE: 26\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Wife (First): Patricia (Middle): Guevara (Last): Luna\n"
            "Wife PLACE OF BIRTH: Cebu City\n"
            "Wife AGE: 24\n"
            "Wife CITIZENSHIP: Filipino\n"
            "DATE OF MARRIAGE: June 21, 2023\n"
            "PLACE OF MARRIAGE: Iloilo City Hall"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_PLACE_BIRTH", "F97_HUSBAND_AGE",
            "F97_WIFE_FIRST", "F97_WIFE_PLACE_BIRTH", "F97_WIFE_AGE",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Marco Villanueva Concepcion",
            "wife_name": "Patricia Guevara Luna",
        }
    },
    {
        "id": "3A-004",
        "desc": "Marriage certificate — with religion",
        "text": (
            "HUSBAND NAME (First): Albert (Middle): Garcia (Last): Santos\n"
            "HUSBAND AGE: 40\n"
            "HUSBAND CITIZENSHIP: Filipino\n"
            "HUSBAND RELIGION: Roman Catholic\n"
            "WIFE NAME (First): Rowena (Middle): Alvarez (Last): Reyes\n"
            "WIFE AGE: 36\n"
            "WIFE CITIZENSHIP: Filipino\n"
            "WIFE RELIGION: Roman Catholic\n"
            "DATE OF MARRIAGE: March 14, 2010\n"
            "PLACE OF MARRIAGE: Victory Christian Center, Pasig"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_AGE", "F97_HUSBAND_RELIGION",
            "F97_WIFE_FIRST", "F97_WIFE_AGE", "F97_WIFE_RELIGION",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Albert Garcia Santos",
        }
    },
    {
        "id": "3A-005",
        "desc": "Marriage certificate — with date of birth",
        "text": (
            "Husband (First): Miguel (Middle): Santos (Last): dela Cruz\n"
            "Husband DATE OF BIRTH: June 15, 1990\n"
            "Husband AGE: 31\n"
            "Husband CITIZENSHIP: Filipino\n"
            "Wife (First): Sofia (Middle): Tan (Last): Lim\n"
            "Wife DATE OF BIRTH: March 20, 1993\n"
            "Wife AGE: 28\n"
            "Wife CITIZENSHIP: Filipino\n"
            "16. DATE OF MARRIAGE: December 12, 2021\n"
            "15. PLACE OF MARRIAGE: Taguig City"
        ),
        "expected_labels": [
            "F97_HUSBAND_FIRST", "F97_HUSBAND_DOB", "F97_HUSBAND_AGE",
            "F97_WIFE_FIRST", "F97_WIFE_DOB", "F97_WIFE_AGE",
            "F97_DATE_OF_MARRIAGE", "F97_PLACE_OF_MARRIAGE",
        ],
        "expected_values": {
            "husband_name": "Miguel Santos dela Cruz",
            "wife_name": "Sofia Tan Lim",
        }
    },
]
# ── Black Box Edge Case Tests ──────────────────────────────
# Edge-case inputs for the black-box run.
# Each case is a dict with:
#   id           - case identifier printed in the report
#   desc         - human-readable description
#   form         - form type passed to run_extraction ("1A", "2A" or "3A")
#   text         - raw input text
#   expect_crash - whether an exception is the expected outcome
#   expect_empty - whether the resulting field dict is expected to be empty
# NOTE(review): the dash in each "desc" was reconstructed from mis-encoded
# text; confirm the original glyph.
BLACK_BOX_TESTS = [
    {
        "id": "BB-001",
        "desc": "Empty input — should not crash",
        "form": "1A",
        "text": "",
        "expect_crash": False,
        "expect_empty": True,
    },
    {
        "id": "BB-002",
        "desc": "Whitespace only — should not crash",
        "form": "1A",
        "text": " \n\n\t ",
        "expect_crash": False,
        "expect_empty": True,
    },
    {
        "id": "BB-003",
        "desc": "Garbled OCR output — should not crash",
        "form": "2A",
        "text": "1. N4ME (F1rst): J@an (M1ddle): d3la Cr!z (L@st): $antos\n2. SEX: M@le",
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-004",
        "desc": "Partial form — only name fields present",
        "form": "1A",
        "text": "1. NAME (First): Maria (Middle): Santos (Last): Reyes",
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-005",
        "desc": "Very long OCR text — should not crash",
        "form": "2A",
        "text": "1. NAME (First): Carlos (Last): Cruz\n" * 50,
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-006",
        "desc": "Missing colon separators — OCR formatting issue",
        "form": "1A",
        "text": "NAME First Juan Middle dela Cruz Last Santos\nSEX Male\nDATE OF BIRTH March 15 1990",
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-007",
        "desc": "Numbers only — no recognizable form content",
        "form": "3A",
        "text": "123456789 0987654321 11111 22222 33333",
        "expect_crash": False,
        "expect_empty": True,
    },
    {
        "id": "BB-008",
        "desc": "Valid Form 3A input — pipeline completes",
        "form": "3A",
        "text": (
            "Husband (First): Patrick (Middle): Sy (Last): Chua\n"
            "Wife (First): Christine (Middle): Lim (Last): Go\n"
            "DATE OF MARRIAGE: July 7, 2023\n"
            "PLACE OF MARRIAGE: Binondo Church, Manila"
        ),
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-009",
        "desc": "Mixed language (Filipino/English) — common in real forms",
        "form": "1A",
        "text": (
            "1. PANGALAN (First): Jose (Middle): dela Cruz (Last): Reyes\n"
            "2. SEX: Lalaki\n"
            "3. DATE OF BIRTH: Enero 5, 2000\n"
            "4. PLACE OF BIRTH: Lungsod ng Maynila"
        ),
        "expect_crash": False,
        "expect_empty": False,
    },
    {
        "id": "BB-010",
        "desc": "Special characters in name — OCR artifact",
        "form": "2A",
        "text": (
            "1. NAME (First): Fe|ipe (Middle): San+os (Last): Cr-uz\n"
            "2. SEX: Male\n"
            "4. AGE: 55\n"
            "6. DATE OF DEATH: May 1, 2020"
        ),
        "expect_crash": False,
        "expect_empty": False,
    },
]
# Every labelled form case in one list: Form 1A cases first, then 2A, then 3A.
ALL_FORM_TESTS = FORM_1A_TESTS + FORM_2A_TESTS + FORM_3A_TESTS
# ──────────────────────────────────────────────────────────
# HELPER FUNCTIONS
# ──────────────────────────────────────────────────────────
def separator(char="═", width=65):
    """Return a horizontal rule: ``char`` repeated ``width`` times.

    Args:
        char: String to repeat. NOTE(review): the default glyph was
            reconstructed from mis-encoded text; confirm it is the intended
            box-drawing character.
        width: Number of repetitions; zero or a negative value yields "".

    Returns:
        The repeated string.
    """
    return char * width
def header(title):
    """Print a top-level section banner: blank line, rule, title, rule.

    Args:
        title: Text shown between the two rules.
    """
    rule = separator()  # both rules are identical, so build the string once
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
def subheader(title):
    """Print a sub-section banner: blank line, 60-char rule, title, rule.

    Args:
        title: Text shown between the two rules.
    """
    # NOTE(review): rule glyph reconstructed from mis-encoded text; confirm.
    rule = "─" * 60
    print(f"\n {rule}")
    print(f" {title}")
    print(f" {rule}")
def run_extraction(extractor, filler, form_type, text):
    """Run the auto-fill pipeline for one form type and return the form object.

    Args:
        extractor: Unused here; kept so existing call sites keep working.
        filler: Object exposing ``fill_form_1a``, ``fill_form_2a`` and
            ``fill_form_3a``; only the method for ``form_type`` is looked up.
        form_type: One of ``"1A"``, ``"2A"`` or ``"3A"``.
        text: Raw text handed to the fill method.

    Returns:
        Whatever the selected ``fill_form_*`` method returns.

    Raises:
        ValueError: If ``form_type`` is not a supported form type. Previously
            this case fell through and returned ``None``, which only failed
            later with an unrelated error.
    """
    fill_methods = {
        "1A": "fill_form_1a",
        "2A": "fill_form_2a",
        "3A": "fill_form_3a",
    }
    try:
        method_name = fill_methods[form_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable values such as a list.
        raise ValueError(
            f"Unsupported form type {form_type!r}; "
            f"expected one of {sorted(fill_methods)}"
        ) from None
    return getattr(filler, method_name)(text)
def get_extracted_labels(extractor, form_type, text):
    """Run the raw extractor for a form type and return its result.

    The form is selected either by its short code ("1A", "2A", "3A") or by a
    label prefix ("F102", "F103", "F97") appearing anywhere in
    ``str(form_type)``. Routes are tried in that order and the first match
    wins.

    Args:
        extractor: Object exposing ``extract_form_102``, ``extract_form_103``
            and ``extract_form_97``.
        form_type: Short form code, or any value whose string form contains a
            label prefix.
        text: Raw text handed to the extractor.

    Returns:
        The extractor's return value unchanged (callers in this file call
        ``.keys()`` on it), or an empty dict when no form matches.
    """
    routes = (
        ("1A", "F102", "extract_form_102"),
        ("2A", "F103", "extract_form_103"),
        ("3A", "F97", "extract_form_97"),
    )
    as_text = str(form_type)
    for code, prefix, method_name in routes:
        if form_type == code or prefix in as_text:
            return getattr(extractor, method_name)(text)
    return {}
def infer_form_type(labels):
    """Guess the short form code from the first label with a known prefix.

    Args:
        labels: Iterable of label strings such as ``"F102_SEX"``.

    Returns:
        ``"1A"`` for an ``F102`` label, ``"2A"`` for ``F103``, ``"3A"`` for
        ``F97``. Falls back to ``"1A"`` when no label carries a known prefix
        (including an empty iterable).
    """
    prefix_to_form = (("F102", "1A"), ("F103", "2A"), ("F97", "3A"))
    for label in labels:
        for prefix, form_code in prefix_to_form:
            if label.startswith(prefix):
                return form_code
    return "1A"
# ──────────────────────────────────────────────────────────
# 1. ACCURACY TESTING
# ──────────────────────────────────────────────────────────
def run_accuracy_testing(extractor, filler):
    """Print per-test and per-form label accuracy; return the overall percent.

    A label counts as correct when its key is present in the extractor's
    result for that test's text; extracted values are not compared.

    NOTE(review): console glyphs (status icons, rules, dash, multiplication
    sign) were reconstructed from mis-encoded text; confirm against the
    intended output.

    Args:
        extractor: Passed to ``get_extracted_labels`` for each test case.
        filler: Unused here; kept for a uniform runner signature.

    Returns:
        Overall accuracy in percent (correct / expected labels across all
        three form suites), or 0 when there are no expected labels.
    """

    def _mark(pct):
        # >= 70 passes, 50 to <70 is a warning, below 50 fails.
        if pct >= 70:
            return "✅"
        return "⚠️ " if pct >= 50 else "❌"

    header("1. ACCURACY TESTING")
    print(" Measures: how many expected labels were correctly extracted")
    print(" Formula: Accuracy = Correct / Total Expected × 100%\n")

    suites = [
        (FORM_1A_TESTS, "Form 1A (Birth)"),
        (FORM_2A_TESTS, "Form 2A (Death)"),
        (FORM_3A_TESTS, "Form 3A (Marriage)"),
    ]
    results = {
        form_name: {"correct": 0, "total": 0, "tests": 0}
        for _, form_name in suites
    }

    for test_set, form_name in suites:
        subheader(f"Accuracy — {form_name}")
        for test in test_set:
            # The id prefix ("1A", "2A", "3A") doubles as the form type.
            form_type = test["id"].split("-")[0]
            data = get_extracted_labels(extractor, form_type, test["text"])
            found_labels = set(data.keys())

            expected = test["expected_labels"]
            missing = [label for label in expected if label not in found_labels]
            total = len(expected)
            correct = total - len(missing)
            pct = (correct / total * 100) if total > 0 else 0

            print(f" {_mark(pct)} [{test['id']}] {test['desc']}")
            print(f" Score: {correct}/{total} ({pct:.1f}%)")
            if missing:
                # Show at most three missing labels to keep the report short.
                print(f" Missing: {', '.join(missing[:3])}"
                      + ("..." if len(missing) > 3 else ""))

            results[form_name]["correct"] += correct
            results[form_name]["total"] += total
            results[form_name]["tests"] += 1

    # Summary table
    subheader("Accuracy Summary")
    rule = f" {'─'*30} {'─'*8} {'─'*7} {'─'*10}"
    print(f" {'Form':<30} {'Correct':>8} {'Total':>7} {'Accuracy':>10}")
    print(rule)
    total_correct = 0
    total_labels = 0
    for form_name, r in results.items():
        pct = (r["correct"] / r["total"] * 100) if r["total"] > 0 else 0
        print(f" {_mark(pct)} {form_name:<28} {r['correct']:>8} {r['total']:>7} {pct:>9.1f}%")
        total_correct += r["correct"]
        total_labels += r["total"]
    print(rule)
    overall = (total_correct / total_labels * 100) if total_labels > 0 else 0
    print(f" {'OVERALL':<30} {total_correct:>8} {total_labels:>7} {overall:>9.1f}%")
    return overall
# ──────────────────────────────────────────────────────────
# 2. BLACK BOX TESTING
# ──────────────────────────────────────────────────────────
def run_black_box_testing(extractor, filler):
    """Run every case in ``BLACK_BOX_TESTS`` through the full pipeline.

    A case fails when it raises unexpectedly, when it was expected to raise
    but did not, or when ``run_extraction`` takes longer than 5 seconds.
    A mismatch between ``expect_empty`` and the actual output is reported as
    a note only and does not fail the case.

    NOTE(review): console glyphs (status icons, bullets, dash, arrows) were
    reconstructed from mis-encoded text; confirm against the intended output.

    Args:
        extractor: Passed to ``run_extraction`` and used to build the
            ``AutoFillEngine`` that converts the form object to a dict.
        filler: Passed to ``run_extraction``.

    Returns:
        Tuple ``(passed, total)``.
    """
    header("2. BLACK BOX TESTING")
    print(" Tests system behavior from external perspective.")
    print(" No knowledge of internals — only input → output.\n")
    print(" Test categories:")
    print(" • Valid inputs (normal use)")
    print(" • Invalid / edge case inputs (empty, garbled, partial)")
    print(" • Boundary inputs (very long, special chars, mixed language)\n")

    passed = 0
    failed = 0
    errors = []

    for test in BLACK_BOX_TESTS:
        test_passed = True
        notes = []
        try:
            # perf_counter is monotonic, so the elapsed time cannot be
            # skewed by wall-clock adjustments.
            start = time.perf_counter()
            # Run the full pipeline
            form_obj = run_extraction(extractor, filler, test["form"], test["text"])
            elapsed = time.perf_counter() - start

            # Reaching this point means the pipeline did not raise.
            if test["expect_crash"]:
                test_passed = False
                notes.append("Expected crash but system survived")

            # Check: is output empty when expected?
            result = AutoFillEngine(extractor).to_dict(form_obj)
            is_empty = len(result) == 0
            if test["expect_empty"] and not is_empty:
                # Soft warning, not a hard fail for edge cases
                notes.append(f"Expected empty output but got {len(result)} fields")
            # The BB-007 exemption is kept from the original check.
            if not test["expect_empty"] and is_empty and test["id"] not in ["BB-007"]:
                notes.append("Expected some output but got nothing")

            # Performance check: must respond within 5 seconds
            if elapsed > 5.0:
                test_passed = False
                notes.append(f"Too slow: {elapsed:.2f}s (limit: 5s)")

            status_icon = "✅" if test_passed else "❌"
            timing = f"{elapsed*1000:.0f}ms"
            print(f" {status_icon} [{test['id']}] {test['desc']}")
            print(f" Fields found: {len(result)} | Time: {timing}")
            for note in notes:
                print(f" ℹ️ {note}")
        except Exception as e:
            # Deliberately broad: this boundary exists to detect any crash.
            if test["expect_crash"]:
                print(f" ✅ [{test['id']}] {test['desc']}")
                print(f" Crashed as expected: {type(e).__name__}")
            else:
                test_passed = False
                errors.append(f"[{test['id']}] {type(e).__name__}: {e}")
                print(f" ❌ [{test['id']}] {test['desc']}")
                print(f" CRASH: {type(e).__name__}: {e}")

        # Single tally point: an expected crash passes, an unexpected one fails.
        if test_passed:
            passed += 1
        else:
            failed += 1

    subheader("Black Box Summary")
    total = passed + failed
    pct = (passed / total * 100) if total > 0 else 0
    print(f" Passed: {passed}/{total} ({pct:.1f}%)")
    if errors:
        print(f" Crashes detected: {len(errors)}")
        for e in errors:
            print(f" → {e}")
    else:
        print(" ✅ No crashes detected — system is stable")
    return passed, total
# ──────────────────────────────────────────────────────────
# 3. CONFUSION MATRIX
# ──────────────────────────────────────────────────────────
def _precision_recall_f1(tp, fp, fn):
    """Return (precision, recall, f1) as floats; 0.0 where a denominator is 0."""
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if (precision + recall) > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0
    return precision, recall, f1


def run_confusion_matrix(extractor):
    """Print per-label, per-form and overall TP/FP/FN with precision/recall/F1.

    Counting rules, per test case in ``ALL_FORM_TESTS``:
      * TP: an expected label is present in the extractor result.
      * FN: an expected label is absent from the extractor result.
      * FP: a label is present but not expected in this case, and it appears
        in the expected list of at least one other case. Labels that no case
        expects are ignored.

    NOTE(review): several cases list only a subset of the labels their text
    contains (e.g. 1A-002 has a middle name but does not expect
    F102_CHILD_MIDDLE, which 1A-001 does expect), so a correct extraction can
    be counted as an FP here. Confirm whether that is intended.

    NOTE(review): console glyphs (status icons, rules, dash) were
    reconstructed from mis-encoded text; confirm against the intended output.

    Args:
        extractor: Passed to ``get_extracted_labels`` for each test case.

    Returns:
        Overall F1 score across all forms (0.0 when nothing was counted).
    """
    header("3. CONFUSION MATRIX")
    print(" Per-label: True Positive (TP), False Positive (FP),")
    print(" False Negative (FN), Precision, Recall, F1-Score\n")

    # Built once, instead of rescanning every test for every found label.
    known_labels = {
        label for t in ALL_FORM_TESTS for label in t["expected_labels"]
    }

    # Collect TP/FP/FN for every label across all test cases
    label_stats = defaultdict(lambda: {"TP": 0, "FP": 0, "FN": 0})
    for test in ALL_FORM_TESTS:
        form_type = test["id"].split("-")[0]
        data = get_extracted_labels(extractor, form_type, test["text"])
        found_labels = set(data.keys())
        expected_labels = set(test["expected_labels"])

        for label in expected_labels:
            outcome = "TP" if label in found_labels else "FN"
            label_stats[label][outcome] += 1

        # False positives: found here, not expected here, expected elsewhere.
        for label in found_labels - expected_labels:
            if label in known_labels:
                label_stats[label]["FP"] += 1

    # Print per-form confusion matrices
    form_groups = [
        ("Form 1A (Birth Certificate)", "F102"),
        ("Form 2A (Death Certificate)", "F103"),
        ("Form 3A (Marriage Certificate)", "F97"),
    ]
    table_rule = " ".join("─" * w for w in (40, 4, 4, 4, 10, 8, 8))
    overall_tp = overall_fp = overall_fn = 0

    for form_name, prefix in form_groups:
        subheader(f"Confusion Matrix — {form_name}")
        form_labels = {k: v for k, v in label_stats.items() if k.startswith(prefix)}
        if not form_labels:
            print(" ⚠️ No test results for this form yet.")
            continue

        print(f" {'Label':<40} {'TP':>4} {'FP':>4} {'FN':>4} {'Precision':>10} {'Recall':>8} {'F1':>8}")
        print(f" {table_rule}")

        form_tp = form_fp = form_fn = 0
        for label, stats in sorted(form_labels.items()):
            tp = stats["TP"]
            fp = stats["FP"]
            fn = stats["FN"]
            precision, recall, f1 = _precision_recall_f1(tp, fp, fn)
            perf = "✅" if f1 >= 0.7 else ("⚠️ " if f1 >= 0.5 else "❌")
            short_label = label.replace(prefix + "_", "")
            print(f" {perf} {short_label:<38} {tp:>4} {fp:>4} {fn:>4} "
                  f"{precision:>9.2f} {recall:>7.2f} {f1:>7.2f}")
            form_tp += tp
            form_fp += fp
            form_fn += fn

        form_prec, form_rec, form_f1 = _precision_recall_f1(form_tp, form_fp, form_fn)
        print(f" {table_rule}")
        print(f" {' FORM TOTAL':<40} {form_tp:>4} {form_fp:>4} {form_fn:>4} "
              f"{form_prec:>9.2f} {form_rec:>7.2f} {form_f1:>7.2f}")
        overall_tp += form_tp
        overall_fp += form_fp
        overall_fn += form_fn

    # Overall confusion matrix summary
    subheader("Overall Confusion Matrix Summary")
    overall_prec, overall_rec, overall_f1 = _precision_recall_f1(
        overall_tp, overall_fp, overall_fn
    )
    print(f" {'Metric':<25} {'Value':>10}")
    print(f" {'─'*25} {'─'*10}")
    print(f" {'True Positives (TP)':<25} {overall_tp:>10}")
    print(f" {'False Positives (FP)':<25} {overall_fp:>10}")
    print(f" {'False Negatives (FN)':<25} {overall_fn:>10}")
    print(f" {'Precision':<25} {overall_prec:>9.2f}")
    print(f" {'Recall':<25} {overall_rec:>9.2f}")
    print(f" {'F1-Score':<25} {overall_f1:>9.2f}")
    return overall_f1
# ──────────────────────────────────────────────────────────
# 4. ISO 25010 RELIABILITY TESTING
# ──────────────────────────────────────────────────────────
def run_reliability_testing(extractor, filler):
    """Run the ISO 25010 reliability checks and print a pass/fail report.

    Four groups of checks run in order:

    * Availability: ``extractor`` and ``filler`` are not ``None`` and expose
      the expected callables.
    * Fault tolerance: ``filler.fill_form_1a`` is fed malformed text and must
      not raise.
    * Maturity: ``extractor.extract_form_102`` is called repeatedly on the
      same text and must return the same set of keys each time.
    * Recoverability: valid input must still be processed after empty or
      garbage input.

    Args:
        extractor: Extractor object under test.
        filler: Auto-fill engine under test.

    Returns:
        A ``(passed, total)`` tuple of check counts.
    """
    header("4. ISO 25010 — RELIABILITY TESTING")
    print(" ISO 25010 Reliability sub-characteristics:")
    print(" • Maturity — consistent results on repeated runs")
    print(" • Fault Tolerance — handles bad/missing input without crashing")
    print(" • Recoverability — recovers from error states")
    print(" • Availability — model loads and responds correctly\n")

    passed = 0
    total = 0

    # ── 4.1 Availability ──────────────────────────────────
    subheader("4.1 Availability — Model Load & Response")
    # getattr(..., None) keeps these checks safe even when the object is None.
    availability_tests = [
        ("Model loaded successfully", extractor is not None),
        ("AutoFillEngine initialized", filler is not None),
        ("fill_form_1a() is callable", callable(getattr(filler, "fill_form_1a", None))),
        ("fill_form_2a() is callable", callable(getattr(filler, "fill_form_2a", None))),
        ("fill_form_3a() is callable", callable(getattr(filler, "fill_form_3a", None))),
        ("extract_form_102() is callable", callable(getattr(extractor, "extract_form_102", None))),
        ("extract_form_103() is callable", callable(getattr(extractor, "extract_form_103", None))),
        ("extract_form_97() is callable", callable(getattr(extractor, "extract_form_97", None))),
    ]
    for desc, condition in availability_tests:
        total += 1
        if condition:
            passed += 1
            print(f" ✅ {desc}")
        else:
            print(f" ❌ {desc}")

    # ── 4.2 Fault Tolerance ───────────────────────────────
    subheader("4.2 Fault Tolerance — Bad Input Handling")
    fault_inputs = [
        ("Empty string", ""),
        ("None-like whitespace", " \n "),
        ("Random symbols", "@#$%^&*()_+{}|:<>?"),
        ("Very long input", "NAME: Juan Santos\n" * 200),
        ("Binary-like text", "\x00\x01\x02 NAME First Juan"),
        ("Only numbers", "123 456 789 000 111 222"),
        ("Repeated newlines", "\n\n\n\n\n"),
    ]
    for desc, bad_input in fault_inputs:
        total += 1
        # Any exception counts as a crash here, so the broad catch is deliberate.
        try:
            filler.fill_form_1a(bad_input)
        except Exception as exc:
            print(f" ❌ {desc} — CRASH: {type(exc).__name__}: {exc}")
        else:
            passed += 1
            print(f" ✅ {desc} — handled gracefully")

    # ── 4.3 Maturity (Consistency) ────────────────────────
    subheader("4.3 Maturity — Consistency on Repeated Runs")
    test_text = (
        "1. NAME (First): Juan (Middle): dela Cruz (Last): Santos\n"
        "2. SEX: Male\n"
        "3. DATE OF BIRTH: March 15, 1990\n"
        "4. PLACE OF BIRTH: Makati City"
    )
    num_runs = 5
    # NOTE(review): only the key sets are compared, not the extracted values;
    # confirm whether value-level consistency should be checked as well.
    key_sets = {
        frozenset(extractor.extract_form_102(test_text).keys())
        for _ in range(num_runs)
    }
    total += 1
    if len(key_sets) == 1:
        passed += 1
        print(f" ✅ {num_runs} repeated runs — identical results (consistent)")
    else:
        print(f" ❌ {num_runs} repeated runs — inconsistent results")

    # ── 4.4 Recoverability ────────────────────────────────
    subheader("4.4 Recoverability — System Continues After Errors")
    # An exception from ANY call in the sequence (including the deliberately
    # empty inputs) fails the check.
    total += 1
    try:
        filler.fill_form_1a("")
        filler.fill_form_2a("")
        filler.fill_form_1a("1. NAME (First): Test (Last): User\n2. SEX: Male")
    except Exception as exc:
        print(f" ❌ System did not recover: {exc}")
    else:
        passed += 1
        print(" ✅ System recovers after empty input — continues processing")

    total += 1
    try:
        for _ in range(3):
            filler.fill_form_2a("GARBAGE INPUT @#$%")
        filler.fill_form_2a("1. NAME (First): Carlos (Last): Cruz\n4. AGE: 65")
    except Exception as exc:
        print(f" ❌ System failed after bad inputs: {exc}")
    else:
        passed += 1
        print(" ✅ System processes valid input after multiple bad inputs")

    subheader("ISO 25010 Reliability Summary")
    pct = (passed / total * 100) if total > 0 else 0
    print(f" Passed: {passed}/{total} ({pct:.1f}%)")
    if pct >= 90:
        print(" ✅ RELIABILITY: EXCELLENT — meets ISO 25010 standard")
    elif pct >= 75:
        print(" ⚠️ RELIABILITY: ACCEPTABLE — minor issues found")
    else:
        print(" ❌ RELIABILITY: NEEDS IMPROVEMENT")
    return passed, total
# ══════════════════════════════════════════════════════════
# 5. ISO 25010 USABILITY TESTING
# ══════════════════════════════════════════════════════════
def run_usability_testing(extractor, filler):
    """Run the ISO 25010 usability checks and print a pass/fail report.

    Four groups of checks run in order: learnability (output shape and
    types), operability (each pipeline runs end to end, with timing),
    accessibility (output is readable by calling code) and user error
    protection (bad or mismatched input does not break later calls).

    Args:
        extractor: Not used by this function; kept so existing callers keep
            working.
        filler: Auto-fill engine under test; its ``fill_form_1a``,
            ``fill_form_2a``, ``fill_form_3a`` and ``to_dict`` are called.

    Returns:
        A ``(passed, total)`` tuple of check counts.
    """
    header("5. ISO 25010 — USABILITY TESTING")
    print(" ISO 25010 Usability sub-characteristics:")
    print(" • Learnability — consistent, predictable output format")
    print(" • Operability — pipeline runs end-to-end without manual steps")
    print(" • Accessibility — output is readable and usable by calling code")
    print(" • User error protection — handles mistakes without data corruption\n")

    def _run_checks(checks, timed=False):
        """Run (description, callable) pairs, print each outcome, return the pass count."""
        ok_count = 0
        for desc, check in checks:
            # A raising check is reported as a failure rather than aborting
            # the suite, so the broad catch is deliberate.
            try:
                started = time.time()
                outcome = check()
                elapsed = time.time() - started
            except Exception as exc:
                print(f" ❌ {desc} — {type(exc).__name__}: {exc}")
                continue
            if outcome:
                ok_count += 1
                suffix = f" ({elapsed*1000:.0f}ms)" if timed else ""
                print(f" ✅ {desc}{suffix}")
            else:
                print(f" ❌ {desc}")
        return ok_count

    passed = 0
    total = 0

    sample_text_102 = (
        "1. NAME (First): Juan (Middle): dela Cruz (Last): Santos\n"
        "2. SEX: Male\n"
        "3. DATE OF BIRTH: March 15, 1990\n"
        "4. PLACE OF BIRTH: Makati City\n"
        "7. MAIDEN NAME (First): Maria (Middle): Reyes (Last): dela Cruz\n"
        "8. CITIZENSHIP: Filipino\n"
        "14. NAME (First): Pedro (Middle): Cruz (Last): Santos"
    )
    sample_text_103 = (
        "1. NAME (First): Carlos (Middle): Reyes (Last): Mendoza\n"
        "2. SEX: Male\n4. AGE: 65\n"
        "5. PLACE OF DEATH: Manila\n"
        "6. DATE OF DEATH: January 1, 2020\n"
        "Immediate cause: Heart Attack"
    )
    sample_text_97 = (
        "Husband (First): Jose (Middle): Cruz (Last): Ramos\n"
        "Wife (First): Elena (Middle): Bautista (Last): Torres\n"
        "DATE OF MARRIAGE: February 14, 2022\n"
        "PLACE OF MARRIAGE: Manila City Hall"
    )

    # ── 5.1 Learnability ──────────────────────────────────
    subheader("5.1 Learnability — Output Format Consistency")
    learn_tests = [
        ("Form1A has name_of_child field",
         lambda: hasattr(filler.fill_form_1a(sample_text_102), "name_of_child")),
        ("Form1A name_of_child is string or None",
         lambda: isinstance(filler.fill_form_1a(sample_text_102).name_of_child,
                            (str, type(None)))),
        ("Form2A has name_of_deceased field",
         lambda: hasattr(filler.fill_form_2a(sample_text_103), "name_of_deceased")),
        ("Form3A has husband and wife fields",
         lambda: hasattr(filler.fill_form_3a(sample_text_97), "husband") and
                 hasattr(filler.fill_form_3a(sample_text_97), "wife")),
        ("to_dict() returns a dictionary",
         lambda: isinstance(filler.to_dict(filler.fill_form_1a(sample_text_102)), dict)),
        ("Same input always gives same output type",
         lambda: type(filler.fill_form_1a(sample_text_102))
                 is type(filler.fill_form_1a(sample_text_102))),
        ("Form1A output is a Form1A instance",
         lambda: isinstance(filler.fill_form_1a(sample_text_102), Form1A)),
        ("Form2A output is a Form2A instance",
         lambda: isinstance(filler.fill_form_2a(sample_text_103), Form2A)),
        ("Form3A output is a Form3A instance",
         lambda: isinstance(filler.fill_form_3a(sample_text_97), Form3A)),
    ]
    total += len(learn_tests)
    passed += _run_checks(learn_tests)

    # ── 5.2 Operability ───────────────────────────────────
    subheader("5.2 Operability — End-to-End Pipeline")
    operability_tests = [
        ("Form 1A pipeline completes (text → Form1A object)",
         lambda: filler.fill_form_1a(sample_text_102) is not None),
        ("Form 2A pipeline completes (text → Form2A object)",
         lambda: filler.fill_form_2a(sample_text_103) is not None),
        ("Form 3A pipeline completes (text → Form3A object)",
         lambda: filler.fill_form_3a(sample_text_97) is not None),
        ("to_dict() converts Form1A without errors",
         lambda: filler.to_dict(filler.fill_form_1a(sample_text_102)) is not None),
        ("to_dict() converts Form2A without errors",
         lambda: filler.to_dict(filler.fill_form_2a(sample_text_103)) is not None),
        ("to_dict() converts Form3A without errors",
         lambda: filler.to_dict(filler.fill_form_3a(sample_text_97)) is not None),
        ("Pipeline handles empty text without crash",
         lambda: filler.fill_form_1a("") is not None),
        ("Pipeline handles all 3 forms in sequence",
         lambda: all([
             filler.fill_form_1a(sample_text_102) is not None,
             filler.fill_form_2a(sample_text_103) is not None,
             filler.fill_form_3a(sample_text_97) is not None,
         ])),
    ]
    total += len(operability_tests)
    passed += _run_checks(operability_tests, timed=True)

    # ── 5.3 Accessibility ─────────────────────────────────
    subheader("5.3 Accessibility — Output Readability")
    # These fixtures are built outside any try block: an exception here
    # propagates to the caller instead of being counted as a failed check.
    form_1a = filler.fill_form_1a(sample_text_102)
    form_3a = filler.fill_form_3a(sample_text_97)
    dict_1a = filler.to_dict(form_1a)
    accessibility_tests = [
        ("Form1A dict keys are human-readable strings",
         lambda: all(isinstance(k, str) for k in dict_1a.keys())),
        ("Form1A dict values are strings or None",
         lambda: all(isinstance(v, (str, type(None))) for v in dict_1a.values())),
        ("Form3A.husband is accessible as attribute",
         lambda: form_3a.husband is not None),
        ("Form3A.wife is accessible as attribute",
         lambda: form_3a.wife is not None),
        ("Form3A.husband.name is string or None",
         lambda: isinstance(form_3a.husband.name, (str, type(None)))),
        # NOTE(review): as written this passes only when the name contains no
        # space at all, which does not match the description; the literal may
        # have lost whitespace (e.g. a double space) — confirm intent.
        ("Name fields use First Middle Last order",
         lambda: (form_1a.name_of_child or "").count(" ") == 0),
        ("Empty form produces empty dict (no None values in dict)",
         lambda: all(v is not None
                     for v in filler.to_dict(filler.fill_form_1a("")).values())),
    ]
    total += len(accessibility_tests)
    passed += _run_checks(accessibility_tests)

    # ── 5.4 User Error Protection ─────────────────────────
    subheader("5.4 User Error Protection — Input Mistakes")
    error_protection_tests = [
        ("Calling wrong form type does not corrupt other forms",
         lambda: (filler.fill_form_1a(sample_text_103) is not None and
                  filler.fill_form_1a(sample_text_102) is not None)),
        ("Processing bad input does not affect next call",
         lambda: (filler.fill_form_1a("GARBAGE") is not None and
                  filler.fill_form_1a(sample_text_102) is not None)),
        ("Multiple calls do not accumulate state errors",
         lambda: len([filler.fill_form_2a(sample_text_103) for _ in range(5)]) == 5),
    ]
    total += len(error_protection_tests)
    passed += _run_checks(error_protection_tests)

    subheader("ISO 25010 Usability Summary")
    pct = (passed / total * 100) if total > 0 else 0
    print(f" Passed: {passed}/{total} ({pct:.1f}%)")
    if pct >= 90:
        print(" ✅ USABILITY: EXCELLENT — meets ISO 25010 standard")
    elif pct >= 75:
        print(" ⚠️ USABILITY: ACCEPTABLE — minor issues found")
    else:
        print(" ❌ USABILITY: NEEDS IMPROVEMENT")
    return passed, total
# ══════════════════════════════════════════════════════════
# FINAL REPORT
# ══════════════════════════════════════════════════════════
def print_final_report(model_path, accuracy, bb_pass, bb_total,
                       f1_score, rel_pass, rel_total,
                       usa_pass, usa_total, total_time):
    """Print the consolidated score table for all five test sections.

    Each section is shown as a percentage with a textual grade, followed by
    the unweighted mean of the five percentages and an overall verdict (pass
    when that mean is at least 75).

    Args:
        model_path: Model location, echoed in the report header.
        accuracy: Accuracy score, treated as a percentage.
        bb_pass: Number of black-box checks passed.
        bb_total: Number of black-box checks run.
        f1_score: F1 as a fraction; multiplied by 100 for display and grading.
        rel_pass: Number of reliability checks passed.
        rel_total: Number of reliability checks run.
        usa_pass: Number of usability checks passed.
        usa_total: Number of usability checks run.
        total_time: Duration, printed in seconds with two decimals.
    """
    header("FINAL TEST REPORT")
    print(f" Model: {model_path}")
    print(f" Date/Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f" Duration: {total_time:.2f} seconds\n")

    def grade(pct):
        """Map a percentage to its grade label."""
        if pct >= 90:
            return "EXCELLENT ✅"
        if pct >= 75:
            return "GOOD ✅"
        if pct >= 60:
            return "ACCEPTABLE ⚠️ "
        return "NEEDS IMPROVEMENT ❌"

    def percent(part, whole):
        """Return part/whole as a percentage, or 0 when no checks were run."""
        return (part / whole * 100) if whole > 0 else 0

    rows = [
        ("1. Accuracy Testing", accuracy),
        ("2. Black Box Testing", percent(bb_pass, bb_total)),
        ("3. Confusion Matrix (F1)", f1_score * 100),
        ("4. ISO 25010 Reliability", percent(rel_pass, rel_total)),
        ("5. ISO 25010 Usability", percent(usa_pass, usa_total)),
    ]
    table_rule = f" {'─'*35} {'─'*12} {'─'*20}"

    print(f" {'Test':<35} {'Score':>12} Grade")
    print(table_rule)
    for title, score in rows:
        print(f" {title:<35} {score:>10.1f}% {grade(score)}")

    # Unweighted mean: every section contributes equally to the verdict.
    overall = sum(score for _, score in rows) / len(rows)
    print(table_rule)
    print(f" {'OVERALL SYSTEM SCORE':<35} {overall:>10.1f}% {grade(overall)}")

    verdict_rule = "═" * 60
    print(f"\n {verdict_rule}")
    if overall >= 75:
        print(" ✅ SYSTEM PASSES all testing objectives")
    else:
        print(" ⚠️ SYSTEM NEEDS IMPROVEMENT in some areas")
        print(" → Add more annotated training examples")
        print(" → Re-run training and evaluate again")
    print(f" {verdict_rule}")
# ══════════════════════════════════════════════════════════
# MAIN
# ══════════════════════════════════════════════════════════
def main():
    """Parse the command line, load the model, run all five test sections and print the report.

    The only option is ``--model``, the path passed to ``CivilRegistryNER``.
    If the extractor or the auto-fill engine cannot be constructed, an error
    and a hint are printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Civil Registry NER — Complete Test Suite"
    )
    parser.add_argument(
        "--model",
        default="./models/civil_registry_model/model-best",
        help="Path to spaCy model (default: trained model)",
    )
    args = parser.parse_args()

    print(separator("═"))
    print(" CIVIL REGISTRY NER — COMPLETE TEST SUITE")
    print(" ISO 25010 Compliance Testing")
    print(separator("═"))
    print(f"\n Model: {args.model}")
    print(f" Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Load model. Any failure here is fatal for the whole suite, so the
    # broad catch at this top-level boundary is deliberate.
    print(" Loading model...")
    try:
        extractor = CivilRegistryNER(model_path=args.model)
        filler = AutoFillEngine(extractor)
    except Exception as exc:
        print(f" ❌ Could not load model: {exc}")
        print(" → Try: python testing/test_suite.py --model en_core_web_sm")
        sys.exit(1)
    else:
        print(f" ✅ Model loaded: {args.model}\n")

    # Only the test sections are timed; model loading is excluded.
    start_time = time.time()

    # Run all 5 test sections
    accuracy = run_accuracy_testing(extractor, filler)
    bb_pass, bb_total = run_black_box_testing(extractor, filler)
    f1_score = run_confusion_matrix(extractor)
    rel_pass, rel_total = run_reliability_testing(extractor, filler)
    usa_pass, usa_total = run_usability_testing(extractor, filler)

    total_time = time.time() - start_time

    print_final_report(
        args.model, accuracy,
        bb_pass, bb_total,
        f1_score,
        rel_pass, rel_total,
        usa_pass, usa_total,
        total_time,
    )
# Run the suite only when executed as a script, not when imported.
if __name__ == "__main__":
    main()