File size: 16,815 Bytes
7111e1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
# bridge.py
# ============================================================
# BRIDGE — connects the three algorithms
#
#   CRNN+CTC  (Irish)     →  field dict from field_extractor.py
#   MNB       (Princess)  →  classifies form type
#   spacyNER  (Shane)     →  extracts + assembles fields
#
# DROP THIS FILE in the ROOT of your project:
#
#   LCR-Document-Digitization-System/
#   ├── CRNN+CTC/
#   ├── MNB/
#   ├── spacyNER/
#   ├── bridge.py          ← HERE
#   └── pipeline.py
#
# NOTE: nationality = citizenship (same field, different label per form)
#       The _get() helper handles both names automatically.
# ============================================================

import sys
import os
from pathlib import Path

# ── Make all three algorithm folders importable ──────────────
# Project root: the directory that contains this file.
_ROOT = Path(__file__).resolve().parent

# Push every algorithm folder to the front of sys.path, skipping any entry
# that is already there. Each insert goes to index 0, so the folder handled
# last ends up first.
for folder in ("CRNN+CTC", "MNB", "spacyNER"):
    p = str(_ROOT / folder)
    if p in sys.path:
        continue
    sys.path.insert(0, p)

# Finally make the root itself importable as well.
if sys.path.count(str(_ROOT)) == 0:
    sys.path.insert(0, str(_ROOT))

# ── Imports ──────────────────────────────────────────────────
from spacyNER.extractor import CivilRegistryNER
from spacyNER.autofill  import AutoFillEngine
from MNB.classifier     import MNBClassifier

# ── Default paths ────────────────────────────────────────────
# Default model locations, resolved relative to the project root and stored
# as plain strings. CivilRegistryBridge.__init__ uses them as its defaults.
NER_MODEL_PATH = str(
    _ROOT.joinpath("spacyNER", "models", "civil_registry_model", "model-best")
)
MNB_MODEL_DIR = str(_ROOT.joinpath("MNB", "models"))


# ════════════════════════════════════════════════════════════
# HELPER — nationality/citizenship alias
# Tries multiple key names, returns first non-empty value.
# nationality = citizenship — same field, different label per form.
# ════════════════════════════════════════════════════════════

def _get(f: dict, *keys, default='') -> str:
    for k in keys:
        v = f.get(k, '')
        if v and str(v).strip():
            return str(v).strip()
    return default


# ════════════════════════════════════════════════════════════
# CRNN FIELD DICT → TEXT CONVERTERS
# Turns Irish's field dict into readable text that NER can read.
# Handles both old field names and new dynamic_field_extractor names.
# ════════════════════════════════════════════════════════════

def crnn_birth_to_text(f: dict) -> str:
    """Render a birth-certificate field dict (Form 102) as Form 1A text.

    Every value is looked up through ``_get``, so missing keys become empty
    strings and alias key names are honoured.

    The text carries, one labelled line each:
      registry number, date of registration,
      child's name, sex, date of birth, place of birth,
      mother's maiden name and citizenship/nationality,
      father's name and citizenship/nationality,
      date and place of the parents' marriage.

    Returns the lines joined with newlines, with a trailing newline.
    """
    def val(*names):
        return _get(f, *names)

    def person(prefix):
        # "(First): ..  (Middle): ..  (Last): .." for keys <prefix>_*_name
        return (
            f"(First): {val(prefix + '_first_name')}  "
            f"(Middle): {val(prefix + '_middle_name')}  "
            f"(Last): {val(prefix + '_last_name')}"
        )

    mother_citizenship = val(
        'nationality_of_mother', 'mother_citizenship', 'mother_nationality'
    )
    father_citizenship = val(
        'nationality_of_father', 'father_citizenship', 'father_nationality'
    )

    lines = [
        f"Registry No.: {val('registry_number', 'registry_no')}",
        f"Date of Registration: {val('date_of_registration')}",
        f"1. NAME {person('child')}",
        f"2. SEX: {val('sex')}",
        f"3. DATE OF BIRTH: {val('dob_month')} {val('dob_day')}, {val('dob_year')}",
        f"4. PLACE OF BIRTH: {val('place_birth_hospital')} "
        f"{val('place_birth_city')} {val('place_birth_province')}",
        "MOTHER:",
        f"7. MAIDEN NAME {person('mother')}",
        f"8. CITIZENSHIP/NATIONALITY: {mother_citizenship}",
        "FATHER:",
        f"14. NAME {person('father')}",
        f"15. CITIZENSHIP/NATIONALITY: {father_citizenship}",
        "MARRIAGE OF PARENTS:",
        f"20a. DATE: {val('parents_marriage_month')} "
        f"{val('parents_marriage_day')}, {val('parents_marriage_year')}",
        f"20b. PLACE: {val('parents_marriage_city')} "
        f"{val('parents_marriage_province')}",
    ]
    return "\n".join(lines) + "\n"


def crnn_death_to_text(f: dict) -> str:
    """Render a death-certificate field dict (Form 103) as Form 2A text.

    Every value is looked up through ``_get``, so missing keys become empty
    strings and alias key names are honoured.

    The text carries, one labelled line each:
      registry number, date of registration,
      name of the deceased, sex, age, civil status,
      citizenship/nationality, date of death, place of death,
      cause of death plus antecedent and underlying causes.

    Returns the lines joined with newlines, with a trailing newline.
    """
    def val(*names):
        return _get(f, *names)

    deceased_name = (
        f"(First): {val('deceased_first_name')}  "
        f"(Middle): {val('deceased_middle_name')}  "
        f"(Last): {val('deceased_last_name')}"
    )
    death_date = f"{val('dod_month')} {val('dod_day')}, {val('dod_year')}"
    death_place = (
        f"{val('place_death_hospital')} "
        f"{val('place_death_city')} {val('place_death_province')}"
    )

    # The item numbers below follow the labels used by the original text
    # layout; they are intentionally not in ascending order.
    lines = [
        f"Registry No.: {val('registry_number', 'registry_no')}",
        f"Date of Registration: {val('date_of_registration')}",
        f"1. NAME {deceased_name}",
        f"2. SEX: {val('sex')}",
        f"4. AGE: {val('age', 'age_years')}",
        f"9. CIVIL STATUS: {val('civil_status')}",
        f"7. CITIZENSHIP/NATIONALITY: {val('nationality', 'citizenship')}",
        f"6. DATE OF DEATH: {death_date}",
        f"5. PLACE OF DEATH: {death_place}",
        f"17. CAUSE OF DEATH: {val('cause_of_death', 'cause_immediate')}",
        f"Antecedent cause: {val('cause_antecedent')}",
        f"Underlying cause: {val('cause_underlying')}",
    ]
    return "\n".join(lines) + "\n"


def crnn_marriage_to_text(f: dict) -> str:
    """Render a marriage-certificate field dict (Form 97) as Form 3A text.

    Every value is looked up through ``_get``, so missing keys become empty
    strings and alias key names are honoured.

    Layout: registry number and date of registration, then a HUSBAND section
    and a WIFE section (name, age, citizenship/nationality, father's name and
    citizenship, mother's name and citizenship), then place and date of
    marriage.

    Returns the lines joined with newlines, with a trailing newline.
    """
    def val(*names):
        return _get(f, *names)

    def spouse_section(who):
        # ``who`` ('husband' / 'wife') is both the section header and the
        # prefix of every key read for that spouse.
        return [
            who.upper() + ":",
            f"1. NAME (First): {val(who + '_first_name')}  "
            f"(Middle): {val(who + '_middle_name')}  "
            f"(Last): {val(who + '_last_name')}",
            f"2b. AGE: {val(who + '_age')}",
            f"4b. CITIZENSHIP/NATIONALITY: "
            f"{val(who + '_nationality', who + '_citizenship')}",
            f"8. NAME OF FATHER (First): {val(who + '_father_first')}  "
            f"(Middle): {val(who + '_father_middle')}  "
            f"(Last): {val(who + '_father_last')}",
            f"8b. FATHER CITIZENSHIP/NATIONALITY: "
            f"{val(who + '_father_nationality', who + '_father_citizenship')}",
            f"10. NAME OF MOTHER (First): {val(who + '_mother_first')}  "
            f"(Middle): {val(who + '_mother_middle')}  "
            f"(Last): {val(who + '_mother_last')}",
            f"10b. MOTHER CITIZENSHIP/NATIONALITY: "
            f"{val(who + '_mother_nationality', who + '_mother_citizenship')}",
        ]

    lines = [
        f"Registry No.: {val('registry_number', 'registry_no')}",
        f"Date of Registration: {val('date_of_registration')}",
    ]
    lines.extend(spouse_section('husband'))
    lines.extend(spouse_section('wife'))
    lines.append(
        f"15. PLACE OF MARRIAGE: "
        f"{val('place_marriage_city')} {val('place_marriage_province')}"
    )
    lines.append(
        f"16. DATE OF MARRIAGE: {val('date_marriage_month')} "
        f"{val('date_marriage_day')}, {val('date_marriage_year')}"
    )
    return "\n".join(lines) + "\n"


def crnn_birth_to_form90_text(f: dict, role: str = 'groom') -> str:
    """Render the birth-cert field dict of a groom or bride as Form 90 text.

    Every value is looked up through ``_get``, so missing keys become empty
    strings and alias key names are honoured.

    Parameters
    ----------
    f : dict
        Field dict for one person's birth certificate. Both the generic
        names (``first_name`` ...) and the birth-certificate names
        (``child_first_name`` ...) are accepted for the person's own name.
    role : str
        'groom' or 'bride'; upper-cased and used as the section header.

    Returns
    -------
    str
        Labelled lines for: name, date of birth, place of birth, sex,
        citizenship/nationality, father's name and citizenship, mother's
        name and citizenship. Ends with a newline.

    Notes
    -----
    The parents' citizenship lookups also accept ``nationality_of_father`` /
    ``nationality_of_mother``. Those are the key names the birth-certificate
    converter reads first, so without them the parent citizenship lines came
    out empty for ordinary birth-cert dicts. They are tried last, so dicts
    that already carry the other aliases behave exactly as before.
    """
    return (
        f"{role.upper()}:\n"
        f"1. NAME (First): {_get(f, 'first_name', 'child_first_name')}  "
        f"(Middle): {_get(f, 'middle_name', 'child_middle_name')}  "
        f"(Last): {_get(f, 'last_name', 'child_last_name')}\n"
        f"2. DATE OF BIRTH: {_get(f, 'dob_month')} {_get(f, 'dob_day')}, {_get(f, 'dob_year')}\n"
        f"3. PLACE OF BIRTH: {_get(f, 'place_birth_hospital')} "
        f"{_get(f, 'place_birth_city')} {_get(f, 'place_birth_province')}\n"
        f"4. SEX: {_get(f, 'sex')}\n"
        # The person's own citizenship falls back to the mother's when the
        # dict has no 'citizenship' / 'nationality' entry of its own.
        f"5. CITIZENSHIP/NATIONALITY: "
        f"{_get(f, 'citizenship', 'nationality', 'nationality_of_mother', 'mother_citizenship')}\n"
        f"NAME OF FATHER (First): {_get(f, 'father_first_name')}  "
        f"(Middle): {_get(f, 'father_middle_name')}  "
        f"(Last): {_get(f, 'father_last_name')}\n"
        f"FATHER CITIZENSHIP/NATIONALITY: "
        f"{_get(f, 'father_citizenship', 'father_nationality', 'nationality_of_father')}\n"
        f"NAME OF MOTHER (First): {_get(f, 'mother_first_name')}  "
        f"(Middle): {_get(f, 'mother_middle_name')}  "
        f"(Last): {_get(f, 'mother_last_name')}\n"
        f"MOTHER CITIZENSHIP/NATIONALITY: "
        f"{_get(f, 'mother_citizenship', 'mother_nationality', 'nationality_of_mother')}\n"
    )


# ── Auto-detect form type from CRNN field keys ───────────────
_BIRTH_KEYS    = {'child_first_name', 'mother_first_name', 'dob_day',
                  'registry_number', 'nationality_of_mother'}
_DEATH_KEYS    = {'deceased_first_name', 'cause_of_death', 'dod_day',
                  'cause_immediate', 'nationality'}
_MARRIAGE_KEYS = {'husband_first_name', 'wife_first_name', 'date_marriage_day',
                  'husband_nationality', 'wife_nationality'}

# Form-type label -> function that renders that form's field dict as text.
_CONVERTERS = dict(
    birth=crnn_birth_to_text,
    death=crnn_death_to_text,
    marriage=crnn_marriage_to_text,
)

def _detect_form_type(fields: dict) -> str:
    """Guess 'birth', 'death' or 'marriage' from the keys of ``fields``.

    The signature sets are checked in the order birth, death, marriage; the
    first set sharing at least one key with ``fields`` decides the result.
    When nothing matches, 'birth' is returned as the default.
    """
    present = set(fields.keys())
    candidates = (
        ('birth', _BIRTH_KEYS),
        ('death', _DEATH_KEYS),
        ('marriage', _MARRIAGE_KEYS),
    )
    for label, signature in candidates:
        if not present.isdisjoint(signature):
            return label
    return 'birth'


# ════════════════════════════════════════════════════════════
# BRIDGE CLASS
# ════════════════════════════════════════════════════════════

class CivilRegistryBridge:
    """
    The single connection point between the three algorithms.

    Holds one MNB classifier, one NER extractor and one auto-fill engine
    (built on that extractor) and exposes two entry points.

    Usage:
        from bridge import CivilRegistryBridge

        bridge = CivilRegistryBridge()

        # Path A — birth / death / marriage cert
        form = bridge.process(crnn_fields, form_hint="birth")
        print(form.to_dict())

        # Path B — Form 90 (two birth certs)
        form90 = bridge.process_marriage_license(
            groom_crnn_fields,
            bride_crnn_fields
        )
    """

    def __init__(self,
                 ner_model_path: str = NER_MODEL_PATH,
                 mnb_model_dir:  str = MNB_MODEL_DIR):
        """
        Parameters
        ----------
        ner_model_path : str
            Passed to CivilRegistryNER as ``model_path``.
        mnb_model_dir : str
            Passed to MNBClassifier as ``model_dir``.
        """
        # Princess's MNB classifier
        self.mnb = MNBClassifier(model_dir=mnb_model_dir)

        # Shane's NER extractor, and the form filler that wraps it
        self.extractor = CivilRegistryNER(model_path=ner_model_path)
        self.filler    = AutoFillEngine(self.extractor)

    # ── Path A — single cert (birth / death / marriage) ──────
    def process(self, crnn_fields: dict, form_hint: str = None):
        """
        Convert one certificate's CRNN fields to text, classify it, fill a form.

        Parameters
        ----------
        crnn_fields : dict
            Output from Irish's run_crnn_ocr() / dynamic_field_extractor

        form_hint : str, optional
            'birth' | 'death' | 'marriage' (case and surrounding whitespace
            are ignored). Selects the text template only. When omitted, or
            when the value is not one of the known types, the type is
            auto-detected from the field keys instead of silently using the
            birth template.

        Returns
        -------
        The result of the filler method chosen from the MNB label:
        'form2a' -> fill_form_2a, 'form3a' -> fill_form_3a,
        'form90' -> fill_form_90, anything else -> fill_form_1a.
        (Originally documented as Form1A | Form2A | Form3A with all fields
        populated — the concrete types are defined in the autofill module.)

        Side effects
        ------------
        Prints one progress line to stdout.
        """
        hint = form_hint.strip().lower() if isinstance(form_hint, str) else ''
        form_type = hint if hint in _CONVERTERS else _detect_form_type(crnn_fields)
        ocr_text  = _CONVERTERS.get(form_type, crnn_birth_to_text)(crnn_fields)
        mnb_label = self.mnb.classify_form_type(ocr_text)
        print(f"  [Bridge] hint={form_type!r}  MNB={mnb_label}  NER→running...")

        # Use MNB classification result to pick the correct form filler
        if mnb_label == 'form2a':
            return self.filler.fill_form_2a(ocr_text)
        elif mnb_label == 'form3a':
            return self.filler.fill_form_3a(ocr_text)
        elif mnb_label == 'form90':
            # Only one document is available on this path, so the same text
            # is supplied for both arguments of fill_form_90.
            return self.filler.fill_form_90(ocr_text, ocr_text)
        else:
            return self.filler.fill_form_1a(ocr_text)

    # ── Path B — Form 90 (two birth certs) ───────────────────
    def process_marriage_license(self,
                                  groom_crnn_fields: dict,
                                  bride_crnn_fields:  dict):
        """
        Build a Form 90 from the groom's and the bride's birth certificates.

        Parameters
        ----------
        groom_crnn_fields : dict   CRNN output for groom's birth cert
        bride_crnn_fields : dict   CRNN output for bride's birth cert

        Returns
        -------
        The result of ``filler.fill_form_90(groom_text, bride_text)``
        (originally documented as Form90 with groom.* and bride.* fields
        populated).

        Side effects
        ------------
        Prints one line with the MNB sex classification of each text. The
        classification is only reported; it does not influence the result.
        """
        groom_text = crnn_birth_to_form90_text(groom_crnn_fields, role='groom')
        bride_text = crnn_birth_to_form90_text(bride_crnn_fields, role='bride')

        groom_sex = self.mnb.classify_sex(groom_text)
        bride_sex  = self.mnb.classify_sex(bride_text)
        print(f"  [Bridge] Form90  groom_sex={groom_sex}  bride_sex={bride_sex}")

        return self.filler.fill_form_90(groom_text, bride_text)


# ── Quick test β€” run: python bridge.py ───────────────────────
if __name__ == "__main__":
    # Smoke test: push one hand-written birth-certificate field dict through
    # the whole bridge and print what comes back. Needs the trained MNB and
    # NER models at their default locations.

    SAMPLE_BIRTH = {
        "registry_number":           "2024-001",
        "date_of_registration":      "June 12, 1998",
        "child_first_name":          "TASLIAH",
        "child_middle_name":         "ABOBACAR",
        "child_last_name":           "GOMONSANG",
        "sex":                       "FEMALE",
        "dob_day":                   "12",
        "dob_month":                 "JUNE",
        "dob_year":                  "1998",
        "place_birth_hospital":      "CAMP JAS BLISS",
        "place_birth_city":          "MALABANG",
        "place_birth_province":      "LANAO DEL SUR",
        "mother_first_name":         "H. ASLIAH",
        "mother_middle_name":        "SANTICAN",
        "mother_last_name":          "ABOBACAR",
        "nationality_of_mother":     "FILIPINO",   # nationality = citizenship
        "father_first_name":         "H. NAEEF",
        "father_middle_name":        "MUDAG",
        "father_last_name":          "GOMONSANG",
        "nationality_of_father":     "FILIPINO",   # nationality = citizenship
        "parents_marriage_month":    "JANUARY",
        "parents_marriage_day":      "5",
        "parents_marriage_year":     "1990",
        "parents_marriage_city":     "CAMP JAS BLISS MALABANG",
        "parents_marriage_province": "LANAO DEL SUR",
    }

    print("=" * 55)
    print("  BRIDGE TEST")
    print("=" * 55)

    bridge = CivilRegistryBridge()
    form   = bridge.process(SAMPLE_BIRTH, form_hint="birth")

    # Headline fields first (arrow glyphs restored from mis-decoded UTF-8).
    print(f"\n  name_of_child  → {form.name_of_child!r}")
    print(f"  name_of_mother → {form.name_of_mother!r}")
    print(f"  name_of_father → {form.name_of_father!r}")
    print(f"  date_of_birth  → {form.date_of_birth!r}")
    print("\n  Full result:")
    # Then every populated field; empty values are skipped.
    for k, v in form.to_dict().items():
        if v:
            print(f"    {k:<35} {v}")