Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,10 @@
|
|
| 17 |
|
| 18 |
from flask import Flask, request, jsonify
|
| 19 |
from flask_cors import CORS
|
| 20 |
-
import os
|
|
|
|
|
|
|
|
|
|
| 21 |
from datetime import datetime
|
| 22 |
|
| 23 |
# ── sys.path setup ────────────────────────────────────────────
|
|
@@ -38,9 +41,9 @@ app = Flask(__name__)
|
|
| 38 |
CORS(app)
|
| 39 |
|
| 40 |
# ── CONFIGURATION ─────────────────────────────────────────────
|
| 41 |
-
USE_REAL_PIPELINE
|
| 42 |
-
USE_TEMPLATE_MATCHING
|
| 43 |
-
PIPELINE_REPO_PATH
|
| 44 |
# ─────────────────────────────────────────────────────────────
|
| 45 |
|
| 46 |
# ── Load template matcher ─────────────────────────────────────
|
|
@@ -92,24 +95,25 @@ def process_document():
|
|
| 92 |
if 'file' not in request.files:
|
| 93 |
return jsonify({'status': 'error', 'message': 'No file provided'}), 400
|
| 94 |
|
| 95 |
-
file
|
| 96 |
-
file2
|
| 97 |
form_hint = request.form.get('form_hint', '1A')
|
| 98 |
|
| 99 |
# Map form_hint (1A/2A/3A/90) → pipeline form_type (birth/death/marriage)
|
| 100 |
-
hint_to_type = {'1A': 'birth', '2A': 'death',
|
|
|
|
| 101 |
form_type = hint_to_type.get(form_hint, 'birth')
|
| 102 |
|
| 103 |
# ── Save uploaded file(s) temporarily ────────────────────
|
| 104 |
os.makedirs(TEMP_DIR, exist_ok=True)
|
| 105 |
-
timestamp
|
| 106 |
-
ext
|
| 107 |
saved_path = os.path.join(TEMP_DIR, f'upload_{timestamp}{ext}')
|
| 108 |
file.save(saved_path)
|
| 109 |
|
| 110 |
saved_path2 = None
|
| 111 |
if file2 and file2.filename:
|
| 112 |
-
ext2
|
| 113 |
saved_path2 = os.path.join(TEMP_DIR, f'upload_{timestamp}_bride{ext2}')
|
| 114 |
file2.save(saved_path2)
|
| 115 |
|
|
@@ -135,11 +139,15 @@ def process_document():
|
|
| 135 |
'trace': tb
|
| 136 |
}), 500
|
| 137 |
finally:
|
| 138 |
-
try:
|
| 139 |
-
|
|
|
|
|
|
|
| 140 |
if saved_path2:
|
| 141 |
-
try:
|
| 142 |
-
|
|
|
|
|
|
|
| 143 |
|
| 144 |
# ── Save preview HTML ─────────────────────────────────────
|
| 145 |
preview_file = f'form_{form_class}_{timestamp}.html'
|
|
@@ -201,17 +209,19 @@ def _run_real_pipeline(file_path, form_hint, form_type, file2_path=None):
|
|
| 201 |
# place_of_marriage, husband{}, wife{}
|
| 202 |
# We map these to our groom_*/bride_* DB field names.
|
| 203 |
raw_groom = _pipeline.process_pdf(file_path, form_type='marriage')
|
| 204 |
-
groom_fields, groom_conf = _map_pipeline_output_form90(
|
|
|
|
| 205 |
|
| 206 |
# ── Process bride page separately if provided ──────────
|
| 207 |
bride_fields = {}
|
| 208 |
-
bride_conf
|
| 209 |
if file2_path:
|
| 210 |
raw_bride = _pipeline.process_pdf(file2_path, form_type='marriage')
|
| 211 |
-
bride_fields, bride_conf = _map_pipeline_output_form90(
|
|
|
|
| 212 |
|
| 213 |
# ── Merge: groom fields take priority for shared fields ─
|
| 214 |
-
fields
|
| 215 |
confidence = {**bride_conf, **groom_conf}
|
| 216 |
|
| 217 |
# ── Ensure all expected Form 90 keys exist (empty string fallback)
|
|
@@ -236,11 +246,12 @@ def _run_real_pipeline(file_path, form_hint, form_type, file2_path=None):
|
|
| 236 |
# pipeline returns a Form object with a form_class attribute
|
| 237 |
actual_class = getattr(raw_result, 'form_class', None) or form_hint
|
| 238 |
# Normalise: form1a→1A, form2a→2A, form3a→3A, form90→90
|
| 239 |
-
class_map = {'form1a': '1A', 'form2a': '2A',
|
|
|
|
| 240 |
form_class = class_map.get(str(actual_class).lower(), form_hint)
|
| 241 |
|
| 242 |
fields, confidence = _map_pipeline_output(raw_result, form_class)
|
| 243 |
-
|
| 244 |
return fields, confidence, form_class
|
| 245 |
|
| 246 |
|
|
@@ -344,10 +355,10 @@ def _map_pipeline_output(raw: dict, form_hint: str):
|
|
| 344 |
'husband_citizenship': raw.get('husband_citizenship') or raw.get('husband_nationality', ''),
|
| 345 |
'husband_mother_first': raw.get('husband_mother_first', ''),
|
| 346 |
'husband_mother_last': raw.get('husband_mother_last', ''),
|
| 347 |
-
'husband_mother_citizenship':raw.get('husband_mother_citizenship', ''),
|
| 348 |
'husband_father_first': raw.get('husband_father_first', ''),
|
| 349 |
'husband_father_last': raw.get('husband_father_last', ''),
|
| 350 |
-
'husband_father_citizenship':raw.get('husband_father_citizenship', ''),
|
| 351 |
'wife_first': raw.get('wife_first') or raw.get('wife_name_first', ''),
|
| 352 |
'wife_middle': raw.get('wife_middle') or raw.get('wife_name_middle', ''),
|
| 353 |
'wife_last': raw.get('wife_last') or raw.get('wife_name_last', ''),
|
|
@@ -392,22 +403,24 @@ def _map_pipeline_output_form90(raw: dict, role: str):
|
|
| 392 |
|
| 393 |
# ── Extract nested husband/wife dicts (may be empty) ─────
|
| 394 |
husband = raw.get('husband') or {}
|
| 395 |
-
wife
|
| 396 |
-
if not isinstance(husband, dict):
|
| 397 |
-
|
|
|
|
|
|
|
| 398 |
|
| 399 |
# ── Parse date_of_marriage → day/month/year ───────────────
|
| 400 |
dom_raw = raw.get('date_of_marriage') or ''
|
| 401 |
dom_parts = [p.strip() for p in str(dom_raw).split(',') if p.strip()]
|
| 402 |
-
marriage_day
|
| 403 |
marriage_month = dom_parts[1] if len(dom_parts) > 1 else ''
|
| 404 |
-
marriage_year
|
| 405 |
|
| 406 |
# ── Parse place_of_marriage → venue / city ────────────────
|
| 407 |
pom_raw = raw.get('place_of_marriage') or ''
|
| 408 |
pom_parts = [p.strip() for p in str(pom_raw).split(',') if p.strip()]
|
| 409 |
marriage_venue = pom_parts[0] if len(pom_parts) > 0 else ''
|
| 410 |
-
marriage_city
|
| 411 |
|
| 412 |
# ── Shared fields (same on both pages) ───────────────────
|
| 413 |
shared = {
|
|
@@ -481,9 +494,10 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
| 481 |
if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
|
| 482 |
detected = detect_form_type(img_path)
|
| 483 |
source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
|
| 484 |
-
form_hint
|
| 485 |
source_type = detected
|
| 486 |
-
print(
|
|
|
|
| 487 |
else:
|
| 488 |
source_type = hint_to_source.get(form_hint, '102')
|
| 489 |
|
|
@@ -496,22 +510,29 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
| 496 |
if file2_path.lower().endswith('.pdf'):
|
| 497 |
img_path2 = pdf_to_image(file2_path) or file2_path
|
| 498 |
raw2 = extract_fields(img_path2, '90')
|
| 499 |
-
raw
|
| 500 |
|
| 501 |
# Map source field names → output cert field names
|
| 502 |
-
fields
|
| 503 |
form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
|
| 504 |
|
|
|
|
| 505 |
# ── MNB + spaCyNER enrichment ──────────────────────────────
|
| 506 |
if _bridge is not None:
|
| 507 |
try:
|
| 508 |
ner_text = _raw_to_ner_text(raw, source_type)
|
| 509 |
|
| 510 |
-
#
|
| 511 |
-
|
| 512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
|
| 514 |
-
# spaCyNER:
|
| 515 |
if source_type == '102':
|
| 516 |
ner_form = _bridge.filler.fill_form_1a(ner_text)
|
| 517 |
elif source_type == '103':
|
|
@@ -520,6 +541,7 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
| 520 |
ner_form = _bridge.filler.fill_form_3a(ner_text)
|
| 521 |
else: # 90
|
| 522 |
ner_form = _bridge.filler.fill_form_90(ner_text, ner_text)
|
|
|
|
| 523 |
|
| 524 |
ner_fields = _ner_to_fields(ner_form, raw, form_hint)
|
| 525 |
|
|
@@ -540,7 +562,8 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
| 540 |
|
| 541 |
# Debug: show all mapped fields
|
| 542 |
non_empty = {k: v for k, v in fields.items() if v}
|
| 543 |
-
print(
|
|
|
|
| 544 |
for k, v in non_empty.items():
|
| 545 |
print(f' {k:<30} = {v}')
|
| 546 |
|
|
@@ -643,8 +666,8 @@ def _map_template_output(raw: dict, form_hint: str) -> dict:
|
|
| 643 |
# husband_mother_name / husband_father_name are full names from Form 97
|
| 644 |
'husband_mother_first': g('husband_mother_name'),
|
| 645 |
'husband_father_first': g('husband_father_name'),
|
| 646 |
-
'husband_mother_citizenship':g('husband_mother_citizenship'),
|
| 647 |
-
'husband_father_citizenship':g('husband_father_citizenship'),
|
| 648 |
'wife_first': g('wife_name_first'),
|
| 649 |
'wife_middle': g('wife_name_middle'),
|
| 650 |
'wife_last': g('wife_name_last'),
|
|
@@ -709,7 +732,8 @@ def _raw_to_ner_text(raw: dict, source_type: str) -> str:
|
|
| 709 |
def g(*keys):
|
| 710 |
for k in keys:
|
| 711 |
v = raw.get(k, '')
|
| 712 |
-
if v:
|
|
|
|
| 713 |
return ''
|
| 714 |
|
| 715 |
if source_type == '102':
|
|
@@ -799,8 +823,8 @@ def _split_name(full: str):
|
|
| 799 |
if not parts:
|
| 800 |
return '', '', ''
|
| 801 |
first = parts[0]
|
| 802 |
-
last
|
| 803 |
-
mid
|
| 804 |
return first, mid, last
|
| 805 |
|
| 806 |
|
|
@@ -809,7 +833,8 @@ def _ner_to_fields(form, raw: dict, form_hint: str) -> dict:
|
|
| 809 |
def r(*keys):
|
| 810 |
for k in keys:
|
| 811 |
v = raw.get(k, '')
|
| 812 |
-
if v:
|
|
|
|
| 813 |
return ''
|
| 814 |
|
| 815 |
def ga(attr, *fallback_keys):
|
|
@@ -863,8 +888,8 @@ def _ner_to_fields(form, raw: dict, form_hint: str) -> dict:
|
|
| 863 |
}
|
| 864 |
|
| 865 |
elif form_hint == '3A':
|
| 866 |
-
h
|
| 867 |
-
w
|
| 868 |
hd = h.to_dict() if h else {}
|
| 869 |
wd = w.to_dict() if w else {}
|
| 870 |
return {
|
|
@@ -973,7 +998,7 @@ def _run_fake_pipeline(form_hint):
|
|
| 973 |
'parents_marriage_month': 'June',
|
| 974 |
'parents_marriage_year': '2020',
|
| 975 |
'parents_marriage_city': 'Tarlac City',
|
| 976 |
-
'parents_marriage_province':'Tarlac',
|
| 977 |
'date_submitted': 'January 15, 2026',
|
| 978 |
'processed_by': 'John Doe',
|
| 979 |
'verified_position': 'City Civil Registrar',
|
|
@@ -1027,10 +1052,10 @@ def _run_fake_pipeline(form_hint):
|
|
| 1027 |
'husband_citizenship': 'Filipino',
|
| 1028 |
'husband_mother_first': 'Lourdes',
|
| 1029 |
'husband_mother_last': 'Bautista',
|
| 1030 |
-
'husband_mother_citizenship':'Filipino',
|
| 1031 |
'husband_father_first': 'Ramon',
|
| 1032 |
'husband_father_last': 'Bautista',
|
| 1033 |
-
'husband_father_citizenship':'Filipino',
|
| 1034 |
'wife_first': 'Elena Grace',
|
| 1035 |
'wife_middle': '',
|
| 1036 |
'wife_last': 'Reyes',
|
|
@@ -1078,7 +1103,7 @@ def _run_fake_pipeline(form_hint):
|
|
| 1078 |
}
|
| 1079 |
confidence = {k: 0.95 for k in fields}
|
| 1080 |
|
| 1081 |
-
form_class = form_hint if form_hint in ('1A','2A','3A','90') else '1A'
|
| 1082 |
return fields, confidence, form_class
|
| 1083 |
|
| 1084 |
|
|
@@ -1091,61 +1116,72 @@ def _build_preview_html(form_class, fields):
|
|
| 1091 |
return f'<tr><td class="lbl">{label}</td><td class="val">{val}</td></tr>'
|
| 1092 |
|
| 1093 |
if form_class == '1A':
|
| 1094 |
-
child
|
| 1095 |
-
mother = f"{fields.get('mother_first','')} {fields.get('mother_last','')}".strip(
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1101 |
elif form_class == '2A':
|
| 1102 |
-
deceased = f"{fields.get('deceased_first','')} {fields.get('deceased_middle','')} {fields.get('deceased_last','')}".strip()
|
| 1103 |
-
dod
|
| 1104 |
-
|
| 1105 |
-
|
|
|
|
|
|
|
| 1106 |
elif form_class == '3A':
|
| 1107 |
-
h
|
| 1108 |
-
|
| 1109 |
-
|
| 1110 |
-
|
| 1111 |
-
|
| 1112 |
-
|
| 1113 |
-
|
|
|
|
|
|
|
| 1114 |
title = f'Form 3A β {h} & {w}'
|
| 1115 |
else: # Form 90 β Marriage License
|
| 1116 |
-
g
|
| 1117 |
-
b
|
| 1118 |
-
dom
|
| 1119 |
-
fields.get('marriage_month',''),
|
| 1120 |
-
fields.get('marriage_day',''),
|
| 1121 |
-
fields.get('marriage_year',''),
|
| 1122 |
]))
|
| 1123 |
-
pom
|
| 1124 |
-
fields.get('marriage_venue',''),
|
| 1125 |
-
fields.get('marriage_city',''),
|
| 1126 |
-
fields.get('marriage_province',''),
|
| 1127 |
]))
|
| 1128 |
-
rows
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
title = f'Form 90 β {g} & {b}' if (
|
| 1147 |
-
|
| 1148 |
-
|
|
|
|
|
|
|
| 1149 |
return f"""<!DOCTYPE html><html><head><meta charset="UTF-8"><title>{title}</title>
|
| 1150 |
<style>
|
| 1151 |
body{{font-family:Arial,sans-serif;font-size:13px;padding:40px 50px;color:#111;}}
|
|
@@ -1157,7 +1193,7 @@ td.lbl{{width:220px;color:#555;}}
|
|
| 1157 |
td.val{{font-weight:bold;background:#fffde7;border-bottom:1px solid #f0d000;}}
|
| 1158 |
tr td[colspan]{{background:#f5f5f5;font-weight:bold;text-align:center;color:#333;border-bottom:2px solid #ddd;}}
|
| 1159 |
</style></head><body>
|
| 1160 |
-
<h2>LCR Form No. {form_class} β {fields.get('city_municipality','')}</h2>
|
| 1161 |
<div class="mode">Mode: {mode}</div>
|
| 1162 |
<table>{rows}</table>
|
| 1163 |
</body></html>"""
|
|
@@ -1165,4 +1201,4 @@ tr td[colspan]{{background:#f5f5f5;font-weight:bold;text-align:center;color:#333
|
|
| 1165 |
|
| 1166 |
if __name__ == '__main__':
|
| 1167 |
port = int(os.environ.get('PORT', 7860))
|
| 1168 |
-
app.run(host='0.0.0.0', port=port, debug=False)
|
|
|
|
| 17 |
|
| 18 |
from flask import Flask, request, jsonify
|
| 19 |
from flask_cors import CORS
|
| 20 |
+
import os
|
| 21 |
+
import sys
|
| 22 |
+
import json
|
| 23 |
+
import traceback
|
| 24 |
from datetime import datetime
|
| 25 |
|
| 26 |
# ── sys.path setup ────────────────────────────────────────────
|
|
|
|
| 41 |
CORS(app)
|
| 42 |
|
| 43 |
# ── CONFIGURATION ─────────────────────────────────────────────
|
| 44 |
+
USE_REAL_PIPELINE = False # β set True when models are ready
|
| 45 |
+
USE_TEMPLATE_MATCHING = True # β uses coordinate cropping + Tesseract OCR
|
| 46 |
+
PIPELINE_REPO_PATH = r"C:\xampp\htdocs\python"
|
| 47 |
# ─────────────────────────────────────────────────────────────
|
| 48 |
|
| 49 |
# ── Load template matcher ─────────────────────────────────────
|
|
|
|
| 95 |
if 'file' not in request.files:
|
| 96 |
return jsonify({'status': 'error', 'message': 'No file provided'}), 400
|
| 97 |
|
| 98 |
+
file = request.files['file']
|
| 99 |
+
file2 = request.files.get('file2') # bride file for Form 90
|
| 100 |
form_hint = request.form.get('form_hint', '1A')
|
| 101 |
|
| 102 |
# Map form_hint (1A/2A/3A/90) → pipeline form_type (birth/death/marriage)
|
| 103 |
+
hint_to_type = {'1A': 'birth', '2A': 'death',
|
| 104 |
+
'3A': 'marriage', '90': 'marriage'}
|
| 105 |
form_type = hint_to_type.get(form_hint, 'birth')
|
| 106 |
|
| 107 |
# ── Save uploaded file(s) temporarily ────────────────────
|
| 108 |
os.makedirs(TEMP_DIR, exist_ok=True)
|
| 109 |
+
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
| 110 |
+
ext = os.path.splitext(file.filename)[1] or '.pdf'
|
| 111 |
saved_path = os.path.join(TEMP_DIR, f'upload_{timestamp}{ext}')
|
| 112 |
file.save(saved_path)
|
| 113 |
|
| 114 |
saved_path2 = None
|
| 115 |
if file2 and file2.filename:
|
| 116 |
+
ext2 = os.path.splitext(file2.filename)[1] or '.pdf'
|
| 117 |
saved_path2 = os.path.join(TEMP_DIR, f'upload_{timestamp}_bride{ext2}')
|
| 118 |
file2.save(saved_path2)
|
| 119 |
|
|
|
|
| 139 |
'trace': tb
|
| 140 |
}), 500
|
| 141 |
finally:
|
| 142 |
+
try:
|
| 143 |
+
os.remove(saved_path)
|
| 144 |
+
except:
|
| 145 |
+
pass
|
| 146 |
if saved_path2:
|
| 147 |
+
try:
|
| 148 |
+
os.remove(saved_path2)
|
| 149 |
+
except:
|
| 150 |
+
pass
|
| 151 |
|
| 152 |
# ── Save preview HTML ─────────────────────────────────────
|
| 153 |
preview_file = f'form_{form_class}_{timestamp}.html'
|
|
|
|
| 209 |
# place_of_marriage, husband{}, wife{}
|
| 210 |
# We map these to our groom_*/bride_* DB field names.
|
| 211 |
raw_groom = _pipeline.process_pdf(file_path, form_type='marriage')
|
| 212 |
+
groom_fields, groom_conf = _map_pipeline_output_form90(
|
| 213 |
+
raw_groom, role='groom')
|
| 214 |
|
| 215 |
# ── Process bride page separately if provided ──────────
|
| 216 |
bride_fields = {}
|
| 217 |
+
bride_conf = {}
|
| 218 |
if file2_path:
|
| 219 |
raw_bride = _pipeline.process_pdf(file2_path, form_type='marriage')
|
| 220 |
+
bride_fields, bride_conf = _map_pipeline_output_form90(
|
| 221 |
+
raw_bride, role='bride')
|
| 222 |
|
| 223 |
# ── Merge: groom fields take priority for shared fields ─
|
| 224 |
+
fields = {**bride_fields, **groom_fields}
|
| 225 |
confidence = {**bride_conf, **groom_conf}
|
| 226 |
|
| 227 |
# ── Ensure all expected Form 90 keys exist (empty string fallback)
|
|
|
|
| 246 |
# pipeline returns a Form object with a form_class attribute
|
| 247 |
actual_class = getattr(raw_result, 'form_class', None) or form_hint
|
| 248 |
# Normalise: form1a→1A, form2a→2A, form3a→3A, form90→90
|
| 249 |
+
class_map = {'form1a': '1A', 'form2a': '2A',
|
| 250 |
+
'form3a': '3A', 'form90': '90'}
|
| 251 |
form_class = class_map.get(str(actual_class).lower(), form_hint)
|
| 252 |
|
| 253 |
fields, confidence = _map_pipeline_output(raw_result, form_class)
|
| 254 |
+
|
| 255 |
return fields, confidence, form_class
|
| 256 |
|
| 257 |
|
|
|
|
| 355 |
'husband_citizenship': raw.get('husband_citizenship') or raw.get('husband_nationality', ''),
|
| 356 |
'husband_mother_first': raw.get('husband_mother_first', ''),
|
| 357 |
'husband_mother_last': raw.get('husband_mother_last', ''),
|
| 358 |
+
'husband_mother_citizenship': raw.get('husband_mother_citizenship', ''),
|
| 359 |
'husband_father_first': raw.get('husband_father_first', ''),
|
| 360 |
'husband_father_last': raw.get('husband_father_last', ''),
|
| 361 |
+
'husband_father_citizenship': raw.get('husband_father_citizenship', ''),
|
| 362 |
'wife_first': raw.get('wife_first') or raw.get('wife_name_first', ''),
|
| 363 |
'wife_middle': raw.get('wife_middle') or raw.get('wife_name_middle', ''),
|
| 364 |
'wife_last': raw.get('wife_last') or raw.get('wife_name_last', ''),
|
|
|
|
| 403 |
|
| 404 |
# ── Extract nested husband/wife dicts (may be empty) ─────
|
| 405 |
husband = raw.get('husband') or {}
|
| 406 |
+
wife = raw.get('wife') or {}
|
| 407 |
+
if not isinstance(husband, dict):
|
| 408 |
+
husband = {}
|
| 409 |
+
if not isinstance(wife, dict):
|
| 410 |
+
wife = {}
|
| 411 |
|
| 412 |
# ── Parse date_of_marriage → day/month/year ───────────────
|
| 413 |
dom_raw = raw.get('date_of_marriage') or ''
|
| 414 |
dom_parts = [p.strip() for p in str(dom_raw).split(',') if p.strip()]
|
| 415 |
+
marriage_day = dom_parts[0] if len(dom_parts) > 0 else ''
|
| 416 |
marriage_month = dom_parts[1] if len(dom_parts) > 1 else ''
|
| 417 |
+
marriage_year = dom_parts[2] if len(dom_parts) > 2 else ''
|
| 418 |
|
| 419 |
# ── Parse place_of_marriage → venue / city ────────────────
|
| 420 |
pom_raw = raw.get('place_of_marriage') or ''
|
| 421 |
pom_parts = [p.strip() for p in str(pom_raw).split(',') if p.strip()]
|
| 422 |
marriage_venue = pom_parts[0] if len(pom_parts) > 0 else ''
|
| 423 |
+
marriage_city = pom_parts[1] if len(pom_parts) > 1 else ''
|
| 424 |
|
| 425 |
# ── Shared fields (same on both pages) ───────────────────
|
| 426 |
shared = {
|
|
|
|
| 494 |
if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
|
| 495 |
detected = detect_form_type(img_path)
|
| 496 |
source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
|
| 497 |
+
form_hint = source_map.get(detected, '1A')
|
| 498 |
source_type = detected
|
| 499 |
+
print(
|
| 500 |
+
f'[app.py] Auto-detected form type: {detected} β output: {form_hint}')
|
| 501 |
else:
|
| 502 |
source_type = hint_to_source.get(form_hint, '102')
|
| 503 |
|
|
|
|
| 510 |
if file2_path.lower().endswith('.pdf'):
|
| 511 |
img_path2 = pdf_to_image(file2_path) or file2_path
|
| 512 |
raw2 = extract_fields(img_path2, '90')
|
| 513 |
+
raw = {**raw, **raw2} # merge β bride fields fill any gaps
|
| 514 |
|
| 515 |
# Map source field names → output cert field names
|
| 516 |
+
fields = _map_template_output(raw, form_hint)
|
| 517 |
form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
|
| 518 |
|
| 519 |
+
# ── MNB + spaCyNER enrichment ──────────────────────────────
|
| 520 |
# ── MNB + spaCyNER enrichment ──────────────────────────────
|
| 521 |
if _bridge is not None:
|
| 522 |
try:
|
| 523 |
ner_text = _raw_to_ner_text(raw, source_type)
|
| 524 |
|
| 525 |
+
# ── FIX: skip MNB for Form 90 — type is already known ──
|
| 526 |
+
if source_type == '90':
|
| 527 |
+
mnb_result = {'label': 'Form 90 - Application for Marriage License',
|
| 528 |
+
'form_code': 'form90', 'confidence': 1.0}
|
| 529 |
+
else:
|
| 530 |
+
mnb_result = _bridge.mnb.classify_full(ner_text)
|
| 531 |
+
|
| 532 |
+
print(
|
| 533 |
+
f'[app.py] MNB: {mnb_result["label"]} ({mnb_result["confidence"]:.1%})')
|
| 534 |
|
| 535 |
+
# spaCyNER: existing logic unchanged
|
| 536 |
if source_type == '102':
|
| 537 |
ner_form = _bridge.filler.fill_form_1a(ner_text)
|
| 538 |
elif source_type == '103':
|
|
|
|
| 541 |
ner_form = _bridge.filler.fill_form_3a(ner_text)
|
| 542 |
else: # 90
|
| 543 |
ner_form = _bridge.filler.fill_form_90(ner_text, ner_text)
|
| 544 |
+
# ... rest unchanged
|
| 545 |
|
| 546 |
ner_fields = _ner_to_fields(ner_form, raw, form_hint)
|
| 547 |
|
|
|
|
| 562 |
|
| 563 |
# Debug: show all mapped fields
|
| 564 |
non_empty = {k: v for k, v in fields.items() if v}
|
| 565 |
+
print(
|
| 566 |
+
f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
|
| 567 |
for k, v in non_empty.items():
|
| 568 |
print(f' {k:<30} = {v}')
|
| 569 |
|
|
|
|
| 666 |
# husband_mother_name / husband_father_name are full names from Form 97
|
| 667 |
'husband_mother_first': g('husband_mother_name'),
|
| 668 |
'husband_father_first': g('husband_father_name'),
|
| 669 |
+
'husband_mother_citizenship': g('husband_mother_citizenship'),
|
| 670 |
+
'husband_father_citizenship': g('husband_father_citizenship'),
|
| 671 |
'wife_first': g('wife_name_first'),
|
| 672 |
'wife_middle': g('wife_name_middle'),
|
| 673 |
'wife_last': g('wife_name_last'),
|
|
|
|
| 732 |
def g(*keys):
|
| 733 |
for k in keys:
|
| 734 |
v = raw.get(k, '')
|
| 735 |
+
if v:
|
| 736 |
+
return str(v)
|
| 737 |
return ''
|
| 738 |
|
| 739 |
if source_type == '102':
|
|
|
|
| 823 |
if not parts:
|
| 824 |
return '', '', ''
|
| 825 |
first = parts[0]
|
| 826 |
+
last = parts[-1] if len(parts) > 1 else ''
|
| 827 |
+
mid = ' '.join(parts[1:-1]) if len(parts) > 2 else ''
|
| 828 |
return first, mid, last
|
| 829 |
|
| 830 |
|
|
|
|
| 833 |
def r(*keys):
|
| 834 |
for k in keys:
|
| 835 |
v = raw.get(k, '')
|
| 836 |
+
if v:
|
| 837 |
+
return v
|
| 838 |
return ''
|
| 839 |
|
| 840 |
def ga(attr, *fallback_keys):
|
|
|
|
| 888 |
}
|
| 889 |
|
| 890 |
elif form_hint == '3A':
|
| 891 |
+
h = getattr(form, 'husband', None)
|
| 892 |
+
w = getattr(form, 'wife', None)
|
| 893 |
hd = h.to_dict() if h else {}
|
| 894 |
wd = w.to_dict() if w else {}
|
| 895 |
return {
|
|
|
|
| 998 |
'parents_marriage_month': 'June',
|
| 999 |
'parents_marriage_year': '2020',
|
| 1000 |
'parents_marriage_city': 'Tarlac City',
|
| 1001 |
+
'parents_marriage_province': 'Tarlac',
|
| 1002 |
'date_submitted': 'January 15, 2026',
|
| 1003 |
'processed_by': 'John Doe',
|
| 1004 |
'verified_position': 'City Civil Registrar',
|
|
|
|
| 1052 |
'husband_citizenship': 'Filipino',
|
| 1053 |
'husband_mother_first': 'Lourdes',
|
| 1054 |
'husband_mother_last': 'Bautista',
|
| 1055 |
+
'husband_mother_citizenship': 'Filipino',
|
| 1056 |
'husband_father_first': 'Ramon',
|
| 1057 |
'husband_father_last': 'Bautista',
|
| 1058 |
+
'husband_father_citizenship': 'Filipino',
|
| 1059 |
'wife_first': 'Elena Grace',
|
| 1060 |
'wife_middle': '',
|
| 1061 |
'wife_last': 'Reyes',
|
|
|
|
| 1103 |
}
|
| 1104 |
confidence = {k: 0.95 for k in fields}
|
| 1105 |
|
| 1106 |
+
form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
|
| 1107 |
return fields, confidence, form_class
|
| 1108 |
|
| 1109 |
|
|
|
|
| 1116 |
return f'<tr><td class="lbl">{label}</td><td class="val">{val}</td></tr>'
|
| 1117 |
|
| 1118 |
if form_class == '1A':
|
| 1119 |
+
child = f"{fields.get('child_first', '')} {fields.get('child_middle', '')} {fields.get('child_last', '')}".strip()
|
| 1120 |
+
mother = f"{fields.get('mother_first', '')} {fields.get('mother_last', '')}".strip(
|
| 1121 |
+
)
|
| 1122 |
+
father = f"{fields.get('father_first', '')} {fields.get('father_last', '')}".strip(
|
| 1123 |
+
)
|
| 1124 |
+
dob = f"{fields.get('dob_month', '')} {fields.get('dob_day', '')}, {fields.get('dob_year', '')}".strip(
|
| 1125 |
+
', ')
|
| 1126 |
+
pob = f"{fields.get('pob_city', '')}, {fields.get('pob_province', '')}".strip(
|
| 1127 |
+
', ')
|
| 1128 |
+
rows = row('Registry No', fields.get('registry_no', '')) + row('Name of Child', child) + row('Sex', fields.get('sex', '')
|
| 1129 |
+
) + row('Date of Birth', dob) + row('Place of Birth', pob) + row('Mother', mother) + row('Father', father)
|
| 1130 |
+
title = f'Form 1A β {child}'
|
| 1131 |
elif form_class == '2A':
|
| 1132 |
+
deceased = f"{fields.get('deceased_first', '')} {fields.get('deceased_middle', '')} {fields.get('deceased_last', '')}".strip()
|
| 1133 |
+
dod = f"{fields.get('dod_month', '')} {fields.get('dod_day', '')}, {fields.get('dod_year', '')}".strip(
|
| 1134 |
+
', ')
|
| 1135 |
+
rows = row('Registry No', fields.get('registry_no', '')) + row('Name of Deceased',
|
| 1136 |
+
deceased) + row('Date of Death', dod) + row('Cause', fields.get('cause_immediate', ''))
|
| 1137 |
+
title = f'Form 2A β {deceased}'
|
| 1138 |
elif form_class == '3A':
|
| 1139 |
+
h = f"{fields.get('husband_first', '')} {fields.get('husband_last', '')}".strip(
|
| 1140 |
+
)
|
| 1141 |
+
w = f"{fields.get('wife_first', '')} {fields.get('wife_last', '')}".strip()
|
| 1142 |
+
dom = f"{fields.get('marriage_month', '')} {fields.get('marriage_day', '')}, {fields.get('marriage_year', '')}".strip(
|
| 1143 |
+
', ')
|
| 1144 |
+
rows = (row('Registry No', fields.get('registry_no', '')) +
|
| 1145 |
+
row('Husband', h) + row('Wife', w) +
|
| 1146 |
+
row('Date of Marriage', dom) +
|
| 1147 |
+
row('Place of Marriage', f"{fields.get('marriage_venue', '')} {fields.get('marriage_city', '')}".strip()))
|
| 1148 |
title = f'Form 3A β {h} & {w}'
|
| 1149 |
else: # Form 90 β Marriage License
|
| 1150 |
+
g = f"{fields.get('groom_first', '')} {fields.get('groom_middle', '')} {fields.get('groom_last', '')}".strip()
|
| 1151 |
+
b = f"{fields.get('bride_first', '')} {fields.get('bride_middle', '')} {fields.get('bride_last', '')}".strip()
|
| 1152 |
+
dom = ' '.join(filter(None, [
|
| 1153 |
+
fields.get('marriage_month', ''),
|
| 1154 |
+
fields.get('marriage_day', ''),
|
| 1155 |
+
fields.get('marriage_year', ''),
|
| 1156 |
]))
|
| 1157 |
+
pom = ', '.join(filter(None, [
|
| 1158 |
+
fields.get('marriage_venue', ''),
|
| 1159 |
+
fields.get('marriage_city', ''),
|
| 1160 |
+
fields.get('marriage_province', ''),
|
| 1161 |
]))
|
| 1162 |
+
rows = (row('Registry No', fields.get('registry_no', '')) +
|
| 1163 |
+
row('License No', fields.get('license_no', '')) +
|
| 1164 |
+
row('Date of Issuance', fields.get('date_issuance', '')) +
|
| 1165 |
+
'<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">GROOM</td></tr>' +
|
| 1166 |
+
row('Name', g) +
|
| 1167 |
+
row('Age', fields.get('groom_age', '')) +
|
| 1168 |
+
row('Citizenship', fields.get('groom_citizenship', '')) +
|
| 1169 |
+
row('Mother', f"{fields.get('groom_mother_first', '')} {fields.get('groom_mother_last', '')}".strip()) +
|
| 1170 |
+
row('Father', f"{fields.get('groom_father_first', '')} {fields.get('groom_father_last', '')}".strip()) +
|
| 1171 |
+
'<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">BRIDE</td></tr>' +
|
| 1172 |
+
row('Name', b) +
|
| 1173 |
+
row('Age', fields.get('bride_age', '')) +
|
| 1174 |
+
row('Citizenship', fields.get('bride_citizenship', '')) +
|
| 1175 |
+
row('Mother', f"{fields.get('bride_mother_first', '')} {fields.get('bride_mother_last', '')}".strip()) +
|
| 1176 |
+
row('Father', f"{fields.get('bride_father_first', '')} {fields.get('bride_father_last', '')}".strip()) +
|
| 1177 |
+
'<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">MARRIAGE</td></tr>' +
|
| 1178 |
+
row('Date of Marriage', dom) +
|
| 1179 |
+
row('Place of Marriage', pom))
|
| 1180 |
+
title = f'Form 90 β {g} & {b}' if (
|
| 1181 |
+
g or b) else 'Form 90 β Marriage License'
|
| 1182 |
+
|
| 1183 |
+
mode = 'REAL PIPELINE' if (
|
| 1184 |
+
USE_REAL_PIPELINE and _pipeline) else 'FAKE DATA (dev mode)'
|
| 1185 |
return f"""<!DOCTYPE html><html><head><meta charset="UTF-8"><title>{title}</title>
|
| 1186 |
<style>
|
| 1187 |
body{{font-family:Arial,sans-serif;font-size:13px;padding:40px 50px;color:#111;}}
|
|
|
|
| 1193 |
td.val{{font-weight:bold;background:#fffde7;border-bottom:1px solid #f0d000;}}
|
| 1194 |
tr td[colspan]{{background:#f5f5f5;font-weight:bold;text-align:center;color:#333;border-bottom:2px solid #ddd;}}
|
| 1195 |
</style></head><body>
|
| 1196 |
+
<h2>LCR Form No. {form_class} β {fields.get('city_municipality', '')}</h2>
|
| 1197 |
<div class="mode">Mode: {mode}</div>
|
| 1198 |
<table>{rows}</table>
|
| 1199 |
</body></html>"""
|
|
|
|
| 1201 |
|
| 1202 |
if __name__ == '__main__':
|
| 1203 |
port = int(os.environ.get('PORT', 7860))
|
| 1204 |
+
app.run(host='0.0.0.0', port=port, debug=False)
|