hanz245 committed on
Commit
005eabb
·
verified ·
1 Parent(s): 3f26cbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -100
app.py CHANGED
@@ -17,7 +17,10 @@
17
 
18
  from flask import Flask, request, jsonify
19
  from flask_cors import CORS
20
- import os, sys, json, traceback
 
 
 
21
  from datetime import datetime
22
 
23
  # ── sys.path setup ────────────────────────────────────────────
@@ -38,9 +41,9 @@ app = Flask(__name__)
38
  CORS(app)
39
 
40
  # ── CONFIGURATION ─────────────────────────────────────────────
41
- USE_REAL_PIPELINE = False # ← set True when models are ready
42
- USE_TEMPLATE_MATCHING = True # ← uses coordinate cropping + Tesseract OCR
43
- PIPELINE_REPO_PATH = r"C:\xampp\htdocs\python"
44
  # ─────────────────────────────────────────────────────────────
45
 
46
  # ── Load template matcher ─────────────────────────────────────
@@ -92,24 +95,25 @@ def process_document():
92
  if 'file' not in request.files:
93
  return jsonify({'status': 'error', 'message': 'No file provided'}), 400
94
 
95
- file = request.files['file']
96
- file2 = request.files.get('file2') # bride file for Form 90
97
  form_hint = request.form.get('form_hint', '1A')
98
 
99
  # Map form_hint (1A/2A/3A/90) → pipeline form_type (birth/death/marriage)
100
- hint_to_type = {'1A': 'birth', '2A': 'death', '3A': 'marriage', '90': 'marriage'}
 
101
  form_type = hint_to_type.get(form_hint, 'birth')
102
 
103
  # ── Save uploaded file(s) temporarily ────────────────────
104
  os.makedirs(TEMP_DIR, exist_ok=True)
105
- timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
106
- ext = os.path.splitext(file.filename)[1] or '.pdf'
107
  saved_path = os.path.join(TEMP_DIR, f'upload_{timestamp}{ext}')
108
  file.save(saved_path)
109
 
110
  saved_path2 = None
111
  if file2 and file2.filename:
112
- ext2 = os.path.splitext(file2.filename)[1] or '.pdf'
113
  saved_path2 = os.path.join(TEMP_DIR, f'upload_{timestamp}_bride{ext2}')
114
  file2.save(saved_path2)
115
 
@@ -135,11 +139,15 @@ def process_document():
135
  'trace': tb
136
  }), 500
137
  finally:
138
- try: os.remove(saved_path)
139
- except: pass
 
 
140
  if saved_path2:
141
- try: os.remove(saved_path2)
142
- except: pass
 
 
143
 
144
  # ── Save preview HTML ─────────────────────────────────────
145
  preview_file = f'form_{form_class}_{timestamp}.html'
@@ -201,17 +209,19 @@ def _run_real_pipeline(file_path, form_hint, form_type, file2_path=None):
201
  # place_of_marriage, husband{}, wife{}
202
  # We map these to our groom_*/bride_* DB field names.
203
  raw_groom = _pipeline.process_pdf(file_path, form_type='marriage')
204
- groom_fields, groom_conf = _map_pipeline_output_form90(raw_groom, role='groom')
 
205
 
206
  # ── Process bride page separately if provided ──────────
207
  bride_fields = {}
208
- bride_conf = {}
209
  if file2_path:
210
  raw_bride = _pipeline.process_pdf(file2_path, form_type='marriage')
211
- bride_fields, bride_conf = _map_pipeline_output_form90(raw_bride, role='bride')
 
212
 
213
  # ── Merge: groom fields take priority for shared fields ─
214
- fields = {**bride_fields, **groom_fields}
215
  confidence = {**bride_conf, **groom_conf}
216
 
217
  # ── Ensure all expected Form 90 keys exist (empty string fallback)
@@ -236,11 +246,12 @@ def _run_real_pipeline(file_path, form_hint, form_type, file2_path=None):
236
  # pipeline returns a Form object with a form_class attribute
237
  actual_class = getattr(raw_result, 'form_class', None) or form_hint
238
  # Normalise: form1a→1A, form2a→2A, form3a→3A, form90→90
239
- class_map = {'form1a': '1A', 'form2a': '2A', 'form3a': '3A', 'form90': '90'}
 
240
  form_class = class_map.get(str(actual_class).lower(), form_hint)
241
 
242
  fields, confidence = _map_pipeline_output(raw_result, form_class)
243
-
244
  return fields, confidence, form_class
245
 
246
 
@@ -344,10 +355,10 @@ def _map_pipeline_output(raw: dict, form_hint: str):
344
  'husband_citizenship': raw.get('husband_citizenship') or raw.get('husband_nationality', ''),
345
  'husband_mother_first': raw.get('husband_mother_first', ''),
346
  'husband_mother_last': raw.get('husband_mother_last', ''),
347
- 'husband_mother_citizenship':raw.get('husband_mother_citizenship', ''),
348
  'husband_father_first': raw.get('husband_father_first', ''),
349
  'husband_father_last': raw.get('husband_father_last', ''),
350
- 'husband_father_citizenship':raw.get('husband_father_citizenship', ''),
351
  'wife_first': raw.get('wife_first') or raw.get('wife_name_first', ''),
352
  'wife_middle': raw.get('wife_middle') or raw.get('wife_name_middle', ''),
353
  'wife_last': raw.get('wife_last') or raw.get('wife_name_last', ''),
@@ -392,22 +403,24 @@ def _map_pipeline_output_form90(raw: dict, role: str):
392
 
393
  # ── Extract nested husband/wife dicts (may be empty) ─────
394
  husband = raw.get('husband') or {}
395
- wife = raw.get('wife') or {}
396
- if not isinstance(husband, dict): husband = {}
397
- if not isinstance(wife, dict): wife = {}
 
 
398
 
399
  # ── Parse date_of_marriage → day/month/year ───────────────
400
  dom_raw = raw.get('date_of_marriage') or ''
401
  dom_parts = [p.strip() for p in str(dom_raw).split(',') if p.strip()]
402
- marriage_day = dom_parts[0] if len(dom_parts) > 0 else ''
403
  marriage_month = dom_parts[1] if len(dom_parts) > 1 else ''
404
- marriage_year = dom_parts[2] if len(dom_parts) > 2 else ''
405
 
406
  # ── Parse place_of_marriage → venue / city ────────────────
407
  pom_raw = raw.get('place_of_marriage') or ''
408
  pom_parts = [p.strip() for p in str(pom_raw).split(',') if p.strip()]
409
  marriage_venue = pom_parts[0] if len(pom_parts) > 0 else ''
410
- marriage_city = pom_parts[1] if len(pom_parts) > 1 else ''
411
 
412
  # ── Shared fields (same on both pages) ───────────────────
413
  shared = {
@@ -481,9 +494,10 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
481
  if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
482
  detected = detect_form_type(img_path)
483
  source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
484
- form_hint = source_map.get(detected, '1A')
485
  source_type = detected
486
- print(f'[app.py] Auto-detected form type: {detected} β†’ output: {form_hint}')
 
487
  else:
488
  source_type = hint_to_source.get(form_hint, '102')
489
 
@@ -496,22 +510,29 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
496
  if file2_path.lower().endswith('.pdf'):
497
  img_path2 = pdf_to_image(file2_path) or file2_path
498
  raw2 = extract_fields(img_path2, '90')
499
- raw = {**raw, **raw2} # merge β€” bride fields fill any gaps
500
 
501
  # Map source field names → output cert field names
502
- fields = _map_template_output(raw, form_hint)
503
  form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
504
 
 
505
  # ── MNB + spaCyNER enrichment ──────────────────────────────
506
  if _bridge is not None:
507
  try:
508
  ner_text = _raw_to_ner_text(raw, source_type)
509
 
510
- # MNB: classify form type (for logging / confidence)
511
- mnb_result = _bridge.mnb.classify_full(ner_text)
512
- print(f'[app.py] MNB: {mnb_result["label"]} ({mnb_result["confidence"]:.1%})')
 
 
 
 
 
 
513
 
514
- # spaCyNER: extract structured fields
515
  if source_type == '102':
516
  ner_form = _bridge.filler.fill_form_1a(ner_text)
517
  elif source_type == '103':
@@ -520,6 +541,7 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
520
  ner_form = _bridge.filler.fill_form_3a(ner_text)
521
  else: # 90
522
  ner_form = _bridge.filler.fill_form_90(ner_text, ner_text)
 
523
 
524
  ner_fields = _ner_to_fields(ner_form, raw, form_hint)
525
 
@@ -540,7 +562,8 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
540
 
541
  # Debug: show all mapped fields
542
  non_empty = {k: v for k, v in fields.items() if v}
543
- print(f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
 
544
  for k, v in non_empty.items():
545
  print(f' {k:<30} = {v}')
546
 
@@ -643,8 +666,8 @@ def _map_template_output(raw: dict, form_hint: str) -> dict:
643
  # husband_mother_name / husband_father_name are full names from Form 97
644
  'husband_mother_first': g('husband_mother_name'),
645
  'husband_father_first': g('husband_father_name'),
646
- 'husband_mother_citizenship':g('husband_mother_citizenship'),
647
- 'husband_father_citizenship':g('husband_father_citizenship'),
648
  'wife_first': g('wife_name_first'),
649
  'wife_middle': g('wife_name_middle'),
650
  'wife_last': g('wife_name_last'),
@@ -709,7 +732,8 @@ def _raw_to_ner_text(raw: dict, source_type: str) -> str:
709
  def g(*keys):
710
  for k in keys:
711
  v = raw.get(k, '')
712
- if v: return str(v)
 
713
  return ''
714
 
715
  if source_type == '102':
@@ -799,8 +823,8 @@ def _split_name(full: str):
799
  if not parts:
800
  return '', '', ''
801
  first = parts[0]
802
- last = parts[-1] if len(parts) > 1 else ''
803
- mid = ' '.join(parts[1:-1]) if len(parts) > 2 else ''
804
  return first, mid, last
805
 
806
 
@@ -809,7 +833,8 @@ def _ner_to_fields(form, raw: dict, form_hint: str) -> dict:
809
  def r(*keys):
810
  for k in keys:
811
  v = raw.get(k, '')
812
- if v: return v
 
813
  return ''
814
 
815
  def ga(attr, *fallback_keys):
@@ -863,8 +888,8 @@ def _ner_to_fields(form, raw: dict, form_hint: str) -> dict:
863
  }
864
 
865
  elif form_hint == '3A':
866
- h = getattr(form, 'husband', None)
867
- w = getattr(form, 'wife', None)
868
  hd = h.to_dict() if h else {}
869
  wd = w.to_dict() if w else {}
870
  return {
@@ -973,7 +998,7 @@ def _run_fake_pipeline(form_hint):
973
  'parents_marriage_month': 'June',
974
  'parents_marriage_year': '2020',
975
  'parents_marriage_city': 'Tarlac City',
976
- 'parents_marriage_province':'Tarlac',
977
  'date_submitted': 'January 15, 2026',
978
  'processed_by': 'John Doe',
979
  'verified_position': 'City Civil Registrar',
@@ -1027,10 +1052,10 @@ def _run_fake_pipeline(form_hint):
1027
  'husband_citizenship': 'Filipino',
1028
  'husband_mother_first': 'Lourdes',
1029
  'husband_mother_last': 'Bautista',
1030
- 'husband_mother_citizenship':'Filipino',
1031
  'husband_father_first': 'Ramon',
1032
  'husband_father_last': 'Bautista',
1033
- 'husband_father_citizenship':'Filipino',
1034
  'wife_first': 'Elena Grace',
1035
  'wife_middle': '',
1036
  'wife_last': 'Reyes',
@@ -1078,7 +1103,7 @@ def _run_fake_pipeline(form_hint):
1078
  }
1079
  confidence = {k: 0.95 for k in fields}
1080
 
1081
- form_class = form_hint if form_hint in ('1A','2A','3A','90') else '1A'
1082
  return fields, confidence, form_class
1083
 
1084
 
@@ -1091,61 +1116,72 @@ def _build_preview_html(form_class, fields):
1091
  return f'<tr><td class="lbl">{label}</td><td class="val">{val}</td></tr>'
1092
 
1093
  if form_class == '1A':
1094
- child = f"{fields.get('child_first','')} {fields.get('child_middle','')} {fields.get('child_last','')}".strip()
1095
- mother = f"{fields.get('mother_first','')} {fields.get('mother_last','')}".strip()
1096
- father = f"{fields.get('father_first','')} {fields.get('father_last','')}".strip()
1097
- dob = f"{fields.get('dob_month','')} {fields.get('dob_day','')}, {fields.get('dob_year','')}".strip(', ')
1098
- pob = f"{fields.get('pob_city','')}, {fields.get('pob_province','')}".strip(', ')
1099
- rows = row('Registry No', fields.get('registry_no','')) + row('Name of Child', child) + row('Sex', fields.get('sex','')) + row('Date of Birth', dob) + row('Place of Birth', pob) + row('Mother', mother) + row('Father', father)
1100
- title = f'Form 1A β€” {child}'
 
 
 
 
 
1101
  elif form_class == '2A':
1102
- deceased = f"{fields.get('deceased_first','')} {fields.get('deceased_middle','')} {fields.get('deceased_last','')}".strip()
1103
- dod = f"{fields.get('dod_month','')} {fields.get('dod_day','')}, {fields.get('dod_year','')}".strip(', ')
1104
- rows = row('Registry No', fields.get('registry_no','')) + row('Name of Deceased', deceased) + row('Date of Death', dod) + row('Cause', fields.get('cause_immediate',''))
1105
- title = f'Form 2A β€” {deceased}'
 
 
1106
  elif form_class == '3A':
1107
- h = f"{fields.get('husband_first','')} {fields.get('husband_last','')}".strip()
1108
- w = f"{fields.get('wife_first','')} {fields.get('wife_last','')}".strip()
1109
- dom = f"{fields.get('marriage_month','')} {fields.get('marriage_day','')}, {fields.get('marriage_year','')}".strip(', ')
1110
- rows = (row('Registry No', fields.get('registry_no','')) +
1111
- row('Husband', h) + row('Wife', w) +
1112
- row('Date of Marriage', dom) +
1113
- row('Place of Marriage', f"{fields.get('marriage_venue','')} {fields.get('marriage_city','')}".strip()))
 
 
1114
  title = f'Form 3A β€” {h} & {w}'
1115
  else: # Form 90 β€” Marriage License
1116
- g = f"{fields.get('groom_first','')} {fields.get('groom_middle','')} {fields.get('groom_last','')}".strip()
1117
- b = f"{fields.get('bride_first','')} {fields.get('bride_middle','')} {fields.get('bride_last','')}".strip()
1118
- dom = ' '.join(filter(None, [
1119
- fields.get('marriage_month',''),
1120
- fields.get('marriage_day',''),
1121
- fields.get('marriage_year',''),
1122
  ]))
1123
- pom = ', '.join(filter(None, [
1124
- fields.get('marriage_venue',''),
1125
- fields.get('marriage_city',''),
1126
- fields.get('marriage_province',''),
1127
  ]))
1128
- rows = (row('Registry No', fields.get('registry_no','')) +
1129
- row('License No', fields.get('license_no','')) +
1130
- row('Date of Issuance', fields.get('date_issuance','')) +
1131
- '<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">GROOM</td></tr>' +
1132
- row('Name', g) +
1133
- row('Age', fields.get('groom_age','')) +
1134
- row('Citizenship', fields.get('groom_citizenship','')) +
1135
- row('Mother', f"{fields.get('groom_mother_first','')} {fields.get('groom_mother_last','')}".strip()) +
1136
- row('Father', f"{fields.get('groom_father_first','')} {fields.get('groom_father_last','')}".strip()) +
1137
- '<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">BRIDE</td></tr>' +
1138
- row('Name', b) +
1139
- row('Age', fields.get('bride_age','')) +
1140
- row('Citizenship', fields.get('bride_citizenship','')) +
1141
- row('Mother', f"{fields.get('bride_mother_first','')} {fields.get('bride_mother_last','')}".strip()) +
1142
- row('Father', f"{fields.get('bride_father_first','')} {fields.get('bride_father_last','')}".strip()) +
1143
- '<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">MARRIAGE</td></tr>' +
1144
- row('Date of Marriage', dom) +
1145
- row('Place of Marriage', pom))
1146
- title = f'Form 90 β€” {g} & {b}' if (g or b) else 'Form 90 β€” Marriage License'
1147
-
1148
- mode = 'REAL PIPELINE' if (USE_REAL_PIPELINE and _pipeline) else 'FAKE DATA (dev mode)'
 
 
1149
  return f"""<!DOCTYPE html><html><head><meta charset="UTF-8"><title>{title}</title>
1150
  <style>
1151
  body{{font-family:Arial,sans-serif;font-size:13px;padding:40px 50px;color:#111;}}
@@ -1157,7 +1193,7 @@ td.lbl{{width:220px;color:#555;}}
1157
  td.val{{font-weight:bold;background:#fffde7;border-bottom:1px solid #f0d000;}}
1158
  tr td[colspan]{{background:#f5f5f5;font-weight:bold;text-align:center;color:#333;border-bottom:2px solid #ddd;}}
1159
  </style></head><body>
1160
- <h2>LCR Form No. {form_class} β€” {fields.get('city_municipality','')}</h2>
1161
  <div class="mode">Mode: {mode}</div>
1162
  <table>{rows}</table>
1163
  </body></html>"""
@@ -1165,4 +1201,4 @@ tr td[colspan]{{background:#f5f5f5;font-weight:bold;text-align:center;color:#333
1165
 
1166
  if __name__ == '__main__':
1167
  port = int(os.environ.get('PORT', 7860))
1168
- app.run(host='0.0.0.0', port=port, debug=False)
 
17
 
18
  from flask import Flask, request, jsonify
19
  from flask_cors import CORS
20
+ import os
21
+ import sys
22
+ import json
23
+ import traceback
24
  from datetime import datetime
25
 
26
  # ── sys.path setup ────────────────────────────────────────────
 
41
  CORS(app)
42
 
43
  # ── CONFIGURATION ─────────────────────────────────────────────
44
+ USE_REAL_PIPELINE = False # ← set True when models are ready
45
+ USE_TEMPLATE_MATCHING = True # ← uses coordinate cropping + Tesseract OCR
46
+ PIPELINE_REPO_PATH = r"C:\xampp\htdocs\python"
47
  # ─────────────────────────────────────────────────────────────
48
 
49
  # ── Load template matcher ─────────────────────────────────────
 
95
  if 'file' not in request.files:
96
  return jsonify({'status': 'error', 'message': 'No file provided'}), 400
97
 
98
+ file = request.files['file']
99
+ file2 = request.files.get('file2') # bride file for Form 90
100
  form_hint = request.form.get('form_hint', '1A')
101
 
102
  # Map form_hint (1A/2A/3A/90) → pipeline form_type (birth/death/marriage)
103
+ hint_to_type = {'1A': 'birth', '2A': 'death',
104
+ '3A': 'marriage', '90': 'marriage'}
105
  form_type = hint_to_type.get(form_hint, 'birth')
106
 
107
  # ── Save uploaded file(s) temporarily ────────────────────
108
  os.makedirs(TEMP_DIR, exist_ok=True)
109
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
110
+ ext = os.path.splitext(file.filename)[1] or '.pdf'
111
  saved_path = os.path.join(TEMP_DIR, f'upload_{timestamp}{ext}')
112
  file.save(saved_path)
113
 
114
  saved_path2 = None
115
  if file2 and file2.filename:
116
+ ext2 = os.path.splitext(file2.filename)[1] or '.pdf'
117
  saved_path2 = os.path.join(TEMP_DIR, f'upload_{timestamp}_bride{ext2}')
118
  file2.save(saved_path2)
119
 
 
139
  'trace': tb
140
  }), 500
141
  finally:
142
+ try:
143
+ os.remove(saved_path)
144
+ except:
145
+ pass
146
  if saved_path2:
147
+ try:
148
+ os.remove(saved_path2)
149
+ except:
150
+ pass
151
 
152
  # ── Save preview HTML ─────────────────────────────────────
153
  preview_file = f'form_{form_class}_{timestamp}.html'
 
209
  # place_of_marriage, husband{}, wife{}
210
  # We map these to our groom_*/bride_* DB field names.
211
  raw_groom = _pipeline.process_pdf(file_path, form_type='marriage')
212
+ groom_fields, groom_conf = _map_pipeline_output_form90(
213
+ raw_groom, role='groom')
214
 
215
  # ── Process bride page separately if provided ──────────
216
  bride_fields = {}
217
+ bride_conf = {}
218
  if file2_path:
219
  raw_bride = _pipeline.process_pdf(file2_path, form_type='marriage')
220
+ bride_fields, bride_conf = _map_pipeline_output_form90(
221
+ raw_bride, role='bride')
222
 
223
  # ── Merge: groom fields take priority for shared fields ─
224
+ fields = {**bride_fields, **groom_fields}
225
  confidence = {**bride_conf, **groom_conf}
226
 
227
  # ── Ensure all expected Form 90 keys exist (empty string fallback)
 
246
  # pipeline returns a Form object with a form_class attribute
247
  actual_class = getattr(raw_result, 'form_class', None) or form_hint
248
  # Normalise: form1a→1A, form2a→2A, form3a→3A, form90→90
249
+ class_map = {'form1a': '1A', 'form2a': '2A',
250
+ 'form3a': '3A', 'form90': '90'}
251
  form_class = class_map.get(str(actual_class).lower(), form_hint)
252
 
253
  fields, confidence = _map_pipeline_output(raw_result, form_class)
254
+
255
  return fields, confidence, form_class
256
 
257
 
 
355
  'husband_citizenship': raw.get('husband_citizenship') or raw.get('husband_nationality', ''),
356
  'husband_mother_first': raw.get('husband_mother_first', ''),
357
  'husband_mother_last': raw.get('husband_mother_last', ''),
358
+ 'husband_mother_citizenship': raw.get('husband_mother_citizenship', ''),
359
  'husband_father_first': raw.get('husband_father_first', ''),
360
  'husband_father_last': raw.get('husband_father_last', ''),
361
+ 'husband_father_citizenship': raw.get('husband_father_citizenship', ''),
362
  'wife_first': raw.get('wife_first') or raw.get('wife_name_first', ''),
363
  'wife_middle': raw.get('wife_middle') or raw.get('wife_name_middle', ''),
364
  'wife_last': raw.get('wife_last') or raw.get('wife_name_last', ''),
 
403
 
404
  # ── Extract nested husband/wife dicts (may be empty) ─────
405
  husband = raw.get('husband') or {}
406
+ wife = raw.get('wife') or {}
407
+ if not isinstance(husband, dict):
408
+ husband = {}
409
+ if not isinstance(wife, dict):
410
+ wife = {}
411
 
412
  # ── Parse date_of_marriage → day/month/year ───────────────
413
  dom_raw = raw.get('date_of_marriage') or ''
414
  dom_parts = [p.strip() for p in str(dom_raw).split(',') if p.strip()]
415
+ marriage_day = dom_parts[0] if len(dom_parts) > 0 else ''
416
  marriage_month = dom_parts[1] if len(dom_parts) > 1 else ''
417
+ marriage_year = dom_parts[2] if len(dom_parts) > 2 else ''
418
 
419
  # ── Parse place_of_marriage → venue / city ────────────────
420
  pom_raw = raw.get('place_of_marriage') or ''
421
  pom_parts = [p.strip() for p in str(pom_raw).split(',') if p.strip()]
422
  marriage_venue = pom_parts[0] if len(pom_parts) > 0 else ''
423
+ marriage_city = pom_parts[1] if len(pom_parts) > 1 else ''
424
 
425
  # ── Shared fields (same on both pages) ───────────────────
426
  shared = {
 
494
  if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
495
  detected = detect_form_type(img_path)
496
  source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
497
+ form_hint = source_map.get(detected, '1A')
498
  source_type = detected
499
+ print(
500
+ f'[app.py] Auto-detected form type: {detected} β†’ output: {form_hint}')
501
  else:
502
  source_type = hint_to_source.get(form_hint, '102')
503
 
 
510
  if file2_path.lower().endswith('.pdf'):
511
  img_path2 = pdf_to_image(file2_path) or file2_path
512
  raw2 = extract_fields(img_path2, '90')
513
+ raw = {**raw, **raw2} # merge β€” bride fields fill any gaps
514
 
515
  # Map source field names → output cert field names
516
+ fields = _map_template_output(raw, form_hint)
517
  form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
518
 
519
+ # ── MNB + spaCyNER enrichment ──────────────────────────────
520
  # ── MNB + spaCyNER enrichment ──────────────────────────────
521
  if _bridge is not None:
522
  try:
523
  ner_text = _raw_to_ner_text(raw, source_type)
524
 
525
+ # ── FIX: skip MNB for Form 90 — type is already known ──
526
+ if source_type == '90':
527
+ mnb_result = {'label': 'Form 90 - Application for Marriage License',
528
+ 'form_code': 'form90', 'confidence': 1.0}
529
+ else:
530
+ mnb_result = _bridge.mnb.classify_full(ner_text)
531
+
532
+ print(
533
+ f'[app.py] MNB: {mnb_result["label"]} ({mnb_result["confidence"]:.1%})')
534
 
535
+ # spaCyNER: existing logic unchanged
536
  if source_type == '102':
537
  ner_form = _bridge.filler.fill_form_1a(ner_text)
538
  elif source_type == '103':
 
541
  ner_form = _bridge.filler.fill_form_3a(ner_text)
542
  else: # 90
543
  ner_form = _bridge.filler.fill_form_90(ner_text, ner_text)
544
+ # ... rest unchanged
545
 
546
  ner_fields = _ner_to_fields(ner_form, raw, form_hint)
547
 
 
562
 
563
  # Debug: show all mapped fields
564
  non_empty = {k: v for k, v in fields.items() if v}
565
+ print(
566
+ f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
567
  for k, v in non_empty.items():
568
  print(f' {k:<30} = {v}')
569
 
 
666
  # husband_mother_name / husband_father_name are full names from Form 97
667
  'husband_mother_first': g('husband_mother_name'),
668
  'husband_father_first': g('husband_father_name'),
669
+ 'husband_mother_citizenship': g('husband_mother_citizenship'),
670
+ 'husband_father_citizenship': g('husband_father_citizenship'),
671
  'wife_first': g('wife_name_first'),
672
  'wife_middle': g('wife_name_middle'),
673
  'wife_last': g('wife_name_last'),
 
732
  def g(*keys):
733
  for k in keys:
734
  v = raw.get(k, '')
735
+ if v:
736
+ return str(v)
737
  return ''
738
 
739
  if source_type == '102':
 
823
  if not parts:
824
  return '', '', ''
825
  first = parts[0]
826
+ last = parts[-1] if len(parts) > 1 else ''
827
+ mid = ' '.join(parts[1:-1]) if len(parts) > 2 else ''
828
  return first, mid, last
829
 
830
 
 
833
  def r(*keys):
834
  for k in keys:
835
  v = raw.get(k, '')
836
+ if v:
837
+ return v
838
  return ''
839
 
840
  def ga(attr, *fallback_keys):
 
888
  }
889
 
890
  elif form_hint == '3A':
891
+ h = getattr(form, 'husband', None)
892
+ w = getattr(form, 'wife', None)
893
  hd = h.to_dict() if h else {}
894
  wd = w.to_dict() if w else {}
895
  return {
 
998
  'parents_marriage_month': 'June',
999
  'parents_marriage_year': '2020',
1000
  'parents_marriage_city': 'Tarlac City',
1001
+ 'parents_marriage_province': 'Tarlac',
1002
  'date_submitted': 'January 15, 2026',
1003
  'processed_by': 'John Doe',
1004
  'verified_position': 'City Civil Registrar',
 
1052
  'husband_citizenship': 'Filipino',
1053
  'husband_mother_first': 'Lourdes',
1054
  'husband_mother_last': 'Bautista',
1055
+ 'husband_mother_citizenship': 'Filipino',
1056
  'husband_father_first': 'Ramon',
1057
  'husband_father_last': 'Bautista',
1058
+ 'husband_father_citizenship': 'Filipino',
1059
  'wife_first': 'Elena Grace',
1060
  'wife_middle': '',
1061
  'wife_last': 'Reyes',
 
1103
  }
1104
  confidence = {k: 0.95 for k in fields}
1105
 
1106
+ form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
1107
  return fields, confidence, form_class
1108
 
1109
 
 
1116
  return f'<tr><td class="lbl">{label}</td><td class="val">{val}</td></tr>'
1117
 
1118
  if form_class == '1A':
1119
+ child = f"{fields.get('child_first', '')} {fields.get('child_middle', '')} {fields.get('child_last', '')}".strip()
1120
+ mother = f"{fields.get('mother_first', '')} {fields.get('mother_last', '')}".strip(
1121
+ )
1122
+ father = f"{fields.get('father_first', '')} {fields.get('father_last', '')}".strip(
1123
+ )
1124
+ dob = f"{fields.get('dob_month', '')} {fields.get('dob_day', '')}, {fields.get('dob_year', '')}".strip(
1125
+ ', ')
1126
+ pob = f"{fields.get('pob_city', '')}, {fields.get('pob_province', '')}".strip(
1127
+ ', ')
1128
+ rows = row('Registry No', fields.get('registry_no', '')) + row('Name of Child', child) + row('Sex', fields.get('sex', '')
1129
+ ) + row('Date of Birth', dob) + row('Place of Birth', pob) + row('Mother', mother) + row('Father', father)
1130
+ title = f'Form 1A β€” {child}'
1131
  elif form_class == '2A':
1132
+ deceased = f"{fields.get('deceased_first', '')} {fields.get('deceased_middle', '')} {fields.get('deceased_last', '')}".strip()
1133
+ dod = f"{fields.get('dod_month', '')} {fields.get('dod_day', '')}, {fields.get('dod_year', '')}".strip(
1134
+ ', ')
1135
+ rows = row('Registry No', fields.get('registry_no', '')) + row('Name of Deceased',
1136
+ deceased) + row('Date of Death', dod) + row('Cause', fields.get('cause_immediate', ''))
1137
+ title = f'Form 2A β€” {deceased}'
1138
  elif form_class == '3A':
1139
+ h = f"{fields.get('husband_first', '')} {fields.get('husband_last', '')}".strip(
1140
+ )
1141
+ w = f"{fields.get('wife_first', '')} {fields.get('wife_last', '')}".strip()
1142
+ dom = f"{fields.get('marriage_month', '')} {fields.get('marriage_day', '')}, {fields.get('marriage_year', '')}".strip(
1143
+ ', ')
1144
+ rows = (row('Registry No', fields.get('registry_no', '')) +
1145
+ row('Husband', h) + row('Wife', w) +
1146
+ row('Date of Marriage', dom) +
1147
+ row('Place of Marriage', f"{fields.get('marriage_venue', '')} {fields.get('marriage_city', '')}".strip()))
1148
  title = f'Form 3A β€” {h} & {w}'
1149
  else: # Form 90 β€” Marriage License
1150
+ g = f"{fields.get('groom_first', '')} {fields.get('groom_middle', '')} {fields.get('groom_last', '')}".strip()
1151
+ b = f"{fields.get('bride_first', '')} {fields.get('bride_middle', '')} {fields.get('bride_last', '')}".strip()
1152
+ dom = ' '.join(filter(None, [
1153
+ fields.get('marriage_month', ''),
1154
+ fields.get('marriage_day', ''),
1155
+ fields.get('marriage_year', ''),
1156
  ]))
1157
+ pom = ', '.join(filter(None, [
1158
+ fields.get('marriage_venue', ''),
1159
+ fields.get('marriage_city', ''),
1160
+ fields.get('marriage_province', ''),
1161
  ]))
1162
+ rows = (row('Registry No', fields.get('registry_no', '')) +
1163
+ row('License No', fields.get('license_no', '')) +
1164
+ row('Date of Issuance', fields.get('date_issuance', '')) +
1165
+ '<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">GROOM</td></tr>' +
1166
+ row('Name', g) +
1167
+ row('Age', fields.get('groom_age', '')) +
1168
+ row('Citizenship', fields.get('groom_citizenship', '')) +
1169
+ row('Mother', f"{fields.get('groom_mother_first', '')} {fields.get('groom_mother_last', '')}".strip()) +
1170
+ row('Father', f"{fields.get('groom_father_first', '')} {fields.get('groom_father_last', '')}".strip()) +
1171
+ '<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">BRIDE</td></tr>' +
1172
+ row('Name', b) +
1173
+ row('Age', fields.get('bride_age', '')) +
1174
+ row('Citizenship', fields.get('bride_citizenship', '')) +
1175
+ row('Mother', f"{fields.get('bride_mother_first', '')} {fields.get('bride_mother_last', '')}".strip()) +
1176
+ row('Father', f"{fields.get('bride_father_first', '')} {fields.get('bride_father_last', '')}".strip()) +
1177
+ '<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">MARRIAGE</td></tr>' +
1178
+ row('Date of Marriage', dom) +
1179
+ row('Place of Marriage', pom))
1180
+ title = f'Form 90 β€” {g} & {b}' if (
1181
+ g or b) else 'Form 90 β€” Marriage License'
1182
+
1183
+ mode = 'REAL PIPELINE' if (
1184
+ USE_REAL_PIPELINE and _pipeline) else 'FAKE DATA (dev mode)'
1185
  return f"""<!DOCTYPE html><html><head><meta charset="UTF-8"><title>{title}</title>
1186
  <style>
1187
  body{{font-family:Arial,sans-serif;font-size:13px;padding:40px 50px;color:#111;}}
 
1193
  td.val{{font-weight:bold;background:#fffde7;border-bottom:1px solid #f0d000;}}
1194
  tr td[colspan]{{background:#f5f5f5;font-weight:bold;text-align:center;color:#333;border-bottom:2px solid #ddd;}}
1195
  </style></head><body>
1196
+ <h2>LCR Form No. {form_class} β€” {fields.get('city_municipality', '')}</h2>
1197
  <div class="mode">Mode: {mode}</div>
1198
  <table>{rows}</table>
1199
  </body></html>"""
 
1201
 
1202
  if __name__ == '__main__':
1203
  port = int(os.environ.get('PORT', 7860))
1204
+ app.run(host='0.0.0.0', port=port, debug=False)