ocr / app.py
Hanz Pillerva
new up
486ed05
# app.py
# ============================================================
# Flask API — Civil Registry Pipeline
#
# TWO MODES (switch via USE_REAL_PIPELINE below):
#
# USE_REAL_PIPELINE = False → fake data (safe, always works)
# USE_REAL_PIPELINE = True → calls pipeline.py (real models)
#
# HOW TO ENABLE THE REAL PIPELINE:
# 1. Set USE_REAL_PIPELINE = True
# 2. Set PIPELINE_REPO_PATH to the absolute path of your repo
# e.g. r"C:\Users\YourName\Documents\thesis-repo"
# 3. Make sure venv has all model dependencies installed
# 4. Run: python app.py
# ============================================================
from flask import Flask, request, jsonify
from flask_cors import CORS
import os, sys, json, traceback
from datetime import datetime
# ── sys.path setup ────────────────────────────────────────────
# _BASE is the folder that contains app.py.  It is put on sys.path so the
# package-style imports ("from spacyNER.X" / "from MNB.X") resolve, and each
# model subfolder is added too so their modules can be imported directly
# (field_extractor, etc.).
_BASE = os.path.dirname(os.path.abspath(__file__))
for _p in (_BASE, *[os.path.join(_BASE, _d) for _d in ('CRNN+CTC', 'MNB', 'spacyNER')]):
    if _p in sys.path:
        continue
    # Prepend so these folders win over same-named modules found elsewhere.
    sys.path.insert(0, _p)
# Flask application object; CORS(app) enables CORS on it with no extra options.
app = Flask(__name__)
CORS(app)
# ── CONFIGURATION ─────────────────────────────────────────────
# The /process endpoint picks its processing path from these switches:
#   1. real pipeline     - USE_REAL_PIPELINE is True and the pipeline loaded
#   2. template matching - USE_TEMPLATE_MATCHING is True and the matcher loaded
#   3. fake data         - fallback when neither of the above applies
USE_REAL_PIPELINE = False # ← set True when models are ready
# NOTE(review): the comment below mentions Tesseract, while startup preloads a
# CRNN+CTC model from template_matcher — confirm which OCR engine is used.
USE_TEMPLATE_MATCHING = True # ← uses coordinate cropping + Tesseract OCR
# Folder put on sys.path before importing pipeline.py; only consulted when
# USE_REAL_PIPELINE is True.
PIPELINE_REPO_PATH = r"C:\xampp\htdocs\python"
# ─────────────────────────────────────────────────────────────
# ── Load template matcher ─────────────────────────────────────
# _template_matcher_ok ends up True only when both the import and the CRNN+CTC
# preload succeed; any exception leaves the template path disabled.
_template_matcher_ok = False
try:
    from template_matcher import (
        extract_fields,
        pdf_to_image,
        detect_form_type,
        _get_crnn,
    )
    print("[app.py] Template matcher loaded")
    # Warm the CRNN+CTC model at startup so the first request isn't slow.
    print("[app.py] Preloading CRNN+CTC model...")
    _get_crnn()
    print("[app.py] CRNN+CTC preloaded.")
    _template_matcher_ok = True
except Exception as _tm_err:
    print(f"[app.py] Template matcher unavailable: {_tm_err}")
# ── Load bridge (MNB + spaCyNER) ──────────────────────────────
# _bridge stays None whenever the bridge cannot be imported or constructed;
# callers check for None before using it.
_bridge = None
try:
    from bridge import CivilRegistryBridge

    print("[app.py] Loading MNB + spaCyNER bridge...")
    _bridge = CivilRegistryBridge()
    print("[app.py] Bridge (MNB + spaCyNER) ready.")
except Exception as _br_err:
    # Startup continues without the bridge; only the failure is reported.
    print(f"[app.py] Bridge unavailable (MNB/NER disabled): {_br_err}")
# Folder for temporary uploads and generated previews; the TEMP_DIR
# environment variable overrides the default location.
TEMP_DIR = os.environ.get('TEMP_DIR', os.path.join('/tmp', 'uploads', 'temp'))
# ── Load real pipeline (only if enabled) ─────────────────────
# _pipeline holds the loaded pipeline instance (None when disabled or failed);
# _pipeline_error holds the formatted traceback of a failed load.
_pipeline = _pipeline_error = None
if USE_REAL_PIPELINE:
    try:
        # Make pipeline.py importable from the configured repo folder.
        if PIPELINE_REPO_PATH not in sys.path:
            sys.path.insert(0, PIPELINE_REPO_PATH)
        from pipeline import CivilRegistryPipeline

        print("[app.py] Loading pipeline models β€” this may take a moment...")
        _pipeline = CivilRegistryPipeline()
        print("[app.py] βœ… Pipeline ready")
    except Exception:
        _pipeline_error = traceback.format_exc()
        print(f"[app.py] ❌ Pipeline failed to load:\n{_pipeline_error}")
        print("[app.py] ⚠️ Falling back to fake data")
# ── /process endpoint ─────────────────────────────────────────
@app.route('/process', methods=['POST'])
def process_document():
    """Process an uploaded civil-registry document and return its fields.

    Request (multipart form):
        file      -- required upload.
        file2     -- optional second upload (bride file for Form 90).
        form_hint -- '1A', '2A', '3A' or '90'; defaults to '1A'.

    Returns:
        A JSON response with ``status``, ``form_class``, ``raw_text``,
        ``fields``, ``confidence``, ``saved_file`` and ``preview_url``.
        Responds 400 when no ``file`` part is present, and 500 (with the
        error message and traceback) when saving or processing raises.
    """
    if 'file' not in request.files:
        return jsonify({'status': 'error', 'message': 'No file provided'}), 400
    upload = request.files['file']
    upload2 = request.files.get('file2')  # bride file for Form 90
    form_hint = request.form.get('form_hint', '1A')
    # Map form_hint (1A/2A/3A/90) to the pipeline form_type (birth/death/marriage).
    hint_to_type = {'1A': 'birth', '2A': 'death', '3A': 'marriage', '90': 'marriage'}
    form_type = hint_to_type.get(form_hint, 'birth')

    # Decide the processing path once so the response reports what really ran.
    if USE_REAL_PIPELINE and _pipeline is not None:
        mode = 'pipeline'
    elif USE_TEMPLATE_MATCHING and _template_matcher_ok:
        mode = 'template matching'
    else:
        mode = 'fake data'

    # ── Save uploaded file(s) temporarily ────────────────────
    os.makedirs(TEMP_DIR, exist_ok=True)
    # Microseconds keep two uploads arriving in the same second from
    # overwriting each other's temp and preview files.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    ext = os.path.splitext(upload.filename or '')[1] or '.pdf'
    saved_path = os.path.join(TEMP_DIR, f'upload_{timestamp}{ext}')
    saved_path2 = None

    # ── Run pipeline or template matching or fake data ───────
    # Saving happens inside the try so the cleanup below also runs when a
    # save fails half-way.
    try:
        upload.save(saved_path)
        if upload2 and upload2.filename:
            ext2 = os.path.splitext(upload2.filename)[1] or '.pdf'
            saved_path2 = os.path.join(TEMP_DIR, f'upload_{timestamp}_bride{ext2}')
            upload2.save(saved_path2)

        if mode == 'pipeline':
            fields, confidence, form_class = _run_real_pipeline(
                saved_path, form_hint, form_type,
                file2_path=saved_path2,
            )
        elif mode == 'template matching':
            fields, confidence, form_class = _run_template_pipeline(
                saved_path, form_hint, file2_path=saved_path2
            )
        else:
            fields, confidence, form_class = _run_fake_pipeline(form_hint)
    except Exception as e:
        tb = traceback.format_exc()
        print(f"[app.py] ❌ Processing error:\n{tb}")
        # NOTE(review): the traceback is returned to the client; kept because
        # callers may read 'trace', but consider dropping it in production.
        return jsonify({
            'status': 'error',
            'message': str(e),
            'trace': tb
        }), 500
    finally:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real result, but only filesystem errors are ignored.
        for leftover in (saved_path, saved_path2):
            if not leftover:
                continue
            try:
                os.remove(leftover)
            except OSError:
                pass

    # ── Save preview HTML ─────────────────────────────────────
    preview_file = f'form_{form_class}_{timestamp}.html'
    preview_path = os.path.join(TEMP_DIR, preview_file)
    with open(preview_path, 'w', encoding='utf-8') as fh:
        fh.write(_build_preview_html(form_class, fields))
    return jsonify({
        'status': 'success',
        'form_class': form_class,
        'raw_text': f'Processed via {mode} — Form {form_class}',
        'fields': fields,
        'confidence': confidence,
        'saved_file': preview_file,
        'preview_url': f'/uploads/temp/{preview_file}',
    })
# ── /status endpoint — check pipeline health ─────────────────
@app.route('/status', methods=['GET'])
def status():
    """Report which processing path /process will use and what has loaded.

    Returns:
        JSON with ``mode`` ('real_pipeline', 'template_matching' or
        'fake_data', chosen with the same precedence as /process),
        ``pipeline_ready``, ``pipeline_error`` (traceback of a failed load or
        None), ``repo_path`` (only when the real pipeline is enabled), plus
        ``template_matcher_ready`` and ``bridge_ready``.
    """
    if USE_REAL_PIPELINE and _pipeline is not None:
        mode = 'real_pipeline'
    elif USE_TEMPLATE_MATCHING and _template_matcher_ok:
        mode = 'template_matching'
    else:
        mode = 'fake_data'
    return jsonify({
        'mode': mode,
        'pipeline_ready': _pipeline is not None,
        'pipeline_error': _pipeline_error,
        'repo_path': PIPELINE_REPO_PATH if USE_REAL_PIPELINE else None,
        'template_matcher_ready': _template_matcher_ok,
        'bridge_ready': _bridge is not None,
    })
# ── /debug endpoint — test the pipeline import ───────────────
@app.route('/debug', methods=['GET'])
def debug():
    """Try to import pipeline.py and report the outcome.

    Responds with ``import: ok`` on success, or ``import: FAILED`` plus the
    full traceback and HTTP 500 on failure.  Both answers include the first
    six sys.path entries.
    """
    try:
        import pipeline as _pl_module
        payload = {'import': 'ok', 'sys_path': sys.path[:6]}
        http_status = 200
    except Exception:
        payload = {
            'import': 'FAILED',
            'trace': traceback.format_exc(),
            'sys_path': sys.path[:6],
        }
        http_status = 500
    return jsonify(payload), http_status
# ═════════════════════════════════════════════════════════════
# REAL PIPELINE — calls pipeline.py
# ═════════════════════════════════════════════════════════════
def _run_real_pipeline(file_path, form_hint, form_type, file2_path=None):
    """Run the loaded pipeline on the upload(s) and map to DB field names.

    For Form 90 the groom file (``file_path``) and, when given, the bride
    file (``file2_path``) are processed separately and merged, with groom
    values taking priority on shared fields.  Every other form is processed
    from the single primary file.

    Args:
        file_path: Path of the primary uploaded file.
        form_hint: '1A', '2A', '3A' or '90'.
        form_type: Pipeline form type ('birth', 'death', 'marriage').
        file2_path: Optional path of the bride file (Form 90 only).

    Returns:
        ``(fields, confidence, form_class)``.
    """
    def as_mapping(result):
        # The mappers below only understand dicts (.get/.items); a Form-like
        # object exposing to_dict() is converted first.
        to_dict = getattr(result, 'to_dict', None)
        return to_dict() if callable(to_dict) else result

    if form_hint == '90':
        # ── Process groom page (primary file) ─────────────────
        raw_groom = as_mapping(_pipeline.process_pdf(file_path, form_type='marriage'))
        groom_fields, groom_conf = _map_pipeline_output_form90(raw_groom, role='groom')

        # ── Process bride page separately if provided ──────────
        bride_fields, bride_conf = {}, {}
        if file2_path:
            raw_bride = as_mapping(_pipeline.process_pdf(file2_path, form_type='marriage'))
            bride_fields, bride_conf = _map_pipeline_output_form90(raw_bride, role='bride')

        # ── Merge: groom fields take priority for shared fields ─
        fields = {**bride_fields, **groom_fields}
        confidence = {**bride_conf, **groom_conf}

        # ── Ensure all expected Form 90 keys exist (empty string fallback)
        expected_keys = (
            'registry_no', 'city_municipality', 'date_issuance', 'license_no',
            'marriage_day', 'marriage_month', 'marriage_year',
            'marriage_venue', 'marriage_city',
            'groom_first', 'groom_middle', 'groom_last', 'groom_age',
            'groom_citizenship', 'groom_mother_first', 'groom_mother_last',
            'groom_father_first', 'groom_father_last',
            'bride_first', 'bride_middle', 'bride_last', 'bride_age',
            'bride_citizenship', 'bride_mother_first', 'bride_mother_last',
            'bride_father_first', 'bride_father_last',
        )
        for key in expected_keys:
            fields.setdefault(key, '')
        return fields, confidence, '90'

    # ── All other forms — single file ────────────────────────
    raw_result = _pipeline.process_pdf(file_path, form_type=form_type)
    # The form class may be an attribute (Form object) or a dict entry,
    # depending on what the pipeline returns; fall back to the caller's hint.
    actual_class = getattr(raw_result, 'form_class', None)
    raw = as_mapping(raw_result)
    if not actual_class and isinstance(raw, dict):
        actual_class = raw.get('form_class')
    actual_class = actual_class or form_hint
    # Normalise: form1a→1A, form2a→2A, form3a→3A, form90→90
    class_map = {'form1a': '1A', 'form2a': '2A', 'form3a': '3A', 'form90': '90'}
    form_class = class_map.get(str(actual_class).lower(), form_hint)
    fields, confidence = _map_pipeline_output(raw, form_class)
    return fields, confidence, form_class
def _map_pipeline_output(raw: dict, form_hint: str):
"""
Map Form.to_dict() keys β†’ thesis DB field names.
⚠️ THIS FUNCTION NEEDS TO BE UPDATED once you test
what pipeline.process_pdf() actually returns.
Steps to update:
1. Run pipeline manually:
python pipeline.py --pdf test.pdf --form birth
2. Note the printed field names
3. Update the mapping dicts below to match
"""
# ── Confidence β€” pipeline may or may not return scores ───
# If pipeline returns confidence per field, map them here too.
# For now default all to 0.90.
confidence = {k: 0.90 for k in raw.keys()}
# ── BIRTH (Form 1A) β€” update keys once pipeline is tested ─
if form_hint == '1A':
fields = {
# Header
'registry_no': raw.get('registry_number') or raw.get('registry_no', ''),
'city_municipality': raw.get('city_municipality') or raw.get('city', ''),
'province': raw.get('province', ''),
'date_issuance': raw.get('date_issuance') or raw.get('date', ''),
# Child
'child_first': raw.get('child_first') or raw.get('name_of_child_first', ''),
'child_middle': raw.get('child_middle') or raw.get('name_of_child_middle', ''),
'child_last': raw.get('child_last') or raw.get('name_of_child_last', ''),
'sex': raw.get('sex', ''),
'dob_day': raw.get('dob_day') or raw.get('date_of_birth_day', ''),
'dob_month': raw.get('dob_month') or raw.get('date_of_birth_month', ''),
'dob_year': raw.get('dob_year') or raw.get('date_of_birth_year', ''),
'pob_hospital': raw.get('pob_hospital') or raw.get('place_of_birth_hospital', ''),
'pob_city': raw.get('pob_city') or raw.get('place_of_birth_city', ''),
'pob_province': raw.get('pob_province') or raw.get('place_of_birth_province', ''),
# Mother
'mother_first': raw.get('mother_first') or raw.get('mother_name_first', ''),
'mother_middle': raw.get('mother_middle') or raw.get('mother_name_middle', ''),
'mother_last': raw.get('mother_last') or raw.get('mother_name_last', ''),
'mother_citizenship': raw.get('mother_citizenship') or raw.get('mother_nationality', ''),
'mother_age': raw.get('mother_age', ''),
# Father
'father_first': raw.get('father_first') or raw.get('father_name_first', ''),
'father_middle': raw.get('father_middle') or raw.get('father_name_middle', ''),
'father_last': raw.get('father_last') or raw.get('father_name_last', ''),
'father_citizenship': raw.get('father_citizenship') or raw.get('father_nationality', ''),
# Parents marriage
'parents_marriage_day': raw.get('parents_marriage_day', ''),
'parents_marriage_month': raw.get('parents_marriage_month', ''),
'parents_marriage_year': raw.get('parents_marriage_year', ''),
'parents_marriage_city': raw.get('parents_marriage_city', ''),
'parents_marriage_province': raw.get('parents_marriage_province', ''),
# Registration
'date_submitted': raw.get('date_submitted') or raw.get('date_of_registration', ''),
'prepared_by': raw.get('prepared_by', ''),
}
# ── DEATH (Form 2A) ───────────────────────────────────────
elif form_hint == '2A':
fields = {
'registry_no': raw.get('registry_number') or raw.get('registry_no', ''),
'city_municipality': raw.get('city_municipality') or raw.get('city', ''),
'province': raw.get('province', ''),
'date_issuance': raw.get('date_issuance') or raw.get('date', ''),
'deceased_first': raw.get('deceased_first') or raw.get('name_of_deceased_first', ''),
'deceased_middle': raw.get('deceased_middle') or raw.get('name_of_deceased_middle', ''),
'deceased_last': raw.get('deceased_last') or raw.get('name_of_deceased_last', ''),
'sex': raw.get('sex', ''),
'age_years': raw.get('age_years') or raw.get('age', ''),
'civil_status': raw.get('civil_status', ''),
'citizenship': raw.get('citizenship') or raw.get('nationality', ''),
'dod_day': raw.get('dod_day') or raw.get('date_of_death_day', ''),
'dod_month': raw.get('dod_month') or raw.get('date_of_death_month', ''),
'dod_year': raw.get('dod_year') or raw.get('date_of_death_year', ''),
'pod_hospital': raw.get('pod_hospital') or raw.get('place_of_death_hospital', ''),
'pod_city': raw.get('pod_city') or raw.get('place_of_death_city', ''),
'pod_province': raw.get('pod_province') or raw.get('place_of_death_province', ''),
'cause_immediate': raw.get('cause_immediate') or raw.get('cause_of_death', ''),
'cause_antecedent': raw.get('cause_antecedent', ''),
'cause_underlying': raw.get('cause_underlying', ''),
'date_submitted': raw.get('date_submitted') or raw.get('date_of_registration', ''),
}
# ── MARRIAGE CERT (Form 3A) ───────────────────────────────
else:
fields = {
'registry_no': raw.get('registry_number') or raw.get('registry_no', ''),
'city_municipality': raw.get('city_municipality') or raw.get('city', ''),
'province': raw.get('province', ''),
'date_issuance': raw.get('date_issuance') or raw.get('date', ''),
'husband_first': raw.get('husband_first') or raw.get('husband_name_first', ''),
'husband_middle': raw.get('husband_middle') or raw.get('husband_name_middle', ''),
'husband_last': raw.get('husband_last') or raw.get('husband_name_last', ''),
'husband_age': raw.get('husband_age', ''),
'husband_citizenship': raw.get('husband_citizenship') or raw.get('husband_nationality', ''),
'husband_mother_first': raw.get('husband_mother_first', ''),
'husband_mother_last': raw.get('husband_mother_last', ''),
'husband_mother_citizenship':raw.get('husband_mother_citizenship', ''),
'husband_father_first': raw.get('husband_father_first', ''),
'husband_father_last': raw.get('husband_father_last', ''),
'husband_father_citizenship':raw.get('husband_father_citizenship', ''),
'wife_first': raw.get('wife_first') or raw.get('wife_name_first', ''),
'wife_middle': raw.get('wife_middle') or raw.get('wife_name_middle', ''),
'wife_last': raw.get('wife_last') or raw.get('wife_name_last', ''),
'wife_age': raw.get('wife_age', ''),
'wife_citizenship': raw.get('wife_citizenship') or raw.get('wife_nationality', ''),
'wife_mother_first': raw.get('wife_mother_first', ''),
'wife_mother_last': raw.get('wife_mother_last', ''),
'wife_mother_citizenship': raw.get('wife_mother_citizenship', ''),
'wife_father_first': raw.get('wife_father_first', ''),
'wife_father_last': raw.get('wife_father_last', ''),
'wife_father_citizenship': raw.get('wife_father_citizenship', ''),
'marriage_day': raw.get('marriage_day') or raw.get('date_of_marriage_day', ''),
'marriage_month': raw.get('marriage_month') or raw.get('date_of_marriage_month', ''),
'marriage_year': raw.get('marriage_year') or raw.get('date_of_marriage_year', ''),
'marriage_venue': raw.get('marriage_venue', ''),
'marriage_city': raw.get('marriage_city', ''),
'marriage_province': raw.get('marriage_province', ''),
'date_submitted': raw.get('date_submitted') or raw.get('date_of_registration', ''),
}
# Add any remaining raw fields not yet mapped (future-proofing)
for k, v in raw.items():
if k not in fields and v:
fields[k] = v
return fields, confidence
def _map_pipeline_output_form90(raw: dict, role: str):
"""
Map pipeline output for a single Form 90 page (groom or bride).
Actual pipeline output keys confirmed:
registry_number, date_of_registration, date_of_marriage,
place_of_marriage, husband (dict), wife (dict)
NOTE: MNB currently misclassifies Form 90 as form1a so husband/wife
dicts are empty. Fields will populate once MNB is retrained on Form 90.
Until then, shared header fields are extracted correctly.
"""
confidence = {k: 0.90 for k in raw.keys()}
# ── Extract nested husband/wife dicts (may be empty) ─────
husband = raw.get('husband') or {}
wife = raw.get('wife') or {}
if not isinstance(husband, dict): husband = {}
if not isinstance(wife, dict): wife = {}
# ── Parse date_of_marriage β†’ day/month/year ───────────────
dom_raw = raw.get('date_of_marriage') or ''
dom_parts = [p.strip() for p in str(dom_raw).split(',') if p.strip()]
marriage_day = dom_parts[0] if len(dom_parts) > 0 else ''
marriage_month = dom_parts[1] if len(dom_parts) > 1 else ''
marriage_year = dom_parts[2] if len(dom_parts) > 2 else ''
# ── Parse place_of_marriage β†’ venue / city ────────────────
pom_raw = raw.get('place_of_marriage') or ''
pom_parts = [p.strip() for p in str(pom_raw).split(',') if p.strip()]
marriage_venue = pom_parts[0] if len(pom_parts) > 0 else ''
marriage_city = pom_parts[1] if len(pom_parts) > 1 else ''
# ── Shared fields (same on both pages) ───────────────────
shared = {
'registry_no': str(raw.get('registry_number') or '').strip(),
'city_municipality': marriage_city,
'date_issuance': str(raw.get('date_of_registration') or '').strip(),
'license_no': str(raw.get('license_no') or raw.get('license_number') or '').strip(),
'marriage_day': marriage_day,
'marriage_month': marriage_month,
'marriage_year': marriage_year,
'marriage_venue': marriage_venue,
'marriage_city': marriage_city,
'marriage_province': str(raw.get('province') or '').strip(),
}
if role == 'groom':
# Groom-specific β€” from husband dict or top-level fallbacks
person = husband
fields = {
**shared,
'groom_first': str(person.get('first_name') or person.get('first') or raw.get('groom_first') or '').strip(),
'groom_middle': str(person.get('middle_name') or person.get('middle') or raw.get('groom_middle') or '').strip(),
'groom_last': str(person.get('last_name') or person.get('last') or raw.get('groom_last') or '').strip(),
'groom_age': str(person.get('age') or raw.get('groom_age') or '').strip(),
'groom_citizenship': str(person.get('citizenship') or person.get('nationality') or raw.get('groom_citizenship') or '').strip(),
'groom_civil_status': str(person.get('civil_status') or '').strip(),
'groom_residence': str(person.get('residence') or person.get('address') or '').strip(),
'groom_mother_first': str(person.get('mother_first') or person.get('mother_name') or '').strip(),
'groom_mother_last': str(person.get('mother_last') or '').strip(),
'groom_father_first': str(person.get('father_first') or person.get('father_name') or '').strip(),
'groom_father_last': str(person.get('father_last') or '').strip(),
}
else: # bride
person = wife
fields = {
**shared,
'bride_first': str(person.get('first_name') or person.get('first') or raw.get('bride_first') or '').strip(),
'bride_middle': str(person.get('middle_name') or person.get('middle') or raw.get('bride_middle') or '').strip(),
'bride_last': str(person.get('last_name') or person.get('last') or raw.get('bride_last') or '').strip(),
'bride_age': str(person.get('age') or raw.get('bride_age') or '').strip(),
'bride_citizenship': str(person.get('citizenship') or person.get('nationality') or raw.get('bride_citizenship') or '').strip(),
'bride_civil_status': str(person.get('civil_status') or '').strip(),
'bride_residence': str(person.get('residence') or person.get('address') or '').strip(),
'bride_mother_first': str(person.get('mother_first') or person.get('mother_name') or '').strip(),
'bride_mother_last': str(person.get('mother_last') or '').strip(),
'bride_father_first': str(person.get('father_first') or person.get('father_name') or '').strip(),
'bride_father_last': str(person.get('father_last') or '').strip(),
}
# Strip empty strings so UI only shows fields with actual values
fields = {k: v for k, v in fields.items() if v}
return fields, confidence
# ═════════════════════════════════════════════════════════════
# TEMPLATE MATCHING PIPELINE — coordinate crop + Tesseract OCR
# ═════════════════════════════════════════════════════════════
def _run_template_pipeline(file_path, form_hint, file2_path=None):
    """Extract fields with the template matcher, then enrich with MNB + NER.

    Args:
        file_path: Path of the primary upload (PDFs are converted to an image
            first via pdf_to_image).
        form_hint: '1A' / '2A' / '3A' / '90'.  '1A' is also the default hint,
            so it triggers auto-detection of the form type; unknown hints are
            treated as '1A'.
        file2_path: Optional bride file, used for Form 90 only.

    Returns:
        ``(fields, confidence, form_class)``.  Confidence is the MNB score for
        every field when the bridge ran successfully, otherwise a flat 0.85.
    """
    def as_image(path):
        # pdf_to_image may return a falsy value; fall back to the original path.
        if path.lower().endswith('.pdf'):
            return pdf_to_image(path) or path
        return path

    img_path = as_image(file_path)

    # Output form (hint) <-> source form type used by the templates.
    hint_to_source = {'1A': '102', '2A': '103', '3A': '97', '90': '90'}
    source_to_hint = {source: hint for hint, source in hint_to_source.items()}
    if form_hint not in hint_to_source:
        # Unknown hints previously fell through to the Form 90 field layout
        # while extracting with the birth template; treat them as the default.
        form_hint = '1A'

    if form_hint == '1A':
        detected = detect_form_type(img_path)
        # Only trust a detection result the templates know about; otherwise
        # keep the birth-certificate source type that matches the '1A' output.
        source_type = detected if detected in source_to_hint else '102'
        form_hint = source_to_hint[source_type]
        print(f'[app.py] Auto-detected form type: {detected} β†’ output: {form_hint}')
    else:
        source_type = hint_to_source[form_hint]

    # Extract fields from primary file
    raw = extract_fields(img_path, source_type)

    # For Form 90 (marriage license), also process bride file if provided
    if form_hint == '90' and file2_path:
        raw2 = extract_fields(as_image(file2_path), '90')
        # Only non-empty values from the bride page are merged in, so blank
        # regions there cannot wipe out what the groom page provided.
        raw = {**raw, **{key: value for key, value in raw2.items() if value}}

    # Map source field names to output cert field names
    fields = _map_template_output(raw, form_hint)
    form_class = form_hint

    # ── MNB + spaCyNER enrichment ──────────────────────────────
    if _bridge is not None:
        try:
            ner_text = _raw_to_ner_text(raw, source_type)
            # MNB: classify form type (for logging / confidence)
            mnb_result = _bridge.mnb.classify_full(ner_text)
            print(f'[app.py] MNB: {mnb_result["label"]} ({mnb_result["confidence"]:.1%})')
            # spaCyNER: extract structured fields
            if source_type == '102':
                ner_form = _bridge.filler.fill_form_1a(ner_text)
            elif source_type == '103':
                ner_form = _bridge.filler.fill_form_2a(ner_text)
            elif source_type == '97':
                ner_form = _bridge.filler.fill_form_3a(ner_text)
            else:  # 90
                ner_form = _bridge.filler.fill_form_90(ner_text, ner_text)
            ner_fields = _ner_to_fields(ner_form, raw, form_hint)
            # Merge: NER non-empty values override template values
            fields = {k: (ner_fields.get(k) or v) for k, v in fields.items()}
            for k, v in ner_fields.items():
                if k not in fields and v:
                    fields[k] = v
            ner_count = sum(1 for v in ner_fields.values() if v)
            print(f'[app.py] NER enriched {ner_count} fields')
            confidence = {k: mnb_result['confidence'] for k in fields}
        except Exception as _ner_err:
            # Enrichment is optional: report and keep whatever fields exist.
            print(f'[app.py] NER error (using template only): {_ner_err}')
            confidence = {k: 0.85 for k in fields}
    else:
        confidence = {k: 0.85 for k in fields}

    # Debug: show all mapped fields
    non_empty = {k: v for k, v in fields.items() if v}
    print(f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
    for k, v in non_empty.items():
        print(f' {k:<30} = {v}')
    return fields, confidence, form_class
def _clean_ocr(text: str) -> str:
"""Light cleanup for EasyOCR output: strip extra spaces and punctuation."""
import re
if not text:
return text
text = re.sub(r'\s+', ' ', text).strip()
text = text.strip('.,;:')
return text
def _clean_age(text: str) -> str:
"""Extract the numeric age from OCR text like 'cicntu 23' β†’ '23'."""
import re
nums = re.findall(r'\b\d+\b', text)
# Return the last number found (avoids page numbers etc.)
return nums[-1] if nums else _clean_ocr(text)
def _clean_civil_status(text: str) -> str:
"""Normalize OCR-garbled civil status to a standard value."""
t = text.lower().replace(' ', '')
if any(x in t for x in ['singl', 'fngle', 'fingle', 'single']):
return 'Single'
if any(x in t for x in ['marr', 'maried', 'married']):
return 'Married'
if any(x in t for x in ['widow', 'widw']):
return 'Widowed'
if any(x in t for x in ['separ', 'annul']):
return 'Separated'
return _clean_ocr(text)
def _map_template_output(raw: dict, form_hint: str) -> dict:
"""Map template field names to the thesis DB field names used by the UI."""
def g(key, *aliases):
for k in (key,) + aliases:
if raw.get(k):
return raw[k]
return ''
if form_hint == '1A':
return {
'registry_no': g('registry_no'),
'city_municipality': g('city_municipality'),
'province': g('province'),
'date_submitted': g('registration_date'),
'child_first': g('name_first'),
'child_middle': g('name_middle'),
'child_last': g('name_last'),
'sex': g('sex'),
'dob_day': g('dob_day'),
'dob_month': g('dob_month'),
'dob_year': g('dob_year'),
'pob_city': g('place_of_birth'),
'mother_first': g('mother_name'),
'mother_citizenship': g('mother_citizenship'),
'father_first': g('father_name'),
'father_citizenship': g('father_citizenship'),
'parents_marriage_month': g('marriage_date'),
'parents_marriage_city': g('marriage_place'),
}
elif form_hint == '2A':
cause = ' / '.join(filter(None, [
g('cause_immediate'), g('cause_antecedent'), g('cause_underlying')
]))
return {
'registry_no': g('registry_no'),
'city_municipality': g('city_municipality'),
'province': g('province'),
'date_submitted': g('registration_date'),
'deceased_first': g('deceased_name'),
'sex': g('sex'),
'age_years': g('age'),
'civil_status': g('civil_status'),
'citizenship': g('citizenship'),
'dod_full': g('date_of_death'),
'pod_hospital': g('place_of_death'),
'cause_immediate': cause,
}
elif form_hint == '3A':
return {
'registry_no': g('registry_no'),
'city_municipality': g('city_municipality'),
'province': g('province'),
'date_submitted': g('registration_date'),
'husband_first': g('husband_name_first'),
'husband_middle': g('husband_name_middle'),
'husband_last': g('husband_name_last'),
'husband_age': g('husband_age'),
'husband_citizenship': g('husband_citizenship'),
# husband_mother_name / husband_father_name are full names from Form 97
'husband_mother_first': g('husband_mother_name'),
'husband_father_first': g('husband_father_name'),
'husband_mother_citizenship':g('husband_mother_citizenship'),
'husband_father_citizenship':g('husband_father_citizenship'),
'wife_first': g('wife_name_first'),
'wife_middle': g('wife_name_middle'),
'wife_last': g('wife_name_last'),
'wife_age': g('wife_age'),
'wife_citizenship': g('wife_citizenship'),
'wife_mother_first': g('wife_mother_name'),
'wife_father_first': g('wife_father_name'),
'wife_mother_citizenship': g('wife_mother_citizenship'),
'wife_father_citizenship': g('wife_father_citizenship'),
'marriage_venue': g('place_of_marriage'),
'marriage_city': g('city_municipality'),
'marriage_month': g('date_of_marriage'),
}
else: # Form 90
return {
'registry_no': g('registry_no'),
'city_municipality': g('city_municipality'),
'province': g('province'),
'license_no': g('marriage_license_no'),
'date_issuance': g('date_issued'),
'groom_first': g('groom_name_first'),
'groom_middle': g('groom_name_middle'),
'groom_last': g('groom_name_last'),
'groom_dob': g('groom_dob'),
'groom_age': g('groom_age'),
'groom_place_of_birth': g('groom_place_of_birth'),
'groom_sex': g('groom_sex'),
'groom_citizenship': g('groom_citizenship'),
'groom_civil_status': g('groom_civil_status'),
'groom_residence': g('groom_residence'),
'groom_religion': g('groom_religion'),
# groom_father_name / groom_mother_name are full names from Form 90
'groom_father_first': g('groom_father_name'),
'groom_father_citizenship': g('groom_father_citizenship'),
'groom_mother_first': g('groom_mother_name'),
'groom_mother_citizenship': g('groom_mother_citizenship'),
'bride_first': g('bride_name_first'),
'bride_middle': g('bride_name_middle'),
'bride_last': g('bride_name_last'),
'bride_dob': g('bride_dob'),
'bride_age': g('bride_age'),
'bride_place_of_birth': g('bride_place_of_birth'),
'bride_sex': g('bride_sex'),
'bride_citizenship': g('bride_citizenship'),
'bride_civil_status': g('bride_civil_status'),
'bride_residence': g('bride_residence'),
'bride_religion': g('bride_religion'),
'bride_father_first': g('bride_father_name'),
'bride_father_citizenship': g('bride_father_citizenship'),
'bride_mother_first': g('bride_mother_name'),
'bride_mother_citizenship': g('bride_mother_citizenship'),
}
# ═════════════════════════════════════════════════════════════
# BRIDGE HELPERS — MNB + spaCyNER integration
# ═════════════════════════════════════════════════════════════
def _raw_to_ner_text(raw: dict, source_type: str) -> str:
"""Convert template raw fields β†’ structured text the NER model understands."""
def g(*keys):
for k in keys:
v = raw.get(k, '')
if v: return str(v)
return ''
if source_type == '102':
return (
f"Registry No.: {g('registry_no')}\n"
f"Date of Registration: {g('registration_date')}\n"
f"1. NAME (First): {g('name_first')} (Middle): {g('name_middle')} (Last): {g('name_last')}\n"
f"2. SEX: {g('sex')}\n"
f"3. DATE OF BIRTH: {g('dob_month')} {g('dob_day')}, {g('dob_year')}\n"
f"4. PLACE OF BIRTH: {g('place_of_birth')}\n"
f"MOTHER:\n"
f"7. MAIDEN NAME: {g('mother_name')}\n"
f"8. CITIZENSHIP/NATIONALITY: {g('mother_citizenship')}\n"
f"FATHER:\n"
f"14. NAME: {g('father_name')}\n"
f"15. CITIZENSHIP/NATIONALITY: {g('father_citizenship')}\n"
f"MARRIAGE OF PARENTS:\n"
f"20a. DATE: {g('marriage_date')}\n"
f"20b. PLACE: {g('marriage_place')}\n"
)
elif source_type == '103':
return (
f"Registry No.: {g('registry_no')}\n"
f"Date of Registration: {g('registration_date')}\n"
f"1. NAME (First): {g('deceased_name')}\n"
f"2. SEX: {g('sex')}\n"
f"4. AGE: {g('age')}\n"
f"9. CIVIL STATUS: {g('civil_status')}\n"
f"7. CITIZENSHIP/NATIONALITY: {g('citizenship')}\n"
f"6. DATE OF DEATH: {g('date_of_death')}\n"
f"5. PLACE OF DEATH: {g('place_of_death')}\n"
f"17. CAUSE OF DEATH: {g('cause_immediate')}\n"
f"Antecedent cause: {g('cause_antecedent')}\n"
f"Underlying cause: {g('cause_underlying')}\n"
)
elif source_type == '97':
return (
f"Registry No.: {g('registry_no')}\n"
f"Date of Registration: {g('registration_date')}\n"
f"HUSBAND:\n"
f"1. NAME (First): {g('husband_name_first')} (Middle): {g('husband_name_middle')} (Last): {g('husband_name_last')}\n"
f"2b. AGE: {g('husband_age')}\n"
f"4b. CITIZENSHIP/NATIONALITY: {g('husband_citizenship')}\n"
f"8. NAME OF FATHER: {g('husband_father_name')}\n"
f"8b. FATHER CITIZENSHIP/NATIONALITY: {g('husband_father_citizenship')}\n"
f"10. NAME OF MOTHER: {g('husband_mother_name')}\n"
f"10b. MOTHER CITIZENSHIP/NATIONALITY: {g('husband_mother_citizenship')}\n"
f"WIFE:\n"
f"1. NAME (First): {g('wife_name_first')} (Middle): {g('wife_name_middle')} (Last): {g('wife_name_last')}\n"
f"2b. AGE: {g('wife_age')}\n"
f"4b. CITIZENSHIP/NATIONALITY: {g('wife_citizenship')}\n"
f"8. NAME OF FATHER: {g('wife_father_name')}\n"
f"8b. FATHER CITIZENSHIP/NATIONALITY: {g('wife_father_citizenship')}\n"
f"10. NAME OF MOTHER: {g('wife_mother_name')}\n"
f"10b. MOTHER CITIZENSHIP/NATIONALITY: {g('wife_mother_citizenship')}\n"
f"15. PLACE OF MARRIAGE: {g('place_of_marriage')}\n"
f"16. DATE OF MARRIAGE: {g('date_of_marriage')}\n"
)
else: # 90
return (
f"GROOM:\n"
f"1. NAME (First): {g('groom_name_first')} (Middle): {g('groom_name_middle')} (Last): {g('groom_name_last')}\n"
f"2. DATE OF BIRTH: {g('groom_dob')}\n"
f"3. PLACE OF BIRTH: {g('groom_place_of_birth')}\n"
f"4. SEX: {g('groom_sex')}\n"
f"5. CITIZENSHIP/NATIONALITY: {g('groom_citizenship')}\n"
f"NAME OF FATHER: {g('groom_father_name')}\n"
f"FATHER CITIZENSHIP/NATIONALITY: {g('groom_father_citizenship')}\n"
f"NAME OF MOTHER: {g('groom_mother_name')}\n"
f"MOTHER CITIZENSHIP/NATIONALITY: {g('groom_mother_citizenship')}\n"
f"BRIDE:\n"
f"1. NAME (First): {g('bride_name_first')} (Middle): {g('bride_name_middle')} (Last): {g('bride_name_last')}\n"
f"2. DATE OF BIRTH: {g('bride_dob')}\n"
f"3. PLACE OF BIRTH: {g('bride_place_of_birth')}\n"
f"4. SEX: {g('bride_sex')}\n"
f"5. CITIZENSHIP/NATIONALITY: {g('bride_citizenship')}\n"
f"NAME OF FATHER: {g('bride_father_name')}\n"
f"FATHER CITIZENSHIP/NATIONALITY: {g('bride_father_citizenship')}\n"
f"NAME OF MOTHER: {g('bride_mother_name')}\n"
f"MOTHER CITIZENSHIP/NATIONALITY: {g('bride_mother_citizenship')}\n"
)
def _split_name(full: str):
"""Split 'First Middle Last' β†’ (first, middle, last)."""
parts = (full or '').split()
if not parts:
return '', '', ''
first = parts[0]
last = parts[-1] if len(parts) > 1 else ''
mid = ' '.join(parts[1:-1]) if len(parts) > 2 else ''
return first, mid, last
def _ner_to_fields(form, raw: dict, form_hint: str) -> dict:
"""Convert spaCyNER Form object β†’ thesis DB field names, with raw fallbacks."""
def r(*keys):
for k in keys:
v = raw.get(k, '')
if v: return v
return ''
def ga(attr, *fallback_keys):
v = getattr(form, attr, None) or ''
return v or r(*fallback_keys)
if form_hint == '1A':
cf, cm, cl = _split_name(getattr(form, 'name_of_child', '') or '')
return {
'registry_no': ga('registry_number', 'registry_no'),
'city_municipality': r('city_municipality'),
'province': r('province'),
'date_submitted': ga('date_of_registration', 'registration_date'),
'child_first': cf or r('name_first'),
'child_middle': cm or r('name_middle'),
'child_last': cl or r('name_last'),
'sex': ga('sex', 'sex'),
'dob_day': r('dob_day'),
'dob_month': r('dob_month'),
'dob_year': r('dob_year'),
'pob_city': ga('place_of_birth', 'place_of_birth'),
'mother_first': ga('name_of_mother', 'mother_name'),
'mother_citizenship': ga('nationality_of_mother', 'mother_citizenship'),
'father_first': ga('name_of_father', 'father_name'),
'father_citizenship': ga('nationality_of_father', 'father_citizenship'),
'parents_marriage_month': ga('date_of_marriage_of_parents', 'marriage_date'),
'parents_marriage_city': ga('place_of_marriage_of_parents', 'marriage_place'),
}
elif form_hint == '2A':
cause = ' / '.join(filter(None, [
getattr(form, 'cause_of_death', ''),
getattr(form, 'cause_antecedent', ''),
getattr(form, 'cause_underlying', ''),
])) or ' / '.join(filter(None, [
r('cause_immediate'), r('cause_antecedent'), r('cause_underlying')
]))
return {
'registry_no': ga('registry_number', 'registry_no'),
'city_municipality': r('city_municipality'),
'province': r('province'),
'date_submitted': ga('date_of_registration', 'registration_date'),
'deceased_first': ga('name_of_deceased', 'deceased_name'),
'sex': ga('sex', 'sex'),
'age_years': ga('age', 'age'),
'civil_status': ga('civil_status', 'civil_status'),
'citizenship': ga('nationality', 'citizenship'),
'dod_full': ga('date_of_death', 'date_of_death'),
'pod_hospital': ga('place_of_death', 'place_of_death'),
'cause_immediate': cause,
}
elif form_hint == '3A':
h = getattr(form, 'husband', None)
w = getattr(form, 'wife', None)
hd = h.to_dict() if h else {}
wd = w.to_dict() if w else {}
return {
'registry_no': ga('registry_number', 'registry_no'),
'city_municipality': r('city_municipality'),
'province': r('province'),
'date_submitted': ga('date_of_registration', 'registration_date'),
'husband_first': hd.get('name') or r('husband_name_first'),
'husband_middle': r('husband_name_middle'),
'husband_last': r('husband_name_last'),
'husband_age': hd.get('age') or r('husband_age'),
'husband_citizenship': hd.get('nationality') or r('husband_citizenship'),
'husband_mother_first': hd.get('name_of_mother') or r('husband_mother_name'),
'husband_mother_citizenship': hd.get('nationality_of_mother') or r('husband_mother_citizenship'),
'husband_father_first': hd.get('name_of_father') or r('husband_father_name'),
'husband_father_citizenship': hd.get('nationality_of_father') or r('husband_father_citizenship'),
'wife_first': wd.get('name') or r('wife_name_first'),
'wife_middle': r('wife_name_middle'),
'wife_last': r('wife_name_last'),
'wife_age': wd.get('age') or r('wife_age'),
'wife_citizenship': wd.get('nationality') or r('wife_citizenship'),
'wife_mother_first': wd.get('name_of_mother') or r('wife_mother_name'),
'wife_mother_citizenship': wd.get('nationality_of_mother') or r('wife_mother_citizenship'),
'wife_father_first': wd.get('name_of_father') or r('wife_father_name'),
'wife_father_citizenship': wd.get('nationality_of_father') or r('wife_father_citizenship'),
'marriage_venue': ga('place_of_marriage', 'place_of_marriage'),
'marriage_city': r('city_municipality'),
'marriage_month': ga('date_of_marriage', 'date_of_marriage'),
}
else: # Form 90
groom = getattr(form, 'groom', None)
bride = getattr(form, 'bride', None)
gd = groom.to_dict() if groom else {}
bd = bride.to_dict() if bride else {}
return {
'registry_no': r('registry_no'),
'city_municipality': r('city_municipality'),
'province': r('province'),
'license_no': r('marriage_license_no'),
'date_issuance': r('date_issued'),
'groom_first': gd.get('name_of_applicant') or r('groom_name_first'),
'groom_middle': r('groom_name_middle'),
'groom_last': r('groom_name_last'),
'groom_dob': gd.get('date_of_birth') or r('groom_dob'),
'groom_age': gd.get('age') or r('groom_age'),
'groom_place_of_birth': gd.get('place_of_birth') or r('groom_place_of_birth'),
'groom_sex': gd.get('sex') or r('groom_sex'),
'groom_citizenship': gd.get('citizenship') or r('groom_citizenship'),
'groom_civil_status': gd.get('civil_status', r('groom_civil_status')),
'groom_residence': gd.get('residence') or r('groom_residence'),
'groom_religion': gd.get('religion') or r('groom_religion'),
'groom_father_first': gd.get('name_of_father') or r('groom_father_name'),
'groom_father_citizenship': gd.get('father_citizenship') or r('groom_father_citizenship'),
'groom_mother_first': gd.get('maiden_name_of_mother') or r('groom_mother_name'),
'groom_mother_citizenship': gd.get('mother_citizenship') or r('groom_mother_citizenship'),
'bride_first': bd.get('name_of_applicant') or r('bride_name_first'),
'bride_middle': r('bride_name_middle'),
'bride_last': r('bride_name_last'),
'bride_dob': bd.get('date_of_birth') or r('bride_dob'),
'bride_age': bd.get('age') or r('bride_age'),
'bride_place_of_birth': bd.get('place_of_birth') or r('bride_place_of_birth'),
'bride_sex': bd.get('sex') or r('bride_sex'),
'bride_citizenship': bd.get('citizenship') or r('bride_citizenship'),
'bride_civil_status': bd.get('civil_status', r('bride_civil_status')),
'bride_residence': bd.get('residence') or r('bride_residence'),
'bride_religion': bd.get('religion') or r('bride_religion'),
'bride_father_first': bd.get('name_of_father') or r('bride_father_name'),
'bride_father_citizenship': bd.get('father_citizenship') or r('bride_father_citizenship'),
'bride_mother_first': bd.get('maiden_name_of_mother') or r('bride_mother_name'),
'bride_mother_citizenship': bd.get('mother_citizenship') or r('bride_mother_citizenship'),
}
# ═════════════════════════════════════════════════════════════
# FAKE PIPELINE β€” returns hardcoded data (for development)
# ═════════════════════════════════════════════════════════════
def _run_fake_pipeline(form_hint):
"""Returns fake data using real thesis DB field names."""
if form_hint == '1A':
fields = {
'registry_no': '2026-BC-00123',
'city_municipality': 'Tarlac City',
'province': 'Tarlac',
'date_issuance': datetime.now().strftime('%B %d, %Y'),
'child_first': 'Maria Luisa',
'child_middle': 'Dela Cruz',
'child_last': 'Santos',
'sex': 'Female',
'dob_day': '10',
'dob_month': 'January',
'dob_year': '2026',
'pob_city': 'Tarlac City',
'pob_province': 'Tarlac',
'mother_first': 'Rosa',
'mother_middle': 'Reyes',
'mother_last': 'Dela Cruz',
'mother_citizenship': 'Filipino',
'mother_age': '28',
'father_first': 'Juan Pedro',
'father_middle': '',
'father_last': 'Santos',
'father_citizenship': 'Filipino',
'parents_marriage_day': '12',
'parents_marriage_month': 'June',
'parents_marriage_year': '2020',
'parents_marriage_city': 'Tarlac City',
'parents_marriage_province':'Tarlac',
'date_submitted': 'January 15, 2026',
'processed_by': 'John Doe',
'verified_position': 'City Civil Registrar',
'issued_to': 'Rosa Reyes Dela Cruz',
'amount_paid': '75.00',
'or_number': 'OR-2026-00456',
'date_paid': datetime.now().strftime('%B %d, %Y'),
}
confidence = {k: 0.95 for k in fields}
elif form_hint == '2A':
fields = {
'registry_no': '2026-DC-00045',
'city_municipality': 'Tarlac City',
'province': 'Tarlac',
'date_issuance': datetime.now().strftime('%B %d, %Y'),
'deceased_first': 'Roberto',
'deceased_middle': 'Cruz',
'deceased_last': 'Villanueva',
'sex': 'Male',
'age_years': '72',
'civil_status': 'Married',
'citizenship': 'Filipino',
'dod_day': '28',
'dod_month': 'January',
'dod_year': '2026',
'pod_hospital': 'Tarlac Provincial Hospital',
'pod_city': 'Tarlac City',
'pod_province': 'Tarlac',
'cause_immediate': 'Cardiopulmonary Arrest',
'date_submitted': 'February 1, 2026',
'processed_by': 'John Doe',
'verified_position': 'City Civil Registrar',
'issued_to': 'Maria Villanueva',
'amount_paid': '75.00',
'or_number': 'OR-2026-00457',
'date_paid': datetime.now().strftime('%B %d, %Y'),
}
confidence = {k: 0.95 for k in fields}
elif form_hint == '3A':
fields = {
'registry_no': '2026-MC-00078',
'city_municipality': 'Tarlac City',
'province': 'Tarlac',
'date_issuance': datetime.now().strftime('%B %d, %Y'),
'husband_first': 'Carlos Miguel',
'husband_middle': '',
'husband_last': 'Bautista',
'husband_age': '28',
'husband_citizenship': 'Filipino',
'husband_mother_first': 'Lourdes',
'husband_mother_last': 'Bautista',
'husband_mother_citizenship':'Filipino',
'husband_father_first': 'Ramon',
'husband_father_last': 'Bautista',
'husband_father_citizenship':'Filipino',
'wife_first': 'Elena Grace',
'wife_middle': '',
'wife_last': 'Reyes',
'wife_age': '26',
'wife_citizenship': 'Filipino',
'wife_mother_first': 'Susan',
'wife_mother_last': 'Reyes',
'wife_mother_citizenship': 'Filipino',
'wife_father_first': 'Eduardo',
'wife_father_last': 'Reyes',
'wife_father_citizenship': 'Filipino',
'marriage_day': '14',
'marriage_month': 'February',
'marriage_year': '2026',
'marriage_venue': 'Saint John Parish',
'marriage_city': 'Tarlac City',
'marriage_province': 'Tarlac',
'date_submitted': 'March 1, 2026',
'processed_by': 'John Doe',
'verified_position': 'City Civil Registrar',
'issued_to': 'Carlos Miguel Bautista',
'amount_paid': '75.00',
'or_number': 'OR-2026-00458',
'date_paid': datetime.now().strftime('%B %d, %Y'),
}
confidence = {k: 0.95 for k in fields}
else: # Form 90
fields = {
'registry_no': '2026-ML-00031',
'city_municipality': 'Tarlac City',
'date_issuance': datetime.now().strftime('%B %d, %Y'),
'groom_first': 'Paolo Gabriel',
'groom_last': 'Mendoza',
'groom_age': '27',
'groom_citizenship': 'Filipino',
'bride_first': 'Kristine Ann',
'bride_last': 'Santos',
'bride_age': '25',
'bride_citizenship': 'Filipino',
'marriage_day': '10',
'marriage_month': 'April',
'marriage_year': '2026',
'marriage_city': 'Tarlac City',
}
confidence = {k: 0.95 for k in fields}
form_class = form_hint if form_hint in ('1A','2A','3A','90') else '1A'
return fields, confidence, form_class
# ═════════════════════════════════════════════════════════════
# Preview HTML builder
# ═════════════════════════════════════════════════════════════
def _build_preview_html(form_class, fields):
def row(label, value):
val = value or '_______________'
return f'<tr><td class="lbl">{label}</td><td class="val">{val}</td></tr>'
if form_class == '1A':
child = f"{fields.get('child_first','')} {fields.get('child_middle','')} {fields.get('child_last','')}".strip()
mother = f"{fields.get('mother_first','')} {fields.get('mother_last','')}".strip()
father = f"{fields.get('father_first','')} {fields.get('father_last','')}".strip()
dob = f"{fields.get('dob_month','')} {fields.get('dob_day','')}, {fields.get('dob_year','')}".strip(', ')
pob = f"{fields.get('pob_city','')}, {fields.get('pob_province','')}".strip(', ')
rows = row('Registry No', fields.get('registry_no','')) + row('Name of Child', child) + row('Sex', fields.get('sex','')) + row('Date of Birth', dob) + row('Place of Birth', pob) + row('Mother', mother) + row('Father', father)
title = f'Form 1A β€” {child}'
elif form_class == '2A':
deceased = f"{fields.get('deceased_first','')} {fields.get('deceased_middle','')} {fields.get('deceased_last','')}".strip()
dod = f"{fields.get('dod_month','')} {fields.get('dod_day','')}, {fields.get('dod_year','')}".strip(', ')
rows = row('Registry No', fields.get('registry_no','')) + row('Name of Deceased', deceased) + row('Date of Death', dod) + row('Cause', fields.get('cause_immediate',''))
title = f'Form 2A β€” {deceased}'
elif form_class == '3A':
h = f"{fields.get('husband_first','')} {fields.get('husband_last','')}".strip()
w = f"{fields.get('wife_first','')} {fields.get('wife_last','')}".strip()
dom = f"{fields.get('marriage_month','')} {fields.get('marriage_day','')}, {fields.get('marriage_year','')}".strip(', ')
rows = (row('Registry No', fields.get('registry_no','')) +
row('Husband', h) + row('Wife', w) +
row('Date of Marriage', dom) +
row('Place of Marriage', f"{fields.get('marriage_venue','')} {fields.get('marriage_city','')}".strip()))
title = f'Form 3A β€” {h} & {w}'
else: # Form 90 β€” Marriage License
g = f"{fields.get('groom_first','')} {fields.get('groom_middle','')} {fields.get('groom_last','')}".strip()
b = f"{fields.get('bride_first','')} {fields.get('bride_middle','')} {fields.get('bride_last','')}".strip()
dom = ' '.join(filter(None, [
fields.get('marriage_month',''),
fields.get('marriage_day',''),
fields.get('marriage_year',''),
]))
pom = ', '.join(filter(None, [
fields.get('marriage_venue',''),
fields.get('marriage_city',''),
fields.get('marriage_province',''),
]))
rows = (row('Registry No', fields.get('registry_no','')) +
row('License No', fields.get('license_no','')) +
row('Date of Issuance', fields.get('date_issuance','')) +
'<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">GROOM</td></tr>' +
row('Name', g) +
row('Age', fields.get('groom_age','')) +
row('Citizenship', fields.get('groom_citizenship','')) +
row('Mother', f"{fields.get('groom_mother_first','')} {fields.get('groom_mother_last','')}".strip()) +
row('Father', f"{fields.get('groom_father_first','')} {fields.get('groom_father_last','')}".strip()) +
'<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">BRIDE</td></tr>' +
row('Name', b) +
row('Age', fields.get('bride_age','')) +
row('Citizenship', fields.get('bride_citizenship','')) +
row('Mother', f"{fields.get('bride_mother_first','')} {fields.get('bride_mother_last','')}".strip()) +
row('Father', f"{fields.get('bride_father_first','')} {fields.get('bride_father_last','')}".strip()) +
'<tr><td colspan="2" style="padding:8px 0;font-weight:bold;background:#f9f9f9;text-align:center;">MARRIAGE</td></tr>' +
row('Date of Marriage', dom) +
row('Place of Marriage', pom))
title = f'Form 90 β€” {g} & {b}' if (g or b) else 'Form 90 β€” Marriage License'
mode = 'REAL PIPELINE' if (USE_REAL_PIPELINE and _pipeline) else 'FAKE DATA (dev mode)'
return f"""<!DOCTYPE html><html><head><meta charset="UTF-8"><title>{title}</title>
<style>
body{{font-family:Arial,sans-serif;font-size:13px;padding:40px 50px;color:#111;}}
h2{{font-size:15px;border-bottom:2px solid #333;padding-bottom:8px;margin-bottom:16px;}}
.mode{{font-size:11px;color:#888;margin-bottom:12px;}}
table{{width:100%;border-collapse:collapse;}}
td{{padding:6px 8px;border-bottom:1px dotted #ccc;vertical-align:top;}}
td.lbl{{width:220px;color:#555;}}
td.val{{font-weight:bold;background:#fffde7;border-bottom:1px solid #f0d000;}}
tr td[colspan]{{background:#f5f5f5;font-weight:bold;text-align:center;color:#333;border-bottom:2px solid #ddd;}}
</style></head><body>
<h2>LCR Form No. {form_class} β€” {fields.get('city_municipality','')}</h2>
<div class="mode">Mode: {mode}</div>
<table>{rows}</table>
</body></html>"""
if __name__ == '__main__':
    # Script entry point: listen on all interfaces with debug mode off. The
    # port comes from the PORT environment variable, defaulting to 7860.
    port = int(os.environ.get('PORT', 7860))
    app.run(debug=False, port=port, host='0.0.0.0')