Hanz Pillerva and Claude Sonnet 4.6 committed
Commit d748584 · 1 Parent(s): 56161f2

Replace TrOCR/anchor system with CRNN+CTC absolute-coordinate OCR


- template_matcher: removed anchor detection, TrOCR, pytesseract; now uses CRNN+CTC model with ECC/ORB alignment + absolute coordinate crops
- app.py: updated preload to use _get_crnn instead of _get_trocr
- calibrate_fields: updated to match latest changes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
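
For reference, a minimal usage sketch of how the reworked pipeline is driven end to end (function names are taken from this diff; the sample path is hypothetical):

    # Hedged usage sketch: assumes template_matcher.py is importable and
    # 'sample_form.png' (hypothetical) is a scanned civil registry form.
    from template_matcher import detect_form_type, extract_fields, _get_crnn

    _get_crnn()                                      # optional warm-up, as app.py now does
    form_type = detect_form_type('sample_form.png')  # '102' | '103' | '90' | '97'
    fields = extract_fields('sample_form.png', form_type)
    for name, value in fields.items():
        print(f'{name:<30} = {value}')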

Files changed (3)
  1. CRNN+CTC/calibrate_fields.py +10 -10
  2. app.py +5 -5
  3. template_matcher.py +396 -972
CRNN+CTC/calibrate_fields.py CHANGED
@@ -37,18 +37,17 @@ COLOURS = [
 
 def draw_boxes(img, bounds):
     left, top, right, bottom = bounds
-    fw = right - left
-    fh = bottom - top
+    h, w = img.shape[:2]
 
     vis = img.copy()
     # form boundary
     cv2.rectangle(vis, (left, top), (right, bottom), (0, 140, 255), 2)
 
     for idx, (name, rx1, ry1, rx2, ry2) in enumerate(boxes):
-        x1 = int(left + rx1 * fw)
-        y1 = int(top + ry1 * fh)
-        x2 = int(left + rx2 * fw)
-        y2 = int(top + ry2 * fh)
+        x1 = int(rx1 * w)
+        y1 = int(ry1 * h)
+        x2 = int(rx2 * w)
+        y2 = int(ry2 * h)
         c = COLOURS[idx % len(COLOURS)]
         cv2.rectangle(vis, (x1, y1), (x2, y2), c, 2)
         cv2.putText(vis, name[:25], (x1 + 2, max(0, y1 - 3)),
@@ -160,10 +159,11 @@ def main():
     elif event == cv2.EVENT_LBUTTONUP:
         drawing = False
         ex, ey = x, y
-        x1r = (min(ix, ex) - left) / fw
-        y1r = (min(iy, ey) - top) / fh
-        x2r = (max(ix, ex) - left) / fw
-        y2r = (max(iy, ey) - top) / fh
+        ih, iw = img_orig.shape[:2]
+        x1r = min(ix, ex) / iw
+        y1r = min(iy, ey) / ih
+        x2r = max(ix, ex) / iw
+        y2r = max(iy, ey) / ih
         x1r, y1r = max(0.0, x1r), max(0.0, y1r)
        x2r, y2r = min(1.0, x2r), min(1.0, y2r)
        if (x2r - x1r) > 0.005 and (y2r - y1r) > 0.003:
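
The coordinate convention change in a nutshell: boxes are now stored as fractions of the whole image instead of fractions of the detected form bounds, matching template_matcher's absolute-coordinate crops after ECC/ORB alignment. A small illustrative sketch (dummy sizes, not from the repo):

    import numpy as np

    img = np.zeros((1000, 800, 3), dtype=np.uint8)  # dummy scan, h=1000, w=800
    h, w = img.shape[:2]
    x1r, y1r, x2r, y2r = 0.20, 0.10, 0.60, 0.15     # example relative box
    # New mapping: fractions of the full image -> absolute pixels
    x1, y1 = int(x1r * w), int(y1r * h)
    x2, y2 = int(x2r * w), int(y2r * h)
    print((x1, y1, x2, y2))                         # (160, 100, 480, 150)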
app.py CHANGED
@@ -45,13 +45,13 @@ PIPELINE_REPO_PATH = r"C:\xampp\htdocs\python"
 
 # ── Load template matcher ─────────────────────────────────────
 try:
-    from template_matcher import extract_fields, pdf_to_image, detect_form_type, _get_trocr
+    from template_matcher import extract_fields, pdf_to_image, detect_form_type, _get_crnn
     _template_matcher_ok = True
     print("[app.py] Template matcher loaded")
-    # Preload TrOCR at startup so the first request isn't slow
-    print("[app.py] Preloading TrOCR model...")
-    _get_trocr()
-    print("[app.py] TrOCR preloaded.")
+    # Preload CRNN+CTC at startup so the first request isn't slow
+    print("[app.py] Preloading CRNN+CTC model...")
+    _get_crnn()
+    print("[app.py] CRNN+CTC preloaded.")
 except Exception as _tm_err:
     _template_matcher_ok = False
     print(f"[app.py] Template matcher unavailable: {_tm_err}")
template_matcher.py CHANGED
@@ -1,29 +1,22 @@
 """
-template_matcher.py
-====================
-Extracts field values from civil registry scanned forms using fixed
-coordinate templates. No ML training required.
-
-How it works:
-  1. Load the uploaded image
-  2. Detect which form it is (passed in as form_type: '102','103','90','97')
-  3. Normalize image to a standard size
-  4. Crop each predefined field region
-  5. Run Tesseract OCR on the crop
-  6. Return { field_name: value, ... }
-
-Coordinates are stored as relative fractions (0.0-1.0) of the image
-width/height so they work at any scan resolution.
-
-CALIBRATION
------------
-If OCR picks up the wrong area, adjust the (x1, y1, x2, y2) values
-for that field in the TEMPLATES dict below.
-Run: python template_matcher.py <image_path> <form_type>
-to see a debug image with all boxes drawn.
 """
 
-import os, sys
 
 import numpy as np
 from PIL import Image
 
@@ -33,741 +26,364 @@ try:
 except ImportError:
     _CV2_OK = False
 
-# ── Reference images for form alignment ────────────────────────
-# Place one clean blank/lightly-filled scan for each form type in:
-#   python/references/reference_102.png
-#   python/references/reference_103.png
-#   python/references/reference_90.png
-#   python/references/reference_97.png
 _REF_DIR = os.path.join(os.path.dirname(__file__), 'references')
 REFERENCE_IMAGES = {
-    '102': os.path.join(_REF_DIR, 'reference_102.png'),
     '103': os.path.join(_REF_DIR, 'reference_103.png'),
-    '90':  os.path.join(_REF_DIR, 'reference-90.png'),
-    '97':  os.path.join(_REF_DIR, 'reference_97.png'),
 }
 
-# ── OCR engine: TrOCR large ────────────────────────────────────
-_trocr_processor = None
-_trocr_model = None
 
-def _get_trocr():
-    global _trocr_processor, _trocr_model
-    if _trocr_processor is None:
         try:
-            from transformers import TrOCRProcessor, VisionEncoderDecoderModel
             import torch
-            print('[template_matcher] Loading TrOCR large-handwritten...')
-            _trocr_processor = TrOCRProcessor.from_pretrained(
-                'microsoft/trocr-large-handwritten')
-            _trocr_model = VisionEncoderDecoderModel.from_pretrained(
-                'microsoft/trocr-large-handwritten')
-            _trocr_model.eval()
-            print('[template_matcher] TrOCR ready.')
         except Exception as e:
-            print(f'[template_matcher] TrOCR load error: {e}')
-    return _trocr_processor, _trocr_model
 
-# ── CRNN+CTC (kept for future use — swap back when model is trained) ──
-# _crnn_ocr = None
-# _CRNN_CHECKPOINT = os.path.join(
-#     os.path.dirname(__file__), 'CRNN+CTC', 'checkpoints', 'best_model_final.pth'
-# )
-# def _get_crnn(): ...  (see git history)
 
-# Hint constants kept for template dict compatibility (values unused by CRNN)
 _LINE = 'line'
 _BLOCK = 'block'
 _WORD = 'word'
 
-# ── Per-field post-processing ──────────────────────────────────
-import re as _re
-
-# Maps abbreviated sex readings → canonical value
 _SEX_KEYWORDS = {
     'female': 'FEMALE', 'fem': 'FEMALE', 'f': 'FEMALE',
-    'male': 'MALE', 'm': 'MALE',
 }
-
-# Maps field name → normalization rule
 _FIELD_TYPE = {
-    # Sex
     'sex': 'sex', 'groom_sex': 'sex', 'bride_sex': 'sex',
     'husband_sex': 'sex', 'wife_sex': 'sex',
-    # Year (4-digit)
     'dob_year': 'year',
-    # Pure digits
     'age': 'digits', 'groom_age': 'digits', 'bride_age': 'digits',
     'husband_age': 'digits', 'wife_age': 'digits', 'dob_day': 'digits',
-    # Dates — keep digits, spaces, common separators
     'registration_date': 'date', 'marriage_date': 'date',
-    'date_of_marriage': 'date', 'date_of_death': 'date',
-    'date_of_birth': 'date', 'date_issued': 'date',
     'groom_dob': 'date', 'bride_dob': 'date',
     'husband_dob': 'date', 'wife_dob': 'date',
-    # Registry / license numbers — alphanumeric + separators
     'registry_no': 'registry', 'marriage_license_no': 'registry',
 }
 
-
 def _postprocess(text: str, field_name: str) -> str:
-    """
-    Normalize and validate OCR output by field type.
-
-    sex       → 'MALE' or 'FEMALE'
-    year      → 4-digit year string, e.g. '1990'
-    digits    → strip all non-digit characters
-    date      → keep digits, spaces, '-', '/', '.', ','
-    registry  → keep alphanumeric, spaces, hyphens, slashes
-    (default) → strip leading/trailing whitespace
-    """
     text = text.strip()
     if not text:
         return text
     rule = _FIELD_TYPE.get(field_name)
-
     if rule == 'sex':
         tl = text.lower()
-        # Try longest keyword first to avoid 'f' matching inside 'female' twice
         for kw in sorted(_SEX_KEYWORDS, key=len, reverse=True):
             if kw in tl:
                 return _SEX_KEYWORDS[kw]
         return text
-
     if rule == 'year':
         m = _re.search(r'(19|20)\d{2}', text)
         if m:
             return m.group(0)
         digits = _re.sub(r'\D', '', text)
         return digits[:4] if len(digits) >= 4 else text
-
     if rule == 'digits':
         d = _re.sub(r'\D', '', text)
         return d if d else text
-
     if rule == 'date':
         return _re.sub(r'[^\w\s\-/,.]', '', text).strip()
-
     if rule == 'registry':
         return _re.sub(r'[^\w\s\-/]', '', text).strip()
-
     return text
 
-# ── Field templates ────────────────────────────────────────────
-# Each entry:  'field_name': (x1, y1, x2, y2, hint)
-# Coordinates are relative fractions of image dimensions (0.0 – 1.0)
-# hint is kept for compatibility but EasyOCR ignores it.
-#
-# TO CALIBRATE: run this file directly with a sample image, it will
-# draw all boxes so you can see which regions need adjusting.
 
 TEMPLATES = {
-
-    # ══════════════════════════════════════════════════════════
-    # FORM 102 — Certificate of Live Birth (green border)
-    # Calibrated from the REAL Municipal Form No. 102
-    # ══════════════════════════════════════════════════════════
-    '102': {
-        'province': (0.173, 0.089, 0.655, 0.105, _LINE),
-        'registry_no': (0.673, 0.105, 0.957, 0.130, _LINE),
-        'city_municipality': (0.226, 0.107, 0.658, 0.125, _LINE),
         'name_first': (0.169, 0.161, 0.453, 0.181, _LINE),
         'name_middle': (0.450, 0.161, 0.674, 0.181, _LINE),
-        'name_last': (0.682, 0.161, 0.943, 0.181, _LINE),
-        'sex': (0.771, 0.155, 0.963, 0.173, _WORD),
-        'dob_day': (0.479, 0.196, 0.596, 0.213, _WORD),
-        'dob_month': (0.610, 0.196, 0.781, 0.214, _LINE),
-        'dob_year': (0.799, 0.199, 0.947, 0.215, _WORD),
-        'place_of_birth': (0.446, 0.227, 0.953, 0.245, _LINE),
-        'type_of_birth': (0.112, 0.280, 0.316, 0.299, _WORD),
-        'birth_order': (0.596, 0.280, 0.824, 0.294, _WORD),
-        'weight_at_birth': (0.837, 0.262, 0.919, 0.291, _WORD),
-        'mother_name': (0.296, 0.321, 0.627, 0.341, _LINE),
-        'mother_citizenship': (0.206, 0.337, 0.540, 0.363, _LINE),
-        'mother_religion': (0.566, 0.347, 0.959, 0.366, _LINE),
-        'mother_occupation': (0.553, 0.380, 0.831, 0.405, _LINE),
-        'mother_age_at_birth': (0.829, 0.389, 0.967, 0.409, _WORD),
-        'mother_residence': (0.219, 0.422, 0.944, 0.442, _LINE),
-        'father_name': (0.641, 0.321, 0.957, 0.342, _LINE),
-        'father_citizenship': (0.115, 0.491, 0.325, 0.515, _LINE),
-        'father_religion': (0.333, 0.493, 0.588, 0.519, _LINE),
-        'father_occupation': (0.600, 0.492, 0.811, 0.522, _LINE),
-        'father_age_at_birth': (0.819, 0.503, 0.969, 0.523, _WORD),
-        'father_residence': (0.230, 0.536, 0.963, 0.555, _LINE),
-        'marriage_date': (0.091, 0.593, 0.412, 0.610, _LINE),
-        'marriage_place': (0.427, 0.591, 0.949, 0.608, _LINE),
-        'registration_date': (0.621, 0.685, 0.905, 0.704, _LINE),
-    },
-
-    # ══════════════════════════════════════════════════════════
-    # FORM 103 — Certificate of Death (blue border)
-    # ══════════════════════════════════════════════════════════
-    '103': {
-        'province': (0.173, 0.089, 0.655, 0.105, _LINE),
-        'registry_no': (0.673, 0.105, 0.957, 0.130, _LINE),
-        'city_municipality': (0.226, 0.107, 0.658, 0.125, _LINE),
-        'deceased_name': (0.086, 0.147, 0.745, 0.181, _LINE),
-        'sex': (0.771, 0.155, 0.963, 0.173, _WORD),
-        'date_of_death': (0.100, 0.197, 0.293, 0.224, _LINE),
-        'date_of_birth': (0.320, 0.201, 0.568, 0.228, _LINE),
-        'age': (0.575, 0.215, 0.719, 0.231, _WORD),
-        'place_of_death': (0.089, 0.241, 0.720, 0.265, _LINE),
-        'civil_status': (0.723, 0.250, 0.970, 0.265, _WORD),
-        'religion': (0.078, 0.281, 0.310, 0.308, _LINE),
-        'citizenship': (0.328, 0.281, 0.526, 0.306, _LINE),
-        'residence': (0.540, 0.284, 0.957, 0.310, _LINE),
-        'occupation': (0.086, 0.318, 0.283, 0.343, _LINE),
-        'father_name': (0.641, 0.321, 0.957, 0.342, _LINE),
-        'mother_name': (0.296, 0.321, 0.627, 0.341, _LINE),
-        'cause_immediate': (0.298, 0.406, 0.947, 0.420, _LINE),
-        'cause_antecedent': (0.298, 0.422, 0.951, 0.441, _LINE),
-        'cause_underlying': (0.432, 0.438, 0.960, 0.456, _LINE),
-        'registration_date': (0.621, 0.685, 0.905, 0.704, _LINE),
-    },
-    # ══════════════════════════════════════════════════════════
-    # FORM 90 — Application for Marriage License (black border)
-    # 3-column: GROOM (left 38%) | LABEL (center 24%) | BRIDE (right 38%)
-    # ══════════════════════════════════════════════════════════
-    '90': {
-        # Header
-        'province': (0.173, 0.089, 0.655, 0.105, _LINE),
-        'registry_no': (0.673, 0.105, 0.957, 0.130, _LINE),
-        'city_municipality': (0.226, 0.107, 0.658, 0.125, _LINE),
-        'marriage_license_no': (0.696, 0.138, 0.951, 0.156, _LINE),
-        'date_issued': (0.825, 0.151, 0.982, 0.169, _LINE),
-        # Groom name (first / middle / last — each on its own row)
-        'groom_name_first': (0.128, 0.310, 0.441, 0.325, _LINE),
-        'groom_name_middle': (0.137, 0.326, 0.446, 0.338, _LINE),
-        'groom_name_last': (0.127, 0.340, 0.439, 0.354, _LINE),
-        # Bride name (first / middle / last — each on its own row)
-        'bride_name_first': (0.629, 0.311, 0.944, 0.325, _LINE),
-        'bride_name_middle': (0.631, 0.326, 0.937, 0.339, _LINE),
-        'bride_name_last': (0.633, 0.339, 0.941, 0.354, _LINE),
-        # Groom DOB / age
-        'groom_dob': (0.085, 0.372, 0.369, 0.393, _LINE),
-        'groom_age': (0.379, 0.373, 0.456, 0.391, _WORD),
-        # Bride DOB / age
-        'bride_dob': (0.584, 0.373, 0.879, 0.393, _LINE),
-        'bride_age': (0.881, 0.374, 0.965, 0.392, _WORD),
-        # Place of birth
-        'groom_place_of_birth': (0.080, 0.403, 0.462, 0.426, _LINE),
-        'bride_place_of_birth': (0.586, 0.405, 0.960, 0.425, _LINE),
-        # Sex / Citizenship
-        'groom_sex': (0.085, 0.435, 0.217, 0.452, _WORD),
-        'groom_citizenship': (0.219, 0.435, 0.460, 0.454, _LINE),
-        'bride_sex': (0.582, 0.436, 0.716, 0.452, _WORD),
-        'bride_citizenship': (0.725, 0.436, 0.961, 0.451, _LINE),
-        # Residence
-        'groom_residence': (0.076, 0.465, 0.460, 0.490, _LINE),
-        'bride_residence': (0.586, 0.465, 0.964, 0.489, _LINE),
-        # Religion
-        'groom_religion': (0.079, 0.493, 0.462, 0.522, _LINE),
-        'bride_religion': (0.585, 0.492, 0.962, 0.520, _LINE),
-        # Civil Status
-        'groom_civil_status': (0.080, 0.520, 0.463, 0.552, _WORD),
-        'bride_civil_status': (0.586, 0.522, 0.961, 0.546, _WORD),
-        # Father
-        'groom_father_name': (0.082, 0.695, 0.459, 0.711, _LINE),
-        'groom_father_citizenship': (0.082, 0.713, 0.459, 0.736, _LINE),
-        'bride_father_name': (0.581, 0.695, 0.961, 0.715, _LINE),
-        'bride_father_citizenship': (0.577, 0.715, 0.963, 0.737, _LINE),
-        # Mother
-        'groom_mother_name': (0.080, 0.784, 0.456, 0.809, _LINE),
-        'groom_mother_citizenship': (0.081, 0.811, 0.459, 0.830, _LINE),
-        'bride_mother_name': (0.583, 0.785, 0.962, 0.808, _LINE),
-        'bride_mother_citizenship': (0.580, 0.811, 0.963, 0.833, _LINE),
-    },
-
-    # ══════════════════════════════════════════════════════════
-    # FORM 97 — Certificate of Marriage (pink/magenta border)
-    # Layout: ITEM col (20%) | HUSBAND col (40%) | WIFE col (40%)
-    # x-ranges: HUSBAND = 0.22–0.59 | WIFE = 0.62–0.97
-    #
-    # Form 97 — y-coords calibrated from actual ORB-aligned scan.
-    # ORB alignment introduces ~40% vertical stretch by bottom of form;
-    # all y values are empirically measured from crop images, NOT from
-    # the reference image directly.
-    # ══════════════════════════════════════════════════════════
-    '97': {
-        'province': (0.173, 0.089, 0.655, 0.105, _LINE),
-        'registry_no': (0.673, 0.105, 0.957, 0.130, _LINE),
-        'city_municipality': (0.226, 0.107, 0.658, 0.125, _LINE),
-        'husband_name_first': (0.255, 0.140, 0.570, 0.155, _LINE),
-        'husband_name_middle': (0.258, 0.154, 0.569, 0.166, _LINE),
-        'husband_name_last': (0.259, 0.167, 0.581, 0.182, _LINE),
-        'wife_name_first': (0.650, 0.142, 0.954, 0.155, _LINE),
-        'wife_name_middle': (0.639, 0.155, 0.940, 0.170, _LINE),
-        'wife_name_last': (0.634, 0.169, 0.951, 0.181, _LINE),
-        'husband_dob': (0.219, 0.196, 0.507, 0.213, _LINE),
-        'husband_age': (0.523, 0.196, 0.580, 0.212, _WORD),
-        'wife_dob': (0.606, 0.198, 0.892, 0.209, _LINE),
-        'wife_age': (0.910, 0.199, 0.970, 0.213, _WORD),
-        'husband_place_of_birth': (0.203, 0.225, 0.583, 0.241, _LINE),
-        'wife_place_of_birth': (0.594, 0.229, 0.962, 0.245, _LINE),
-        'husband_sex': (0.219, 0.249, 0.307, 0.269, _WORD),
-        'wife_sex': (0.602, 0.249, 0.697, 0.269, _WORD),
-        'husband_citizenship': (0.344, 0.257, 0.588, 0.274, _LINE),
-        'wife_citizenship': (0.724, 0.255, 0.965, 0.272, _LINE),
-        'husband_residence': (0.219, 0.283, 0.579, 0.301, _LINE),
-        'wife_residence': (0.596, 0.285, 0.966, 0.307, _LINE),
-        'husband_religion': (0.204, 0.310, 0.581, 0.327, _LINE),
-        'wife_religion': (0.592, 0.311, 0.964, 0.327, _LINE),
-        'husband_civil_status': (0.196, 0.333, 0.579, 0.351, _WORD),
-        'wife_civil_status': (0.591, 0.335, 0.959, 0.351, _WORD),
-        'husband_father_name': (0.205, 0.367, 0.588, 0.384, _LINE),
-        'wife_father_name': (0.588, 0.369, 0.960, 0.386, _LINE),
-        'husband_father_citizenship': (0.195, 0.390, 0.580, 0.406, _LINE),
-        'wife_father_citizenship': (0.599, 0.388, 0.958, 0.404, _LINE),
-        'husband_mother_name': (0.196, 0.421, 0.583, 0.438, _LINE),
-        'wife_mother_name': (0.600, 0.419, 0.954, 0.436, _LINE),
-        'husband_mother_citizenship': (0.196, 0.443, 0.578, 0.459, _LINE),
-        'wife_mother_citizenship': (0.590, 0.447, 0.971, 0.463, _LINE),
-        'place_of_marriage': (0.219, 0.551, 0.981, 0.565, _LINE),
-        'date_of_marriage': (0.222, 0.582, 0.571, 0.596, _LINE),
-        'time_of_marriage': (0.730, 0.581, 0.916, 0.596, _LINE),
-        'registration_date': (0.621, 0.685, 0.905, 0.704, _LINE),
     },
-}
-
-
-# ── Anchor-based field templates ───────────────────────────────
-# These complement TEMPLATES (absolute coords). For each field that has an
-# anchor entry, extract_fields() will:
-#   1. Run EasyOCR once on the full form (detail=1) to get all text + bboxes
-#   2. Search for the printed label text inside 'search' region
-#   3. If found, crop the data region from the anchor's edge
-#   4. Fall back to absolute coords from TEMPLATES when anchor not found.
-#
-# Entry format:
-#   'labels' : list of strings — tried in order, case-insensitive partial match
-#   'search' : (x1,y1,x2,y2) fractions of form to search for the label
-#   'side'   : 'right' | 'below' — where the data field is vs. the label
-#   'dx','dy': offset from anchor edge to data start (fractions of form dims)
-#   'dw','dh': data region size (fractions of form dims); dh=0 → auto from anchor
-
-ANCHOR_TEMPLATES = {
-
-    # ── Form 102 ─────────────────────────────────────────────
-    '102': {
-        'province': {
-            'labels': ['Province', 'PROVINCE'],
-            'search': (0.00, 0.09, 0.17, 0.14),
-            'side': 'right', 'dx': 0.003, 'dy': -0.004,
-            'dw': 0.48, 'dh': 0.020,
-        },
-        'registry_no': {
-            'labels': ['Registry No', 'REGISTRY NO'],
-            'search': (0.56, 0.10, 0.72, 0.15),
-            'side': 'right', 'dx': 0.003, 'dy': 0.000,
-            'dw': 0.28, 'dh': 0.026,
-        },
-        'city_municipality': {
-            'labels': ['City', 'Municipality', 'City/Municipality'],
-            'search': (0.00, 0.12, 0.23, 0.16),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.43, 'dh': 0.020,
-        },
-        'mother_name': {
-            'labels': ['Maiden Name', 'MAIDEN NAME', "Mother's Name"],
-            'search': (0.05, 0.30, 0.22, 0.35),
-            'side': 'right', 'dx': 0.003, 'dy': -0.005,
-            'dw': 0.77, 'dh': 0.022,
-        },
-        'father_name': {
-            'labels': ["Father's Name", "FATHER'S NAME", 'Father Name'],
-            'search': (0.05, 0.45, 0.22, 0.50),
-            'side': 'right', 'dx': 0.003, 'dy': -0.005,
-            'dw': 0.77, 'dh': 0.025,
-        },
-        'marriage_date': {
-            'labels': ['Date Married', 'DATE MARRIED', 'Date and Place'],
-            'search': (0.00, 0.58, 0.18, 0.63),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.32, 'dh': 0.020,
-        },
-        'marriage_place': {
-            'labels': ['Place', 'PLACE'],
-            'search': (0.32, 0.58, 0.44, 0.63),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.52, 'dh': 0.020,
-        },
-        'registration_date': {
-            'labels': ['Date', 'DATE', 'Registration'],
-            'search': (0.45, 0.72, 0.65, 0.77),
-            'side': 'right', 'dx': 0.003, 'dy': 0.000,
-            'dw': 0.28, 'dh': 0.020,
-        },
-    },
-
-    # ── Form 103 ─────────────────────────────────────────────
     '103': {
-        'province': {
-            'labels': ['Province', 'PROVINCE'],
-            'search': (0.00, 0.07, 0.18, 0.12),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.47, 'dh': 0.020,
-        },
-        'registry_no': {
-            'labels': ['Registry No', 'REGISTRY NO'],
-            'search': (0.55, 0.09, 0.70, 0.14),
-            'side': 'right', 'dx': 0.003, 'dy': 0.000,
-            'dw': 0.28, 'dh': 0.026,
-        },
-        'city_municipality': {
-            'labels': ['City', 'Municipality'],
-            'search': (0.00, 0.10, 0.23, 0.14),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.42, 'dh': 0.020,
-        },
-        'deceased_name': {
-            'labels': ['First Name', 'FIRST NAME', 'Name of Deceased', 'NAME'],
-            'search': (0.00, 0.13, 0.18, 0.20),
-            'side': 'right', 'dx': 0.003, 'dy': -0.005,
-            'dw': 0.65, 'dh': 0.038,
-        },
-        'father_name': {
-            'labels': ["Father's Name", "FATHER'S NAME", "Father"],
-            'search': (0.20, 0.31, 0.35, 0.36),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.33, 'dh': 0.022,
-        },
-        'mother_name': {
-            'labels': ["Mother's Maiden", "MOTHER'S MAIDEN", "Mother"],
-            'search': (0.55, 0.31, 0.70, 0.36),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.33, 'dh': 0.022,
-        },
-        'cause_immediate': {
-            'labels': ['Immediate Cause', 'IMMEDIATE CAUSE', 'Immediate'],
-            'search': (0.05, 0.39, 0.32, 0.43),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.65, 'dh': 0.018,
-        },
-        'cause_antecedent': {
-            'labels': ['Antecedent', 'ANTECEDENT'],
-            'search': (0.05, 0.41, 0.32, 0.45),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.65, 'dh': 0.018,
-        },
-        'registration_date': {
-            'labels': ['Date', 'Registration Date'],
-            'search': (0.50, 0.67, 0.68, 0.72),
-            'side': 'right', 'dx': 0.003, 'dy': 0.000,
-            'dw': 0.28, 'dh': 0.020,
-        },
     },
-
-    # ── Form 90 ──────────────────────────────────────────────
     '90': {
-        'province': {
-            'labels': ['Province', 'PROVINCE'],
-            'search': (0.00, 0.08, 0.17, 0.12),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.46, 'dh': 0.020,
-        },
-        'registry_no': {
-            'labels': ['Registry No', 'REGISTRY NO'],
-            'search': (0.55, 0.10, 0.70, 0.15),
-            'side': 'right', 'dx': 0.003, 'dy': 0.000,
-            'dw': 0.28, 'dh': 0.030,
-        },
-        'city_municipality': {
-            'labels': ['City', 'Municipality'],
-            'search': (0.00, 0.11, 0.23, 0.15),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.40, 'dh': 0.020,
-        },
-        'groom_name_first': {
-            'labels': ['First', 'FIRST', 'Given Name'],
-            'search': (0.00, 0.30, 0.14, 0.34),
-            'side': 'right', 'dx': 0.002, 'dy': -0.003,
-            'dw': 0.31, 'dh': 0.018,
-        },
-        'bride_name_first': {
-            'labels': ['First', 'FIRST', 'Given Name'],
-            'search': (0.48, 0.30, 0.63, 0.34),
-            'side': 'right', 'dx': 0.002, 'dy': -0.003,
-            'dw': 0.31, 'dh': 0.018,
-        },
-        'groom_father_name': {
-            'labels': ["Father's Name", 'Father', 'FATHER'],
-            'search': (0.00, 0.68, 0.14, 0.73),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.020,
-        },
-        'bride_father_name': {
-            'labels': ["Father's Name", 'Father', 'FATHER'],
-            'search': (0.46, 0.68, 0.60, 0.73),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.020,
-        },
-        'groom_mother_name': {
-            'labels': ["Mother's Name", "Mother's Maiden", 'Mother', 'MOTHER'],
-            'search': (0.00, 0.77, 0.14, 0.82),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.022,
-        },
-        'bride_mother_name': {
-            'labels': ["Mother's Name", "Mother's Maiden", 'Mother', 'MOTHER'],
-            'search': (0.46, 0.77, 0.60, 0.82),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.022,
-        },
     },
-
-    # ── Form 97 ──────────────────────────────────────────────
     '97': {
-        'province': {
-            'labels': ['Province', 'PROVINCE'],
-            'search': (0.00, 0.07, 0.17, 0.11),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.48, 'dh': 0.018,
-        },
-        'registry_no': {
-            'labels': ['Registry No', 'REGISTRY NO'],
-            'search': (0.55, 0.08, 0.70, 0.14),
-            'side': 'right', 'dx': 0.003, 'dy': 0.000,
-            'dw': 0.30, 'dh': 0.030,
-        },
-        'city_municipality': {
-            'labels': ['City', 'Municipality'],
-            'search': (0.00, 0.09, 0.23, 0.13),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.42, 'dh': 0.018,
-        },
-        'husband_father_name': {
-            'labels': ["Father's Name", 'Father', 'FATHER'],
-            'search': (0.05, 0.36, 0.22, 0.40),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.018,
-        },
-        'wife_father_name': {
-            'labels': ["Father's Name", 'Father', 'FATHER'],
-            'search': (0.50, 0.36, 0.65, 0.40),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.018,
-        },
-        'husband_mother_name': {
-            'labels': ["Mother's Name", "Mother's Maiden", 'Mother'],
-            'search': (0.05, 0.41, 0.22, 0.46),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.38, 'dh': 0.020,
-        },
-        'wife_mother_name': {
-            'labels': ["Mother's Name", "Mother's Maiden", 'Mother'],
-            'search': (0.50, 0.41, 0.65, 0.46),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.020,
-        },
-        'place_of_marriage': {
-            'labels': ['Place of Marriage', 'PLACE OF MARRIAGE', 'Place'],
-            'search': (0.05, 0.54, 0.30, 0.58),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.74, 'dh': 0.018,
-        },
-        'date_of_marriage': {
-            'labels': ['Date of Marriage', 'DATE OF MARRIAGE', 'Date'],
-            'search': (0.05, 0.57, 0.27, 0.62),
-            'side': 'right', 'dx': 0.003, 'dy': -0.003,
-            'dw': 0.37, 'dh': 0.018,
-        },
-        'registration_date': {
-            'labels': ['Date', 'Registration'],
-            'search': (0.55, 0.81, 0.72, 0.85),
-            'side': 'right', 'dx': 0.003, 'dy': 0.000,
-            'dw': 0.20, 'dh': 0.020,
-        },
     },
 }
 
 
-# ── Anchor scanning helpers ─────────────────────────────────────
-
-def _scan_form_text(_img: Image.Image):
-    """
-    Anchor detection disabled — TrOCR has no built-in text detector.
-    extract_fields falls back to absolute coordinates for all fields.
-    """
-    return []
-
-
-def _find_anchor_bbox(detections, labels: list, search_box: tuple,
-                      form_w: int, form_h: int):
-    """
-    Find the first bounding box whose text matches any of `labels` (case-insensitive
-    partial match) and whose centre lies within `search_box` (fractions).
-
-    Returns [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] pixel coords, or None.
-    """
-    sx1 = search_box[0] * form_w
-    sy1 = search_box[1] * form_h
-    sx2 = search_box[2] * form_w
-    sy2 = search_box[3] * form_h
-
-    best_bbox = None
-    best_score = 0.0
-
-    for (bbox, text, conf) in detections:
-        if conf < 0.25:
-            continue
-        pts = np.array(bbox, dtype=np.float32)
-        cx = pts[:, 0].mean()
-        cy = pts[:, 1].mean()
-        if not (sx1 <= cx <= sx2 and sy1 <= cy <= sy2):
-            continue
-        text_u = text.upper().strip()
-        for label in labels:
-            label_u = label.upper()
-            if label_u in text_u or text_u in label_u:
-                score = conf * len(label_u)
-                if score > best_score:
-                    best_score = score
-                    best_bbox = bbox
-
-    return best_bbox
-
-
-def _crop_from_anchor(img: Image.Image, anchor_bbox,
-                      side: str, dx: float, dy: float,
-                      dw: float, dh: float) -> Image.Image:
-    """
-    Compute data region relative to a found anchor bbox and return the crop.
-
-    anchor_bbox : [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] pixel coords
-    side        : 'right' → data starts at anchor's right edge
-                  'below' → data starts below anchor's bottom edge
-    dx, dy      : offset fractions (of form width/height) from anchor edge
-    dw, dh      : data region size fractions (of form width/height);
-                  dh=0 means use anchor's own height
-    """
-    form_w, form_h = img.size
-    pts = np.array(anchor_bbox, dtype=np.float32)
-    ax1 = int(pts[:, 0].min())
-    ay1 = int(pts[:, 1].min())
-    ax2 = int(pts[:, 0].max())
-    ay2 = int(pts[:, 1].max())
-
-    data_w = int(dw * form_w)
-    data_h = int(dh * form_h) if dh > 0 else (ay2 - ay1 + 4)
-
-    if side == 'right':
-        rx1 = ax2 + int(dx * form_w)
-        ry1 = ay1 + int(dy * form_h)
-    else:  # 'below'
-        rx1 = ax1 + int(dx * form_w)
-        ry1 = ay2 + int(dy * form_h)
-
-    rx2 = min(form_w, rx1 + data_w)
-    ry2 = min(form_h, ry1 + data_h)
-
-    if rx2 <= rx1 or ry2 <= ry1:
-        return None
 
-    pad = 3
-    return img.crop((max(0, rx1 - pad), max(0, ry1 - pad),
-                     min(form_w, rx2 + pad), min(form_h, ry2 + pad)))
 
 
-def _find_document_corners(gray: np.ndarray):
-    """
-    Try to find the 4 corners of the document in a grayscale scan.
-    Returns a (4,2) float32 array ordered [TL, TR, BR, BL], or None.
-    """
-    # Blur + threshold to isolate the white page against background
-    blur = _cv2.GaussianBlur(gray, (5, 5), 0)
     _, thresh = _cv2.threshold(blur, 0, 255, _cv2.THRESH_BINARY + _cv2.THRESH_OTSU)
-
-    # Find contours
-    contours, _ = _cv2.findContours(thresh, _cv2.RETR_EXTERNAL, _cv2.CHAIN_APPROX_SIMPLE)
     if not contours:
-        return None
-
-    # Take the largest contour — should be the document page
-    c = max(contours, key=_cv2.contourArea)
     area = _cv2.contourArea(c)
-    img_area = gray.shape[0] * gray.shape[1]
-
-    # Must cover at least 30% of the image
-    if area < 0.30 * img_area:
-        return None
-
-    # Approximate to a polygon
-    peri = _cv2.arcLength(c, True)
     approx = _cv2.approxPolyDP(c, 0.02 * peri, True)
-
     if len(approx) != 4:
         return None
 
-    pts = approx.reshape(4, 2).astype(np.float32)
-
-    # Order: TL, TR, BR, BL
-    s = pts.sum(axis=1)
-    d = np.diff(pts, axis=1)
-    ordered = np.array([
-        pts[np.argmin(s)],  # TL — smallest sum
-        pts[np.argmin(d)],  # TR — smallest diff
-        pts[np.argmax(s)],  # BR — largest sum
-        pts[np.argmax(d)],  # BL — largest diff
-    ], dtype=np.float32)
-    return ordered
-
 
-def _orb_align(scan_gray: np.ndarray, ref_gray: np.ndarray, scan_rgb: np.ndarray):
-    """
-    Align scan_rgb to ref_gray using ORB feature matching + RANSAC homography.
-    Returns (aligned_rgb, inlier_count) or (None, 0) on failure.
-    """
-    h, w = scan_gray.shape
     ref_resized = _cv2.resize(ref_gray, (w, h))
-
-    orb = _cv2.ORB_create(nfeatures=5000)
-    kp1, des1 = orb.detectAndCompute(scan_gray, None)
-    kp2, des2 = orb.detectAndCompute(ref_resized, None)
-
     if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
         return None, 0
-
     matcher = _cv2.BFMatcher(_cv2.NORM_HAMMING, crossCheck=True)
     matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)
-    # Keep only top 30% as good matches
-    good = matches[:max(10, len(matches) // 3)]
-
     if len(good) < 10:
         return None, 0
-
     src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
     dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
-
     M, mask = _cv2.estimateAffinePartial2D(
-        src_pts, dst_pts, method=_cv2.RANSAC, ransacReprojThreshold=5.0
-    )
     if M is None:
         return None, 0
-
     inliers = int(mask.sum()) if mask is not None else 0
-    print(f'[align] ORB homography: {inliers}/{len(good)} inliers')
-
-    aligned = _cv2.warpAffine(
-        scan_rgb, M, (w, h),
-        flags=_cv2.INTER_LINEAR,
-        borderMode=_cv2.BORDER_REPLICATE
-    )
     return aligned, inliers
 
 
 def _orb_inliers(scan_gray: np.ndarray, ref_gray: np.ndarray) -> int:
-    """
-    Count ORB RANSAC inliers between two grayscale images without warping.
-    Used by detect_form_type() to score form candidates.
-    """
-    orb = _cv2.ORB_create(nfeatures=3000)
     kp1, des1 = orb.detectAndCompute(scan_gray, None)
-    kp2, des2 = orb.detectAndCompute(ref_gray, None)
     if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
         return 0
     matcher = _cv2.BFMatcher(_cv2.NORM_HAMMING, crossCheck=True)
     matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)
-    good = matches[:max(10, len(matches) // 3)]
     if len(good) < 10:
         return 0
     src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
@@ -776,191 +392,72 @@ def _orb_inliers(scan_gray: np.ndarray, ref_gray: np.ndarray) -> int:
     return int(mask.sum()) if mask is not None else 0
 
 
-def _ecc_align(scan_gray: np.ndarray, ref_gray: np.ndarray, scan_rgb: np.ndarray):
     """
-    Align scan_rgb to ref_gray using ECC (Enhanced Correlation Coefficient).
-    Uses MOTION_EUCLIDEAN (translation + rotation) — more robust than ORB for
-    blurry, low-texture, or handwriting-heavy scans where keypoint matching fails.
-    Returns aligned RGB array or None on failure.
-    """
-    try:
-        h, w = ref_gray.shape
-        # Downscale to 500px for speed; scale translation back afterward
-        scale = min(1.0, 500.0 / max(h, w))
-        sh, sw = max(1, int(h * scale)), max(1, int(w * scale))
-        ref_s = _cv2.resize(ref_gray, (sw, sh))
-        scn_s = _cv2.resize(_cv2.resize(scan_gray, (w, h)), (sw, sh))
-
-        warp = np.eye(2, 3, dtype=np.float32)
-        criteria = (_cv2.TERM_CRITERIA_EPS | _cv2.TERM_CRITERIA_COUNT, 100, 1e-4)
-        cc, warp = _cv2.findTransformECC(ref_s, scn_s, warp, _cv2.MOTION_EUCLIDEAN, criteria)
-
-        # Clamp rotation to ±3° to prevent over-tilting
-        angle_rad = np.arctan2(warp[1, 0], warp[0, 0])
-        angle_deg = np.degrees(angle_rad)
-        MAX_ANGLE = 1.0
-        if abs(angle_deg) > MAX_ANGLE:
-            clamped = np.radians(np.clip(angle_deg, -MAX_ANGLE, MAX_ANGLE))
-            warp[0, 0] = np.cos(clamped)
-            warp[0, 1] = -np.sin(clamped)
-            warp[1, 0] = np.sin(clamped)
-            warp[1, 1] = np.cos(clamped)
-            print(f'[align] ECC rotation clamped {angle_deg:.2f}° -> {np.degrees(clamped):.2f}°')
-
-        # Scale translation to full resolution
-        warp[0, 2] /= scale
-        warp[1, 2] /= scale
-
-        scan_full = _cv2.resize(scan_rgb, (w, h))
-        aligned = _cv2.warpAffine(
-            scan_full, warp, (w, h),
-            flags=_cv2.INTER_LINEAR,
-            borderMode=_cv2.BORDER_REPLICATE
-        )
-        print(f'[align] ECC applied (cc={cc:.4f} angle={angle_deg:.2f}° tx={warp[0,2]:.1f} ty={warp[1,2]:.1f})')
-        return aligned
-    except Exception as e:
-        print(f'[align] ECC failed: {e}')
-        return None
-
-
-def align_to_reference(img: Image.Image, form_type: str):
-    """
-    Align a scanned form to its clean reference using a three-stage cascade:
-
-    Stage 1 — ORB feature matching + RANSAC homography (primary).
-        Matches structural features (printed lines, boxes, text layout).
-        Most accurate when the scan has reasonable contrast/sharpness.
-        Returns high confidence (inlier count) used to decide if anchor
-        scan is needed in extract_fields().
-
-    Stage 2 — ECC (Enhanced Correlation Coefficient) EUCLIDEAN.
-        Good for blurry / low-texture / handwriting-heavy scans where ORB
-        finds too few keypoints. Corrects translation + rotation only.
-
-    Stage 3 — Corner perspective correction (fallback).
-        Finds document corners via contour detection. Only works when the
-        page is visible against a background.
-
-    Stage 4 — Resize only (last resort).
-
-    Returns (aligned_image, orb_inliers) where orb_inliers=0 means ORB
-    did not succeed (ECC/corner/resize was used instead).
     """
     if not _CV2_OK:
         return img, 0
-
     ref_path = REFERENCE_IMAGES.get(form_type)
     if not ref_path or not os.path.exists(ref_path):
-        print(f'[align] No reference for form {form_type} at {ref_path}')
         return img, 0
-
     ref_gray = _cv2.imread(ref_path, _cv2.IMREAD_GRAYSCALE)
     if ref_gray is None:
         return img, 0
-
-    scan_rgb = np.array(img.convert('RGB'))
-    scan_gray = _cv2.cvtColor(scan_rgb, _cv2.COLOR_RGB2GRAY)
     ref_h, ref_w = ref_gray.shape
 
-    scan_gray_rs = _cv2.resize(scan_gray, (ref_w, ref_h))
-    scan_rgb_rs = _cv2.resize(scan_rgb, (ref_w, ref_h))
 
-    # ── Stage 1: ECC (translation + rotation only — no distortion) ────
     print(f'[align] Form {form_type}: trying ECC...')
     aligned = _ecc_align(scan_gray_rs, ref_gray, scan_rgb_rs)
     if aligned is not None:
-        print(f'[align] Form {form_type}: ECC alignment applied')
-        return Image.fromarray(aligned), 25  # return 25 so anchor scan is skipped
 
-    # ── Stage 2: ORB (fallback if ECC fails) ──────────────────
     print(f'[align] Form {form_type}: ECC failed, trying ORB...')
     aligned, inliers = _orb_align(scan_gray_rs, ref_gray, scan_rgb_rs)
     if aligned is not None:
-        print(f'[align] Form {form_type}: ORB applied ({inliers} inliers)')
         return Image.fromarray(aligned), inliers
 
-    # ── Stage 3: corner perspective correction ────────────────
-    print(f'[align] Form {form_type}: ECC failed, trying corner detection...')
-    corners = _find_document_corners(scan_gray)
-    if corners is not None:
-        dst_corners = np.array([
-            [0,     0    ],
-            [ref_w, 0    ],
-            [ref_w, ref_h],
-            [0,     ref_h],
-        ], dtype=np.float32)
-        M = _cv2.getPerspectiveTransform(corners, dst_corners)
-        warped = _cv2.warpPerspective(
-            scan_rgb, M, (ref_w, ref_h),
-            flags=_cv2.INTER_LINEAR,
-            borderMode=_cv2.BORDER_REPLICATE
-        )
-        print(f'[align] Form {form_type}: perspective correction applied')
-        return Image.fromarray(warped), 0
-
-    # ── Stage 4: resize only ──────────────────────────────────
-    print(f'[align] Form {form_type}: all alignment methods failed, resizing only')
     resized = _cv2.resize(scan_rgb, (ref_w, ref_h))
     return Image.fromarray(resized), 0
 
 
 def _deskew(gray: np.ndarray) -> np.ndarray:
-    """Correct slight rotation using Hough line detection."""
     if not _CV2_OK:
         return gray
     edges = _cv2.Canny(gray, 50, 150, apertureSize=3)
-    lines = _cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                              minLineLength=100, maxLineGap=10)
-    if lines is None or len(lines) == 0:
         return gray
-    angles = []
-    for x1, y1, x2, y2 in lines[:, 0]:
-        angle = np.degrees(np.arctan2(y2 - y1, x2 - x1))
-        if -15 < angle < 15:
-            angles.append(angle)
     if not angles:
         return gray
-    median_angle = float(np.median(angles))
-    if abs(median_angle) < 0.3:
         return gray
     h, w = gray.shape
-    M = _cv2.getRotationMatrix2D((w / 2, h / 2), median_angle, 1.0)
     return _cv2.warpAffine(gray, M, (w, h),
                            flags=_cv2.INTER_CUBIC,
                            borderMode=_cv2.BORDER_REPLICATE)
 
 
-def _enhance_for_ocr(gray: np.ndarray) -> np.ndarray:
-    """CLAHE contrast enhancement + gentle denoising."""
-    if not _CV2_OK:
-        return gray
-    clahe = _cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-    enhanced = clahe.apply(gray)
-    denoised = _cv2.fastNlMeansDenoising(enhanced, h=10,
-                                         templateWindowSize=7,
-                                         searchWindowSize=21)
-    return denoised
-
-
-def _binarize(gray: np.ndarray) -> np.ndarray:
-    """Adaptive threshold — cleaner black-on-white for OCR."""
-    if not _CV2_OK:
-        return gray
-    return _cv2.adaptiveThreshold(gray, 255,
-                                  _cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                  _cv2.THRESH_BINARY, 11, 2)
-
-
 def _preprocess(img: Image.Image) -> Image.Image:
-    """
-    Prepare the full form image for field cropping:
-      - Convert to grayscale
-      - Deskew (correct residual rotation after ORB alignment)
-
-    CLAHE and denoising are applied later per-crop in _ocr(), where
-    they are more effective and don't risk blurring the whole form.
-    """
     if not _CV2_OK:
         return img.convert('L')
     gray = np.array(img.convert('L'))
@@ -969,63 +466,29 @@ def _preprocess(img: Image.Image) -> Image.Image:
 
 
 def _crop_field(img: Image.Image, x1r, y1r, x2r, y2r) -> Image.Image:
-    """Crop a field region using relative coordinates."""
     w, h = img.size
-    x1 = int(x1r * w); y1 = int(y1r * h)
-    x2 = int(x2r * w); y2 = int(y2r * h)
-    # Add small padding for OCR accuracy
-    pad = 4
-    x1 = max(0, x1 - pad); y1 = max(0, y1 - pad)
-    x2 = min(w, x2 + pad); y2 = min(h, y2 + pad)
     return img.crop((x1, y1, x2, y2))
 
 
-def _ocr(crop: Image.Image, config: str = '') -> str:
-    """Run TrOCR large-handwritten on a cropped field image."""
-    processor, model = _get_trocr()
-    if processor is None or model is None:
-        return ''
-    try:
-        import torch
-        rgb = crop.convert('RGB')
-        pixel_values = processor(rgb, return_tensors='pt').pixel_values
-        with torch.no_grad():
-            generated_ids = model.generate(pixel_values)
-        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-    except Exception as e:
-        print(f'[template_matcher] OCR error: {e}')
-        return ''
 
 
 def detect_form_type(image_path: str) -> str:
-    """
-    Auto-detect form type from a scanned image.
-
-    Primary — ORB inlier count:
-        Resize the scan to each reference's dimensions, run ORB feature
-        matching against all 4 reference images, and pick the form type
-        with the most RANSAC inliers. Robust to rotation, brightness
-        differences, and partial fills because it matches structural
-        features (printed lines, boxes, column layout) rather than title
-        text. Works at ~800px for speed.
-
-    Fallback — OCR title:
-        Used when no reference images exist or cv2 is unavailable.
-        Less reliable for rotated / faint / cropped scans.
-
-    Returns '102', '103', '90', or '97'.
-    """
     if _CV2_OK:
         try:
-            img = Image.open(image_path).convert('RGB')
             scan_rgb = np.array(img)
             scan_gray = _cv2.cvtColor(scan_rgb, _cv2.COLOR_RGB2GRAY)
-
-            best_type = None
-            best_inliers = 0
-            DET_W = 800  # detection width — fast enough, enough detail
-
             for ft, ref_path in REFERENCE_IMAGES.items():
                 if not os.path.exists(ref_path):
@@ -1033,56 +496,50 @@ def detect_form_type(image_path: str) -> str:
                 if ref_gray is None:
                     continue
                 ref_h, ref_w = ref_gray.shape
-                # Resize scan to reference aspect, then both to DET_W
                 sc = min(1.0, DET_W / ref_w)
-                dw, dh = max(1, int(ref_w * sc)), max(1, int(ref_h * sc))
-                ref_ds = _cv2.resize(ref_gray, (dw, dh))
-                scan_ds = _cv2.resize(_cv2.resize(scan_gray, (ref_w, ref_h)), (dw, dh))
-                count = _orb_inliers(scan_ds, ref_ds)
                 print(f'[detect] Form {ft}: {count} ORB inliers')
                 if count > best_inliers:
-                    best_inliers = count
-                    best_type = ft
-
             if best_type and best_inliers >= 15:
-                print(f'[detect] Best match: Form {best_type} ({best_inliers} inliers)')
                 return best_type
-
-            print(f'[detect] ORB inconclusive (best={best_inliers}), falling back to OCR title')
-
         except Exception as e:
             print(f'[template_matcher] detect_form_type ORB error: {e}')
-
-    # ── OCR title fallback ────────────────────────────────────
     try:
         img_l = Image.open(image_path).convert('L')
         w, h = img_l.size
-        title_crop = img_l.crop((0, int(h * 0.04), w, int(h * 0.15)))
-        title = _ocr(title_crop).upper()
         if title:
             if 'LIVE BIRTH' in title or ('BIRTH' in title
                     and 'DEATH' not in title and 'MARRIAGE' not in title):
                 return '102'
-            elif 'DEATH' in title:
                 return '103'
-            elif 'MARRIAGE' in title and 'LICENSE' in title:
                 return '90'
-            elif 'MARRIAGE' in title:
                 return '97'
-        print(f'[template_matcher] Could not detect form type; defaulting to 102. '
-              f'Title: {title[:80] if title else "(empty)"}')
    except Exception as e:
        print(f'[template_matcher] detect_form_type OCR error: {e}')
    return '102'
 
 
 def extract_fields(image_path: str, form_type: str) -> dict:
     """
-    Main entry point.
 
     Args:
-        image_path: Path to the uploaded form image (PNG/JPG/PDF page)
-        form_type: '102', '103', '90', or '97'
 
     Returns:
         dict of { field_name: extracted_text }
@@ -1091,168 +548,135 @@ def extract_fields(image_path: str, form_type: str) -> dict:
     if template is None:
         print(f'[template_matcher] No template for form type: {form_type}')
         return {}
-
-    if _get_trocr()[0] is None:
-        print('[template_matcher] TrOCR not available — returning empty fields')
         return {}
-
-
-    # Load and preprocess
     try:
         img = Image.open(image_path).convert('RGB')
     except Exception as e:
         print(f'[template_matcher] Cannot open image: {e}')
         return {}
 
-    # Align to reference before cropping (fixes scan offset/rotation)
-    # orb_inliers > 0 means ORB succeeded — absolute coords are reliable.
     img, orb_inliers = align_to_reference(img, form_type)
-    processed = _preprocess(img)
-
-    # ── One-time full-form scan for anchor detection ──────────
-    # When ORB aligned with high confidence (inliers >= 25), absolute
-    # coordinates are accurate and the expensive full-page OCR scan can
-    # be skipped. Below that threshold, anchors improve robustness.
-    anchor_defs = ANCHOR_TEMPLATES.get(form_type, {})
-    detections = []
-    if anchor_defs and orb_inliers < 25:
-        print(f'[template_matcher] ORB inliers={orb_inliers} — scanning form for anchors...')
-        detections = _scan_form_text(img)  # use colour/original for label scan
-        print(f'[template_matcher] Found {len(detections)} text regions in form')
-    elif anchor_defs:
-        print(f'[template_matcher] ORB inliers={orb_inliers} >= 25 — skipping anchor scan')
 
     form_w, form_h = img.size
-    anchor_hits = 0
-
-    # ── Collect all crops first, then batch-infer in one pass ─
-    field_names = []
-    crops = []
 
     for field_name, coords in template.items():
-        crop = None
-
-        # ── Try anchor-based crop first ───────────────────────
-        adef = anchor_defs.get(field_name)
-        if adef and detections:
-            bbox = _find_anchor_bbox(
-                detections, adef['labels'], adef['search'], form_w, form_h
-            )
-            if bbox is not None:
-                crop = _crop_from_anchor(
-                    processed, bbox,
-                    adef['side'], adef['dx'], adef['dy'],
-                    adef['dw'], adef['dh']
-                )
-                if crop is not None:
-                    anchor_hits += 1
-
-        # ── Fallback: absolute coordinate crop ────────────────
-        if crop is None:
-            x1r, y1r, x2r, y2r, cfg = coords
-            crop = _crop_field(processed, x1r, y1r, x2r, y2r)
-
         field_names.append(field_name)
         crops.append(crop)
 
-    fields = {}
-    for field_name, crop in zip(field_names, crops):
         text = _postprocess(_ocr(crop), field_name)
         if text:
             fields[field_name] = text
 
-    if anchor_defs:
-        print(f'[template_matcher] Anchor hits: {anchor_hits}/{len(anchor_defs)} defined')
-    print(f'[template_matcher] Extracted {len(fields)}/{len(template)} fields from form {form_type}')
     return fields
 
 
-def pdf_to_image(pdf_path: str, page: int = 0) -> str:
-    """
-    Convert a PDF page to a PNG image for processing.
-    Returns path to the saved PNG, or None on failure.
-    Requires: pip install pdf2image + poppler installed
-    """
-    try:
-        from pdf2image import convert_from_path
-        pages = convert_from_path(pdf_path, dpi=150)
-        if not pages:
-            return None
-        out_path = pdf_path.replace('.pdf', f'_page{page}.png')
-        pages[page].save(out_path, 'PNG')
-        return out_path
-    except ImportError:
-        print('[template_matcher] pdf2image not installed. Run: pip install pdf2image')
-        return None
-    except Exception as e:
-        print(f'[template_matcher] PDF conversion failed: {e}')
-        return None
-
 
-def debug_draw_boxes(image_path: str, form_type: str, out_path: str = None):
     """
-    Draw all field bounding boxes on the ALIGNED image and save it.
-    Uses the same alignment (ORB → corner → resize) as extract_fields(),
-    so the boxes reflect where coordinates actually land during extraction.
 
-    Usage: python template_matcher.py myform.png 102
     """
     from PIL import ImageDraw, ImageFont
 
     template = TEMPLATES.get(form_type)
     if not template:
         print(f'No template for {form_type}')
-        return
 
-    img = Image.open(image_path).convert('RGB')
-    img, _ = align_to_reference(img, form_type)  # ← align first
-    draw = ImageDraw.Draw(img)
     w, h = img.size
 
     try:
-        font = ImageFont.truetype('C:/Windows/Fonts/arial.ttf', 12)
-    except:
-        font = ImageFont.load_default()
 
-    colors = ['#e53935','#1e88e5','#43a047','#fb8c00','#8e24aa','#00acc1']
-    for idx, (field_name, coords) in enumerate(template.items()):
         x1r, y1r, x2r, y2r, _ = coords
-        x1 = int(x1r * w); y1 = int(y1r * h)
-        x2 = int(x2r * w); y2 = int(y2r * h)
-        color = colors[idx % len(colors)]
-        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)
 
     base, ext = os.path.splitext(image_path)
     out = out_path or f'{base}_debug_{form_type}{ext}'
     img.save(out)
     print(f'[template_matcher] Debug image saved: {out}')
     return out
 
 
-# ── CLI ────────────────────────────────────────────────────────
 
 if __name__ == '__main__':
     if len(sys.argv) < 3:
-        print('Usage: python template_matcher.py <image_path> <form_type>')
         print('  form_type: 102 | 103 | 90 | 97')
-        print('Example: python template_matcher.py form102_sample1.png 102')
         sys.exit(1)
 
     img_path = sys.argv[1]
     form_type = sys.argv[2]
 
-    # Draw boxes on the aligned image (same as what extraction sees)
-    out = debug_draw_boxes(img_path, form_type)
-    print(f'Open {out} to verify box positions on the aligned image.\n')
 
-    # Extract and print fields
     result = extract_fields(img_path, form_type)
-    print(f'\nExtracted fields ({len(result)}):')
     for k, v in result.items():
-        print(f'  {k:<35} = {v}')
-    # Show which fields got nothing
     template = TEMPLATES.get(form_type, {})
-    missing = [k for k in template if k not in result]
     if missing:
         print(f'\nEmpty fields ({len(missing)}):')
         for k in missing:
-            print(f'  {k}')
 """
+template_matcher.py  (v3 — pytesseract removed)
+================================================
+Extracts field values from Philippine civil registry scanned forms.
+
+WHAT CHANGED FROM v2
+---------------------
+1. pytesseract removed entirely.
+2. _scan_form_text() now uses CV2 contour/MSER detection to find
+   candidate text regions, then reads each region with TrOCR
+   (the same model already loaded for field OCR).
+3. Anchor label matching (fuzzy SequenceMatcher) unchanged.
+4. No new dependencies — everything already required by the project.
 """
 
+import os
+import sys
+import re as _re
+
 import numpy as np
 from PIL import Image
 
 except ImportError:
     _CV2_OK = False
 
+# ── Reference images ─────────────────────────────────────────────
 _REF_DIR = os.path.join(os.path.dirname(__file__), 'references')
 REFERENCE_IMAGES = {
+    '102': os.path.join(_REF_DIR, 'reference_102.jpg'),
     '103': os.path.join(_REF_DIR, 'reference_103.png'),
+    '90':  os.path.join(_REF_DIR, 'reference_90.png'),
+    '97':  os.path.join(_REF_DIR, 'reference_97.jpg'),
 }
 
+# ── CRNN+CTC engine ──────────────────────────────────────────────
+import sys as _sys
+_CRNN_DIR = os.path.join(os.path.dirname(__file__), 'CRNN+CTC')
+if _CRNN_DIR not in _sys.path:
+    _sys.path.insert(0, _CRNN_DIR)
+
+_CRNN_CHECKPOINT = os.path.join(_CRNN_DIR, 'checkpoints', 'best_model.pth')
+_crnn_ocr = None
+_crnn_decode = None   # reference to decode_ctc_predictions
+
+
+def _get_crnn():
+    global _crnn_ocr, _crnn_decode
+    if _crnn_ocr is None:
         try:
             import torch
+            from inference import CivilRegistryOCR
+            from utils import decode_ctc_predictions as _dcp
+            print('[template_matcher] Loading CRNN+CTC model...')
+            device = 'cuda' if torch.cuda.is_available() else 'cpu'
+            _crnn_ocr = CivilRegistryOCR(
+                checkpoint_path=_CRNN_CHECKPOINT,
+                device=device,
+                mode='adaptive',
+            )
+            _crnn_decode = _dcp
+            print('[template_matcher] CRNN+CTC ready.')
         except Exception as e:
+            print(f'[template_matcher] CRNN+CTC load error: {e}')
+    return _crnn_ocr
+
+
+def _crnn_read(crop_img: Image.Image) -> str:
+    """Run CRNN+CTC on a PIL Image crop and return the decoded string."""
+    ocr = _get_crnn()
+    if ocr is None or _crnn_decode is None:
+        return ''
+    try:
+        import torch
+        rgb = np.array(crop_img.convert('RGB'))
+        bgr = rgb[:, :, ::-1].copy()
+        normalized = ocr.normalizer.normalize(bgr)
+        tensor = torch.FloatTensor(
+            normalized.astype(np.float32) / 255.0
+        ).unsqueeze(0).unsqueeze(0).to(ocr.device)
+        with torch.no_grad():
+            outputs = ocr.model(tensor)
+        decoded = _crnn_decode(outputs.cpu(), ocr.idx_to_char, method='greedy')
+        return decoded[0].strip()
+    except Exception as e:
+        print(f'[template_matcher] CRNN+CTC read error: {e}')
+        return ''
 
+ # Hint constants (kept for template dict compatibility)
93
  _LINE = 'line'
94
  _BLOCK = 'block'
95
  _WORD = 'word'
96
 
97
+ # ── Post-processing ───────────────────────────────────────────────
 
 
 
98
  _SEX_KEYWORDS = {
99
  'female': 'FEMALE', 'fem': 'FEMALE', 'f': 'FEMALE',
100
+ 'male': 'MALE', 'm': 'MALE',
101
  }
 
 
102
  _FIELD_TYPE = {
 
103
  'sex': 'sex', 'groom_sex': 'sex', 'bride_sex': 'sex',
104
  'husband_sex': 'sex', 'wife_sex': 'sex',
 
105
  'dob_year': 'year',
 
106
  'age': 'digits', 'groom_age': 'digits', 'bride_age': 'digits',
107
  'husband_age': 'digits', 'wife_age': 'digits', 'dob_day': 'digits',
 
108
  'registration_date': 'date', 'marriage_date': 'date',
109
+ 'date_of_marriage': 'date', 'date_of_death': 'date',
110
+ 'date_of_birth': 'date', 'date_issued': 'date',
111
  'groom_dob': 'date', 'bride_dob': 'date',
112
  'husband_dob': 'date', 'wife_dob': 'date',
 
113
  'registry_no': 'registry', 'marriage_license_no': 'registry',
114
  }
115
 
 
116
  def _postprocess(text: str, field_name: str) -> str:
117
  text = text.strip()
118
  if not text:
119
  return text
120
  rule = _FIELD_TYPE.get(field_name)
 
121
  if rule == 'sex':
122
  tl = text.lower()
 
123
  for kw in sorted(_SEX_KEYWORDS, key=len, reverse=True):
124
  if kw in tl:
125
  return _SEX_KEYWORDS[kw]
126
  return text
 
127
  if rule == 'year':
128
  m = _re.search(r'(19|20)\d{2}', text)
129
  if m:
130
  return m.group(0)
131
  digits = _re.sub(r'\D', '', text)
132
  return digits[:4] if len(digits) >= 4 else text
 
133
  if rule == 'digits':
134
  d = _re.sub(r'\D', '', text)
135
  return d if d else text
 
136
  if rule == 'date':
137
  return _re.sub(r'[^\w\s\-/,.]', '', text).strip()
 
138
  if rule == 'registry':
139
  return _re.sub(r'[^\w\s\-/]', '', text).strip()
 
140
  return text
141
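Worked examples of the rules above, each following directly from the code:

    assert _postprocess('Fem.', 'sex') == 'FEMALE'            # longest keyword matched first
    assert _postprocess('born 1987!', 'dob_year') == '1987'   # 19xx/20xx regex wins
    assert _postprocess('2 4', 'age') == '24'                 # digits rule strips non-digits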
142
 
143
+ # ── Absolute-coordinate templates ─────────────────────────────────
144
+ # (x1, y1, x2, y2, hint): all values are fractions 0.0–1.0
145
  TEMPLATES = {
146
+ '102': {
147
+ 'province': (0.183, 0.110, 0.582, 0.128, _LINE),
148
+ 'registry_no': (0.617, 0.121, 0.900, 0.149, _LINE),
149
+ 'city_municipality': (0.224, 0.134, 0.631, 0.150, _LINE),
150
  'name_first': (0.169, 0.161, 0.453, 0.181, _LINE),
151
  'name_middle': (0.450, 0.161, 0.674, 0.181, _LINE),
152
+ 'name_last': (0.674, 0.162, 0.935, 0.182, _LINE),
153
+ 'sex': (0.126, 0.195, 0.335, 0.210, _WORD),
154
+ 'dob_day': (0.445, 0.193, 0.562, 0.210, _WORD),
155
+ 'dob_month': (0.560, 0.193, 0.731, 0.211, _LINE),
156
+ 'dob_year': (0.735, 0.197, 0.883, 0.213, _WORD),
157
+ 'place_of_birth': (0.383, 0.227, 0.890, 0.245, _LINE),
158
+ 'type_of_birth': (0.124, 0.263, 0.328, 0.282, _WORD),
159
+ 'birth_order': (0.542, 0.272, 0.742, 0.285, _WORD),
160
+ 'weight_at_birth': (0.757, 0.258, 0.839, 0.287, _WORD),
161
+ 'mother_name': (0.217, 0.299, 0.894, 0.320, _LINE),
162
+ 'mother_citizenship': (0.125, 0.329, 0.506, 0.351, _LINE),
163
+ 'mother_religion': (0.508, 0.332, 0.901, 0.351, _LINE),
164
+ 'mother_occupation': (0.511, 0.363, 0.750, 0.385, _LINE),
165
+ 'mother_age_at_birth': (0.758, 0.371, 0.888, 0.390, _WORD),
166
+ 'mother_residence': (0.211, 0.405, 0.936, 0.425, _LINE),
167
+ 'father_name': (0.200, 0.436, 0.894, 0.456, _LINE),
168
+ 'father_citizenship': (0.128, 0.465, 0.318, 0.487, _LINE),
169
+ 'father_religion': (0.328, 0.467, 0.550, 0.490, _LINE),
170
+ 'father_occupation': (0.543, 0.466, 0.754, 0.496, _LINE),
171
+ 'father_age_at_birth': (0.752, 0.476, 0.902, 0.496, _WORD),
172
+ 'father_residence': (0.216, 0.508, 0.949, 0.527, _LINE),
173
+ 'marriage_date': (0.092, 0.556, 0.413, 0.573, _LINE),
174
+ 'marriage_place': (0.400, 0.554, 0.922, 0.571, _LINE),
175
+ 'registration_date': (0.635, 0.717, 0.919, 0.736, _LINE),
176
  },
177
  '103': {
178
+ 'province': (0.182, 0.076, 0.581, 0.094, _LINE),
179
+ 'registry_no': (0.649, 0.088, 0.937, 0.123, _LINE),
180
+ 'city_municipality': (0.222, 0.097, 0.629, 0.113, _LINE),
181
+ 'deceased_name': (0.105, 0.139, 0.739, 0.173, _LINE),
182
+ 'sex': (0.735, 0.137, 0.931, 0.170, _WORD),
183
+ 'date_of_death': (0.123, 0.189, 0.316, 0.216, _LINE),
184
+ 'date_of_birth': (0.319, 0.187, 0.567, 0.214, _LINE),
185
+ 'age': (0.573, 0.198, 0.717, 0.214, _WORD),
186
+ 'place_of_death': (0.096, 0.227, 0.727, 0.251, _LINE),
187
+ 'civil_status': (0.709, 0.233, 0.935, 0.257, _WORD),
188
+ 'religion': (0.092, 0.268, 0.324, 0.295, _LINE),
189
+ 'citizenship': (0.324, 0.270, 0.522, 0.295, _LINE),
190
+ 'residence': (0.519, 0.271, 0.936, 0.297, _LINE),
191
+ 'occupation': (0.095, 0.311, 0.292, 0.330, _LINE),
192
+ 'father_name': (0.295, 0.306, 0.614, 0.334, _LINE),
193
+ 'mother_name': (0.615, 0.312, 0.938, 0.332, _LINE),
194
+ 'cause_immediate': (0.312, 0.372, 0.961, 0.384, _LINE),
195
+ 'cause_antecedent': (0.320, 0.383, 0.973, 0.402, _LINE),
196
+ 'cause_underlying': (0.311, 0.406, 0.839, 0.424, _LINE),
197
+ 'registration_date': (0.635, 0.717, 0.919, 0.736, _LINE),
198
  },
 
 
199
  '90': {
200
+ 'province': (0.208, 0.099, 0.607, 0.117, _LINE),
201
+ 'registry_no': (0.641, 0.104, 0.924, 0.132, _LINE),
202
+ 'city_municipality': (0.231, 0.113, 0.638, 0.129, _LINE),
203
+ 'marriage_license_no': (0.673, 0.132, 0.928, 0.150, _LINE),
204
+ 'date_issued': (0.775, 0.150, 0.932, 0.168, _LINE),
205
+ 'groom_name_first': (0.170, 0.294, 0.483, 0.309, _LINE),
206
+ 'groom_name_middle': (0.176, 0.308, 0.485, 0.320, _LINE),
207
+ 'groom_name_last': (0.174, 0.319, 0.486, 0.333, _LINE),
208
+ 'bride_name_first': (0.622, 0.292, 0.937, 0.306, _LINE),
209
+ 'bride_name_middle': (0.622, 0.306, 0.928, 0.319, _LINE),
210
+ 'bride_name_last': (0.621, 0.319, 0.929, 0.334, _LINE),
211
+ 'groom_dob': (0.152, 0.348, 0.394, 0.369, _LINE),
212
+ 'groom_age': (0.400, 0.345, 0.474, 0.371, _WORD),
213
+ 'bride_dob': (0.576, 0.345, 0.853, 0.365, _LINE),
214
+ 'bride_age': (0.851, 0.346, 0.932, 0.369, _WORD),
215
+ 'groom_place_of_birth': (0.136, 0.371, 0.472, 0.400, _LINE),
216
+ 'bride_place_of_birth': (0.585, 0.377, 0.921, 0.400, _LINE),
217
+ 'groom_sex': (0.135, 0.408, 0.267, 0.425, _WORD),
218
+ 'groom_citizenship': (0.268, 0.407, 0.477, 0.425, _LINE),
219
+ 'bride_sex': (0.574, 0.408, 0.708, 0.424, _WORD),
220
+ 'bride_citizenship': (0.720, 0.408, 0.917, 0.427, _LINE),
221
+ 'groom_residence': (0.140, 0.436, 0.472, 0.463, _LINE),
222
+ 'bride_residence': (0.577, 0.434, 0.922, 0.463, _LINE),
223
+ 'groom_religion': (0.135, 0.465, 0.472, 0.494, _LINE),
224
+ 'bride_religion': (0.584, 0.463, 0.920, 0.486, _LINE),
225
+ 'groom_civil_status': (0.135, 0.492, 0.471, 0.517, _WORD),
226
+ 'bride_civil_status': (0.585, 0.491, 0.924, 0.513, _WORD),
227
+ 'groom_father_name': (0.133, 0.647, 0.477, 0.672, _LINE),
228
+ 'groom_father_citizenship':(0.141, 0.669, 0.475, 0.695, _LINE),
229
+ 'bride_father_name': (0.580, 0.646, 0.923, 0.666, _LINE),
230
+ 'bride_father_citizenship':(0.578, 0.667, 0.916, 0.689, _LINE),
231
+ 'groom_mother_name': (0.139, 0.733, 0.474, 0.762, _LINE),
232
+ 'groom_mother_citizenship':(0.135, 0.763, 0.480, 0.779, _LINE),
233
+ 'bride_mother_name': (0.584, 0.736, 0.914, 0.758, _LINE),
234
+ 'bride_mother_citizenship':(0.579, 0.758, 0.924, 0.780, _LINE),
235
  },
 
 
236
  '97': {
237
+ 'province': (0.196, 0.093, 0.595, 0.111, _LINE),
238
+ 'registry_no': (0.771, 0.095, 0.969, 0.130, _LINE),
239
+ 'city_municipality': (0.197, 0.119, 0.604, 0.135, _LINE),
240
+ 'husband_name_first': (0.257, 0.158, 0.572, 0.173, _LINE),
241
+ 'husband_name_middle': (0.251, 0.180, 0.562, 0.192, _LINE),
242
+ 'husband_name_last': (0.254, 0.201, 0.576, 0.216, _LINE),
243
+ 'wife_name_first': (0.649, 0.158, 0.953, 0.171, _LINE),
244
+ 'wife_name_middle': (0.649, 0.180, 0.950, 0.195, _LINE),
245
+ 'wife_name_last': (0.651, 0.202, 0.968, 0.214, _LINE),
246
+ 'husband_dob': (0.205, 0.231, 0.493, 0.248, _LINE),
247
+ 'husband_age': (0.500, 0.233, 0.557, 0.249, _WORD),
248
+ 'wife_dob': (0.603, 0.234, 0.889, 0.245, _LINE),
249
+ 'wife_age': (0.901, 0.233, 0.961, 0.247, _WORD),
250
+ 'husband_place_of_birth': (0.193, 0.262, 0.573, 0.278, _LINE),
251
+ 'wife_place_of_birth': (0.595, 0.263, 0.963, 0.279, _LINE),
252
+ 'husband_sex': (0.221, 0.288, 0.309, 0.308, _WORD),
253
+ 'wife_sex': (0.616, 0.285, 0.711, 0.305, _WORD),
254
+ 'husband_citizenship': (0.323, 0.295, 0.567, 0.312, _LINE),
255
+ 'wife_citizenship': (0.722, 0.296, 0.963, 0.313, _LINE),
256
+ 'husband_residence': (0.190, 0.325, 0.563, 0.362, _LINE),
257
+ 'wife_residence': (0.590, 0.326, 0.961, 0.361, _LINE),
258
+ 'husband_religion': (0.190, 0.366, 0.567, 0.383, _LINE),
259
+ 'wife_religion': (0.582, 0.362, 0.959, 0.383, _LINE),
260
+ 'husband_civil_status': (0.189, 0.397, 0.572, 0.415, _WORD),
261
+ 'wife_civil_status': (0.588, 0.398, 0.956, 0.414, _WORD),
262
+ 'husband_father_name': (0.191, 0.428, 0.574, 0.445, _LINE),
263
+ 'wife_father_name': (0.586, 0.429, 0.958, 0.446, _LINE),
264
+ 'husband_father_citizenship': (0.184, 0.451, 0.569, 0.467, _LINE),
265
+ 'wife_father_citizenship': (0.588, 0.449, 0.947, 0.465, _LINE),
266
+ 'husband_mother_name': (0.176, 0.481, 0.563, 0.498, _LINE),
267
+ 'wife_mother_name': (0.586, 0.480, 0.940, 0.497, _LINE),
268
+ 'husband_mother_citizenship': (0.191, 0.501, 0.573, 0.517, _LINE),
269
+ 'wife_mother_citizenship': (0.590, 0.501, 0.971, 0.517, _LINE),
270
+ 'place_of_marriage': (0.196, 0.650, 0.958, 0.664, _LINE),
271
+ 'date_of_marriage': (0.199, 0.678, 0.548, 0.692, _LINE),
272
+ 'time_of_marriage': (0.765, 0.680, 0.917, 0.696, _LINE),
273
+ 'registration_date': (0.635, 0.717, 0.919, 0.736, _LINE),
274
  },
275
  }
276
 
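The fractions become pixels only after the scan is aligned and resized to the
reference. For example, on a hypothetical 2480x3508 px aligned page, the Form
102 'province' box lands at:

    w, h = 2480, 3508                                  # hypothetical aligned page size
    x1, y1, x2, y2 = 0.183, 0.110, 0.582, 0.128        # TEMPLATES['102']['province'][:4]
    print(int(x1*w), int(y1*h), int(x2*w), int(y2*h))  # 453 385 1443 449 (before padding)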
277
 
278
+ # ── Alignment helpers ─────────────────────────────────────────────
279
 
280
+ def _order_corners(pts: np.ndarray) -> np.ndarray:
281
+ s = pts.sum(axis=1)
282
+ d = np.diff(pts, axis=1).flatten()
283
+ return np.array([
284
+ pts[np.argmin(s)],
285
+ pts[np.argmin(d)],
286
+ pts[np.argmax(s)],
287
+ pts[np.argmax(d)],
288
+ ], dtype=np.float32)
289
 
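The sum/diff trick orders an arbitrary quadrilateral as top-left, top-right,
bottom-right, bottom-left: x+y is minimal at the top-left and maximal at the
bottom-right, while y-x is minimal at the top-right and maximal at the
bottom-left. A quick check:

    pts = np.array([[10, 90], [90, 90], [90, 10], [10, 10]], np.float32)  # scrambled
    print(_order_corners(pts))   # (10,10) (90,10) (90,90) (10,90) -> tl, tr, br, bl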
290
 
291
+ def _correct_perspective(scan_rgb: np.ndarray, ref_w: int, ref_h: int) -> np.ndarray:
292
+ if not _CV2_OK:
293
+ return scan_rgb
294
+ gray = _cv2.cvtColor(scan_rgb, _cv2.COLOR_RGB2GRAY)
295
+ kernel = _cv2.getStructuringElement(_cv2.MORPH_RECT, (5, 5))
296
+ blur = _cv2.GaussianBlur(gray, (7, 7), 0)
 
297
  _, thresh = _cv2.threshold(blur, 0, 255, _cv2.THRESH_BINARY + _cv2.THRESH_OTSU)
298
+ dilated = _cv2.dilate(thresh, kernel, iterations=2)
299
+ contours, _ = _cv2.findContours(dilated, _cv2.RETR_EXTERNAL, _cv2.CHAIN_APPROX_SIMPLE)
 
300
  if not contours:
301
+ return scan_rgb
302
+ c = max(contours, key=_cv2.contourArea)
 
 
303
  area = _cv2.contourArea(c)
304
+ if area < 0.30 * gray.shape[0] * gray.shape[1]:
305
+ print('[align] perspective: contour too small, skipping')
306
+ return scan_rgb
307
+ peri = _cv2.arcLength(c, True)
308
  approx = _cv2.approxPolyDP(c, 0.02 * peri, True)
 
309
  if len(approx) != 4:
310
+ print(f'[align] perspective: {len(approx)} corners (need 4), skipping')
311
+ return scan_rgb
312
+ src = _order_corners(approx.reshape(4, 2).astype(np.float32))
313
+ dst = np.array([[0, 0], [ref_w, 0], [ref_w, ref_h], [0, ref_h]], np.float32)
314
+ M = _cv2.getPerspectiveTransform(src, dst)
315
+ warped = _cv2.warpPerspective(
316
+ scan_rgb, M, (ref_w, ref_h),
317
+ flags=_cv2.INTER_LINEAR, borderMode=_cv2.BORDER_REPLICATE)
318
+ print('[align] perspective correction applied')
319
+ return warped
320
+
321
+
322
+ def _ecc_align(scan_gray: np.ndarray, ref_gray: np.ndarray,
323
+ scan_rgb: np.ndarray) -> np.ndarray | None:
324
+ try:
325
+ h, w = ref_gray.shape
326
+ scale = min(1.0, 500.0 / max(h, w))
327
+ sh, sw = max(1, int(h * scale)), max(1, int(w * scale))
328
+ ref_s = _cv2.resize(ref_gray, (sw, sh))
329
+ scn_s = _cv2.resize(_cv2.resize(scan_gray, (w, h)), (sw, sh))
330
+ warp = np.eye(2, 3, dtype=np.float32)
331
+ criteria = (_cv2.TERM_CRITERIA_EPS | _cv2.TERM_CRITERIA_COUNT, 100, 1e-4)
332
+ cc, warp = _cv2.findTransformECC(ref_s, scn_s, warp, _cv2.MOTION_EUCLIDEAN, criteria)
333
+ angle = np.degrees(np.arctan2(warp[1, 0], warp[0, 0]))
334
+ if abs(angle) > 1.0:
335
+ clamped = np.radians(np.clip(angle, -1.0, 1.0))
336
+ warp[0, 0] = np.cos(clamped); warp[0, 1] = -np.sin(clamped)
337
+ warp[1, 0] = np.sin(clamped); warp[1, 1] = np.cos(clamped)
338
+ warp[0, 2] /= scale; warp[1, 2] /= scale
339
+ scan_full = _cv2.resize(scan_rgb, (w, h))
340
+ aligned = _cv2.warpAffine(scan_full, warp, (w, h),
341
+ flags=_cv2.INTER_LINEAR,
342
+ borderMode=_cv2.BORDER_REPLICATE)
343
+ print(f'[align] ECC applied (cc={cc:.4f} angle={angle:.2f}°)')
344
+ return aligned
345
+ except Exception as e:
346
+ print(f'[align] ECC failed: {e}')
347
  return None
348
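For intuition: ECC fits a Euclidean (rotation + translation) warp on a proxy no
larger than 500 px, clamps implausible rotations to +/-1 degree, and rescales
only the translation back to full resolution, since the rotation angle is
scale-invariant. The core call, stripped to essentials (file names hypothetical):

    ref = _cv2.imread('references/reference_103.png', _cv2.IMREAD_GRAYSCALE)
    scan = _cv2.resize(_cv2.imread('scan.png', _cv2.IMREAD_GRAYSCALE),
                       (ref.shape[1], ref.shape[0]))
    warp = np.eye(2, 3, dtype=np.float32)
    criteria = (_cv2.TERM_CRITERIA_EPS | _cv2.TERM_CRITERIA_COUNT, 100, 1e-4)
    cc, warp = _cv2.findTransformECC(ref, scan, warp, _cv2.MOTION_EUCLIDEAN, criteria)
    print(cc)   # correlation coefficient; closer to 1.0 means a better fit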
349
 
350
+ def _orb_align(scan_gray: np.ndarray, ref_gray: np.ndarray,
351
+ scan_rgb: np.ndarray) -> tuple[np.ndarray | None, int]:
352
+ h, w = scan_gray.shape
353
  ref_resized = _cv2.resize(ref_gray, (w, h))
354
+ orb = _cv2.ORB_create(nfeatures=5000)
355
+ kp1, des1 = orb.detectAndCompute(scan_gray, None)
356
+ kp2, des2 = orb.detectAndCompute(ref_resized, None)
 
 
357
  if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
358
  return None, 0
 
359
  matcher = _cv2.BFMatcher(_cv2.NORM_HAMMING, crossCheck=True)
360
  matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)
361
+ good = matches[:max(10, len(matches) // 3)]  # keep the best third of matches (at least 10)
 
 
362
  if len(good) < 10:
363
  return None, 0
 
364
  src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
365
  dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
 
366
  M, mask = _cv2.estimateAffinePartial2D(
367
+ src_pts, dst_pts, method=_cv2.RANSAC, ransacReprojThreshold=5.0)
 
368
  if M is None:
369
  return None, 0
 
370
  inliers = int(mask.sum()) if mask is not None else 0
371
+ aligned = _cv2.warpAffine(scan_rgb, M, (w, h),
372
+ flags=_cv2.INTER_LINEAR,
373
+ borderMode=_cv2.BORDER_REPLICATE)
374
+ print(f'[align] ORB applied ({inliers} inliers)')
375
  return aligned, inliers
376
 
377
 
378
  def _orb_inliers(scan_gray: np.ndarray, ref_gray: np.ndarray) -> int:
379
+ orb = _cv2.ORB_create(nfeatures=3000)
380
  kp1, des1 = orb.detectAndCompute(scan_gray, None)
381
+ kp2, des2 = orb.detectAndCompute(ref_gray, None)
382
  if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
383
  return 0
384
  matcher = _cv2.BFMatcher(_cv2.NORM_HAMMING, crossCheck=True)
385
  matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance)
386
+ good = matches[:max(10, len(matches) // 3)]
387
  if len(good) < 10:
388
  return 0
389
  src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
 dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
 M, mask = _cv2.estimateAffinePartial2D(
 src_pts, dst_pts, method=_cv2.RANSAC, ransacReprojThreshold=5.0)
392
  return int(mask.sum()) if mask is not None else 0
393
 
394
 
395
+ def align_to_reference(img: Image.Image, form_type: str) -> tuple[Image.Image, int]:
396
  """
397
+ Four-stage alignment cascade:
398
+ Stage 0 β€” Perspective correction
399
+ Stage 1 β€” ECC EUCLIDEAN
400
+ Stage 2 β€” ORB RANSAC affine
401
+ Stage 3 β€” Resize only
402
+ Returns (aligned_image, orb_inlier_count).
403
  """
404
  if not _CV2_OK:
405
  return img, 0
 
406
  ref_path = REFERENCE_IMAGES.get(form_type)
407
  if not ref_path or not os.path.exists(ref_path):
408
+ print(f'[align] No reference for form {form_type}')
409
  return img, 0
 
410
  ref_gray = _cv2.imread(ref_path, _cv2.IMREAD_GRAYSCALE)
411
  if ref_gray is None:
412
  return img, 0
 
 
 
413
  ref_h, ref_w = ref_gray.shape
414
+ scan_rgb = np.array(img.convert('RGB'))
415
 
416
+ scan_rgb = _correct_perspective(scan_rgb, ref_w, ref_h)
417
+ scan_rgb_rs = _cv2.resize(scan_rgb, (ref_w, ref_h))
418
+ scan_gray_rs = _cv2.cvtColor(scan_rgb_rs, _cv2.COLOR_RGB2GRAY)
419
 
 
420
  print(f'[align] Form {form_type}: trying ECC...')
421
  aligned = _ecc_align(scan_gray_rs, ref_gray, scan_rgb_rs)
422
  if aligned is not None:
423
+ return Image.fromarray(aligned), 25
 
424
 
 
425
  print(f'[align] Form {form_type}: ECC failed, trying ORB...')
426
  aligned, inliers = _orb_align(scan_gray_rs, ref_gray, scan_rgb_rs)
427
  if aligned is not None:
 
428
  return Image.fromarray(aligned), inliers
429
 
430
+ print(f'[align] Form {form_type}: all alignment failed, resizing only')
431
  resized = _cv2.resize(scan_rgb, (ref_w, ref_h))
432
  return Image.fromarray(resized), 0
433
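Typical call, with a hypothetical upload path:

    page = Image.open('uploads/scan_102.jpg').convert('RGB')   # hypothetical path
    aligned, score = align_to_reference(page, '102')
    print(aligned.size, score)   # reference dimensions; score 0 means resize-only fallback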
 
434
 
435
+ # ── Image preprocessing ───────────────────────────────────────────
436
+
437
  def _deskew(gray: np.ndarray) -> np.ndarray:
 
438
  if not _CV2_OK:
439
  return gray
440
  edges = _cv2.Canny(gray, 50, 150, apertureSize=3)
441
+ lines = _cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100,
442
  minLineLength=100, maxLineGap=10)
443
+ if lines is None:
444
  return gray
445
+ angles = [np.degrees(np.arctan2(y2-y1, x2-x1))
446
+ for x1, y1, x2, y2 in lines[:, 0]
447
+ if -15 < np.degrees(np.arctan2(y2-y1, x2-x1)) < 15]
 
 
448
  if not angles:
449
  return gray
450
+ angle = float(np.median(angles))
451
+ if abs(angle) < 0.3:
452
  return gray
453
  h, w = gray.shape
454
+ M = _cv2.getRotationMatrix2D((w/2, h/2), angle, 1.0)
455
  return _cv2.warpAffine(gray, M, (w, h),
456
  flags=_cv2.INTER_CUBIC,
457
  borderMode=_cv2.BORDER_REPLICATE)
458
 
459
460
  def _preprocess(img: Image.Image) -> Image.Image:
461
  if not _CV2_OK:
462
  return img.convert('L')
463
  gray = np.array(img.convert('L'))
 gray = _deskew(gray)
 return Image.fromarray(gray)
466
 
467
 
468
  def _crop_field(img: Image.Image, x1r, y1r, x2r, y2r) -> Image.Image:
 
469
  w, h = img.size
470
+ pad = 4
471
+ x1 = max(0, int(x1r * w) - pad); y1 = max(0, int(y1r * h) - pad)
472
+ x2 = min(w, int(x2r * w) + pad); y2 = min(h, int(y2r * h) + pad)
473
  return img.crop((x1, y1, x2, y2))
474
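The 4 px pad is applied after scaling the fractions to pixels and is clamped to
the image bounds. A worked example on a synthetic 1000x1000 px page:

    crop = _crop_field(Image.new('L', (1000, 1000)), 0.10, 0.20, 0.30, 0.25)
    # x1 = 100-4 = 96, y1 = 200-4 = 196, x2 = 300+4 = 304, y2 = 250+4 = 254
    print(crop.size)   # (208, 58)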
 
475
 
476
+ def _ocr(crop: Image.Image) -> str:
477
+ """Run CRNN+CTC on a cropped field image."""
478
+ return _crnn_read(crop)
479
 
480
 
481
+ # ── Form type detection ───────────────────────────────────────────
482
 
483
  def detect_form_type(image_path: str) -> str:
484
+ """Auto-detect form type using ORB inlier scoring, falling back to OCR title."""
485
  if _CV2_OK:
486
  try:
487
+ img = Image.open(image_path).convert('RGB')
488
  scan_rgb = np.array(img)
489
  scan_gray = _cv2.cvtColor(scan_rgb, _cv2.COLOR_RGB2GRAY)
490
+ best_type, best_inliers = None, 0
491
+ DET_W = 800
492
  for ft, ref_path in REFERENCE_IMAGES.items():
493
  if not os.path.exists(ref_path):
494
  continue
 ref_gray = _cv2.imread(ref_path, _cv2.IMREAD_GRAYSCALE)
496
  if ref_gray is None:
497
  continue
498
  ref_h, ref_w = ref_gray.shape
 
499
  sc = min(1.0, DET_W / ref_w)
500
+ dw, dh = max(1, int(ref_w*sc)), max(1, int(ref_h*sc))
501
+ ref_ds = _cv2.resize(ref_gray, (dw, dh))
502
+ scan_ds = _cv2.resize(_cv2.resize(scan_gray, (ref_w, ref_h)), (dw, dh))
503
+ count = _orb_inliers(scan_ds, ref_ds)
504
  print(f'[detect] Form {ft}: {count} ORB inliers')
505
  if count > best_inliers:
506
+ best_inliers, best_type = count, ft
 
 
507
  if best_type and best_inliers >= 15:
508
+ print(f'[detect] Best: Form {best_type} ({best_inliers} inliers)')
509
  return best_type
510
+ print(f'[detect] ORB inconclusive ({best_inliers}), trying OCR title')
 
 
511
  except Exception as e:
512
  print(f'[template_matcher] detect_form_type ORB error: {e}')
513
+ # CRNN+CTC title fallback
 
514
  try:
515
  img_l = Image.open(image_path).convert('L')
516
  w, h = img_l.size
517
+ title = _crnn_read(img_l.crop((0, int(h*0.04), w, int(h*0.15)))).upper()
 
518
  if title:
519
  if 'LIVE BIRTH' in title or ('BIRTH' in title
520
  and 'DEATH' not in title and 'MARRIAGE' not in title):
521
  return '102'
522
+ if 'DEATH' in title:
523
  return '103'
524
+ if 'MARRIAGE' in title and 'LICENSE' in title:
525
  return '90'
526
+ if 'MARRIAGE' in title:
527
  return '97'
528
+ print('[detect] Could not detect form type; defaulting to 102.')
 
529
  except Exception as e:
530
  print(f'[template_matcher] detect_form_type OCR error: {e}')
531
  return '102'
532
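Usage sketch (hypothetical path); the returned code feeds straight into
extract_fields:

    ft = detect_form_type('uploads/scan.jpg')   # hypothetical upload
    print(ft)                                   # '102', '103', '90' or '97'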
 
533
 
534
+ # ── Main extraction ───────────────────────────────────────────────
535
+
536
  def extract_fields(image_path: str, form_type: str) -> dict:
537
  """
538
+ Extract handwritten field values from a civil registry form scan.
539
 
540
  Args:
541
+ image_path : path to uploaded form image (PNG / JPG / PDF page)
542
+ form_type : '102' | '103' | '90' | '97'
543
 
544
  Returns:
545
  dict of { field_name: extracted_text }
 """
 template = TEMPLATES.get(form_type)
548
  if template is None:
549
  print(f'[template_matcher] No template for form type: {form_type}')
550
  return {}
551
+ if _get_crnn() is None:
552
+ print('[template_matcher] CRNN+CTC not available')
 
553
  return {}
554
  try:
555
  img = Image.open(image_path).convert('RGB')
556
  except Exception as e:
557
  print(f'[template_matcher] Cannot open image: {e}')
558
  return {}
559
 
 
 
560
  img, orb_inliers = align_to_reference(img, form_type)
561
+ processed = _preprocess(img)
562
 
563
+ # Anchor detection disabled: CRNN+CTC is trained on handwritten text and
564
+ # reads printed labels inconsistently, causing fields to jump between
565
+ # anchor-relative and absolute positions across runs.
566
+ # After ECC/ORB alignment the absolute coordinates are stable and sufficient.
567
  form_w, form_h = img.size
568
+ field_names, crops = [], []
569
 
570
  for field_name, coords in template.items():
571
+ x1r, y1r, x2r, y2r, _ = coords
572
+ crop = _crop_field(processed, x1r, y1r, x2r, y2r)
573
574
  field_names.append(field_name)
575
  crops.append(crop)
576
577
 
578
+ fields = {}
579
580
+ for field_name, crop in zip(field_names, crops):
581
  text = _postprocess(_ocr(crop), field_name)
582
  if text:
583
  fields[field_name] = text
584
 
585
+ print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields '
+ f'(alignment score: {orb_inliers})')
 
587
  return fields
588
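A result-handling sketch with a hypothetical path:

    fields = extract_fields('uploads/scan_103.jpg', '103')   # hypothetical upload
    print(fields.get('deceased_name', '<empty>'))
    print(f"{len(fields)} of {len(TEMPLATES['103'])} fields extracted")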
 
589
 
590
+ # ── Debug visualisation ───────────────────────────────────────────
591
 
592
+ def debug_draw_boxes(image_path: str, form_type: str, out_path: str = None) -> str:
593
  """
594
+ Draw all field boxes on the aligned image and save it.
 
 
595
 
596
+ All boxes are drawn in blue at their absolute template coordinates;
+ the GREEN/RED/ORANGE anchor states from v2 no longer exist.
601
  """
602
  from PIL import ImageDraw, ImageFont
603
 
604
  template = TEMPLATES.get(form_type)
605
  if not template:
606
  print(f'No template for {form_type}')
607
+ return None
608
 
609
+ img, _ = align_to_reference(Image.open(image_path).convert('RGB'), form_type)
610
+ draw = ImageDraw.Draw(img)
 
611
  w, h = img.size
612
 
613
  try:
614
+ font = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 11)
615
+ except Exception:
616
+ try:
617
+ font = ImageFont.truetype('C:/Windows/Fonts/arial.ttf', 11)
618
+ except Exception:
619
+ font = ImageFont.load_default()
620
 
621
+ for field_name, coords in template.items():
 
622
  x1r, y1r, x2r, y2r, _ = coords
623
+ bx1, by1 = int(x1r*w), int(y1r*h)
624
+ bx2, by2 = int(x2r*w), int(y2r*h)
625
+ draw.rectangle([bx1, by1, bx2, by2], outline='#1a6fd4', width=1)
626
+ draw.text((bx1+2, by1+2), field_name, fill='#1a6fd4', font=font)
627
 
628
  base, ext = os.path.splitext(image_path)
629
  out = out_path or f'{base}_debug_{form_type}{ext}'
630
  img.save(out)
631
  print(f'[template_matcher] Debug image saved: {out}')
632
+ print(' BLUE = absolute template coordinates (after alignment)')
636
  return out
637
 
638
 
639
+ # ── PDF helper ────────────────────────────────────────────────────
640
+
641
+ def pdf_to_image(pdf_path: str, page: int = 0) -> str:
642
+ try:
643
+ from pdf2image import convert_from_path
644
+ pages = convert_from_path(pdf_path, dpi=150)
645
+ out_path = pdf_path.replace('.pdf', f'_page{page}.png')
646
+ pages[page].save(out_path, 'PNG')
647
+ return out_path
648
+ except ImportError:
649
+ print('[template_matcher] pdf2image not installed.')
650
+ return None
651
+ except Exception as e:
652
+ print(f'[template_matcher] PDF conversion failed: {e}')
653
+ return None
654
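Putting the pieces together for a PDF upload (hypothetical path):

    png = pdf_to_image('uploads/certificate.pdf')   # hypothetical upload
    if png:
        ft = detect_form_type(png)
        print(extract_fields(png, ft))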
+
655
+
656
+ # ── CLI ───────────────────────────────────────────────────────────
657
+
658
  if __name__ == '__main__':
659
  if len(sys.argv) < 3:
660
+ print('Usage: python template_matcher.py <image_path> <form_type> [out_path]')
661
  print(' form_type: 102 | 103 | 90 | 97')
 
662
  sys.exit(1)
663
 
664
  img_path = sys.argv[1]
665
  form_type = sys.argv[2]
666
+ out_path = sys.argv[3] if len(sys.argv) > 3 else None
667
 
668
+ out = debug_draw_boxes(img_path, form_type, out_path)
669
+ print(f'\nDebug image: {out}')
670
+ print(' All boxes use absolute template coordinates\n')
671
 
 
672
  result = extract_fields(img_path, form_type)
673
+ print(f'Extracted fields ({len(result)}):')
674
  for k, v in result.items():
675
+ print(f' {k:<40} = {v}')
676
+
677
  template = TEMPLATES.get(form_type, {})
678
+ missing = [k for k in template if k not in result]
679
  if missing:
680
  print(f'\nEmpty fields ({len(missing)}):')
681
  for k in missing:
682
+ print(f' {k}')