Spaces:

ifgr003
/

ocr

Sleeping

App Files Files Community

hanz245 committed on Apr 30

Commit

41cfb0b

1 Parent(s): c7a86ba

update

Browse files

Files changed (1) hide show

template_matcher.py +65 -14

template_matcher.py CHANGED Viewed

@@ -93,11 +93,51 @@ def _get_crnn():
     return _crnn_ocr
-def _crnn_read(crop_img: Image.Image) -> str:
-    """Run CRNN+CTC on a single PIL Image crop and return decoded text."""
     ocr = _get_crnn()
-    if ocr is None or _crnn_decode is None:
-        return ''
     try:
         import torch
@@ -111,23 +151,24 @@ def _crnn_read(crop_img: Image.Image) -> str:
         with torch.no_grad():
             outputs = ocr.model(tensor)
-        decoded = _crnn_decode(outputs.cpu(), ocr.idx_to_char, method='greedy')
-        return decoded[0].strip()
     except Exception as e:
         print(f'[template_matcher] CRNN+CTC read error: {e}')
-        return ''
 def _crnn_read_batch(crops: list) -> list:
     """
     Run CRNN+CTC on a list of PIL Image crops in one forward pass.
     """
     if not crops:
         return []
     ocr = _get_crnn()
-    if ocr is None or _crnn_decode is None:
-        return [''] * len(crops)
     try:
         import torch
@@ -147,8 +188,7 @@ def _crnn_read_batch(crops: list) -> list:
         with torch.no_grad():
             outputs = ocr.model(batch)
-        decoded = _crnn_decode(outputs.cpu(), ocr.idx_to_char, method='greedy')
-        return [d.strip() for d in decoded]
     except Exception as e:
         print(f'[template_matcher] CRNN batch error: {e}; falling back to serial')
@@ -1285,7 +1325,8 @@ def detect_form_type(image_path: str) -> str:
         img_l = Image.open(image_path).convert('L')
         w, h = img_l.size
         title_crop = img_l.crop((0, int(h * 0.04), w, int(h * 0.15)))
-        title = _crnn_read(title_crop).upper()
         if title:
             if 'LIVE BIRTH' in title or ('BIRTH' in title and 'DEATH' not in title and 'MARRIAGE' not in title):
@@ -1365,12 +1406,21 @@ def extract_fields(image_path: str, form_type: str = None):
                     assist_text = _paddle_read(crop)
             assist_texts.append(assist_text)
-        crnn_texts = _crnn_read_batch(crops)
-        for field_name, crnn_text, assist_text in zip(field_names, crnn_texts, assist_texts):
             final_text = _smart_merge(field_name, crnn_text, assist_text)
             if final_text:
                 fields[field_name] = final_text
         print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
         paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
@@ -1382,6 +1432,7 @@ def extract_fields(image_path: str, form_type: str = None):
         fields['_quality'] = quality
         fields['_corrections'] = corrections
         return fields
     except Exception as e:
         print(f'[template_matcher] extract_fields error: {e}')

     return _crnn_ocr
+def _decode_ctc_with_confidence(outputs, idx_to_char, blank_idx: int = 0) -> list:
+    """
+    Greedy-decode CRNN+CTC logits and compute a confidence per sequence.
+
+    Each frame is assigned its arg-max class; consecutive repeats are
+    collapsed and blank frames are dropped. Confidence is the mean softmax
+    probability of the frames that produced a kept character.
+
+    Args:
+        outputs: 3-D tensor of logits. Softmax is taken over dim 2 and
+            sequences are iterated along dim 0.
+            NOTE(review): this treats the layout as (batch, time, classes);
+            confirm against the model's actual output, since a time-major
+            tensor would be decoded along the wrong axis.
+        idx_to_char: Mapping (anything with ``.get``) from class index to
+            character. Indices that are missing or map to an empty string
+            are skipped.
+        blank_idx: Index of the CTC blank class. Defaults to 0, the value
+            that used to be hard-coded in the body.
+
+    Returns:
+        One ``{'text': str, 'confidence': float}`` dict per sequence. The
+        text is whitespace-stripped; confidence is 0.0 when no text remains.
+    """
+    import torch
+
+    probs = torch.softmax(outputs, dim=2)
+    max_probs, preds = torch.max(probs, dim=2)
+
+    results = []
+    # Convert to plain Python lists once instead of once per sequence.
+    for pred_seq, prob_seq in zip(preds.tolist(), max_probs.tolist()):
+        text_chars = []
+        char_probs = []
+        prev_idx = None
+        for idx, prob in zip(pred_seq, prob_seq):
+            # CTC collapse: emit a class only on the first frame of a run,
+            # and never emit the blank class.
+            if idx != blank_idx and idx != prev_idx:
+                char = idx_to_char.get(idx, '')
+                if char:
+                    text_chars.append(char)
+                    char_probs.append(float(prob))
+            prev_idx = idx
+        text = ''.join(text_chars).strip()
+        # A sequence that decodes only to whitespace yields empty text; do
+        # not report a non-zero confidence for text that is not there.
+        confidence = sum(char_probs) / len(char_probs) if text else 0.0
+        results.append({
+            'text': text,
+            'confidence': float(confidence),
+        })
+    return results
+def _crnn_read(crop_img: Image.Image) -> dict:
+    """Run CRNN+CTC on a single PIL Image crop and return text + confidence."""
     ocr = _get_crnn()
+    if ocr is None:
+        return {'text': '', 'confidence': 0.0}
     try:
         import torch
         with torch.no_grad():
             outputs = ocr.model(tensor)
+        decoded = _decode_ctc_with_confidence(outputs.cpu(), ocr.idx_to_char)
+        return decoded[0] if decoded else {'text': '', 'confidence': 0.0}
     except Exception as e:
         print(f'[template_matcher] CRNN+CTC read error: {e}')
+        return {'text': '', 'confidence': 0.0}
 def _crnn_read_batch(crops: list) -> list:
     """
     Run CRNN+CTC on a list of PIL Image crops in one forward pass.
+    Returns a list of {'text': str, 'confidence': float}.
     """
     if not crops:
         return []
     ocr = _get_crnn()
+    if ocr is None:
+        return [{'text': '', 'confidence': 0.0} for _ in crops]
     try:
         import torch
         with torch.no_grad():
             outputs = ocr.model(batch)
+        return _decode_ctc_with_confidence(outputs.cpu(), ocr.idx_to_char)
     except Exception as e:
         print(f'[template_matcher] CRNN batch error: {e}; falling back to serial')
         img_l = Image.open(image_path).convert('L')
         w, h = img_l.size
         title_crop = img_l.crop((0, int(h * 0.04), w, int(h * 0.15)))
+        title_result = _crnn_read(title_crop)
+        title = (title_result.get('text', '') if isinstance(title_result, dict) else str(title_result)).upper()
         if title:
             if 'LIVE BIRTH' in title or ('BIRTH' in title and 'DEATH' not in title and 'MARRIAGE' not in title):
                     assist_text = _paddle_read(crop)
             assist_texts.append(assist_text)
+        crnn_results = _crnn_read_batch(crops)
+        confidence = {}
+        for field_name, crnn_res, assist_text in zip(field_names, crnn_results, assist_texts):
+            if isinstance(crnn_res, dict):
+                crnn_text = crnn_res.get('text', '')
+                crnn_conf = float(crnn_res.get('confidence', 0.0) or 0.0)
+            else:
+                crnn_text = str(crnn_res or '')
+                crnn_conf = 0.0
             final_text = _smart_merge(field_name, crnn_text, assist_text)
             if final_text:
                 fields[field_name] = final_text
+                confidence[field_name] = crnn_conf
         print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
         paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
         fields['_quality'] = quality
         fields['_corrections'] = corrections
+        fields['_confidence'] = confidence
         return fields
     except Exception as e:
         print(f'[template_matcher] extract_fields error: {e}')