Spaces:

ifgr003
/

ocr

Sleeping

App Files Community

hanz245 committed on Apr 30

Commit

0cdc412

1 Parent(s): 41cfb0b

update

Browse files

Files changed (1) hide show

template_matcher.py +14 -65

template_matcher.py CHANGED Viewed

@@ -93,51 +93,11 @@ def _get_crnn():
     return _crnn_ocr
-def _decode_ctc_with_confidence(outputs, idx_to_char) -> list:
-    """
-    Decode CRNN+CTC logits and compute per-field confidence.
-    Confidence is the mean probability of the kept non-blank characters after
-    greedy CTC collapse. This gives a real OCR confidence from the CRNN logits.
-    Assumption: CTC blank index is 0. If your model uses a different blank
-    index, change blank_idx below.
-    """
-    import torch
-    blank_idx = 0
-    probs = torch.softmax(outputs, dim=2)
-    max_probs, preds = torch.max(probs, dim=2)
-    results = []
-    for pred_seq, prob_seq in zip(preds, max_probs):
-        text_chars = []
-        char_probs = []
-        prev_idx = None
-        for idx, prob in zip(pred_seq.tolist(), prob_seq.tolist()):
-            if idx != blank_idx and idx != prev_idx:
-                char = idx_to_char.get(idx, '')
-                if char:
-                    text_chars.append(char)
-                    char_probs.append(float(prob))
-            prev_idx = idx
-        text = ''.join(text_chars).strip()
-        confidence = sum(char_probs) / len(char_probs) if char_probs else 0.0
-        results.append({
-            'text': text,
-            'confidence': float(confidence),
-        })
-    return results
-def _crnn_read(crop_img: Image.Image) -> dict:
-    """Run CRNN+CTC on a single PIL Image crop and return text + confidence."""
     ocr = _get_crnn()
-    if ocr is None:
-        return {'text': '', 'confidence': 0.0}
     try:
         import torch
@@ -151,24 +111,23 @@ def _crnn_read(crop_img: Image.Image) -> dict:
         with torch.no_grad():
             outputs = ocr.model(tensor)
-        decoded = _decode_ctc_with_confidence(outputs.cpu(), ocr.idx_to_char)
-        return decoded[0] if decoded else {'text': '', 'confidence': 0.0}
     except Exception as e:
         print(f'[template_matcher] CRNN+CTC read error: {e}')
-        return {'text': '', 'confidence': 0.0}
 def _crnn_read_batch(crops: list) -> list:
     """
     Run CRNN+CTC on a list of PIL Image crops in one forward pass.
-    Returns a list of {'text': str, 'confidence': float}.
     """
     if not crops:
         return []
     ocr = _get_crnn()
-    if ocr is None:
-        return [{'text': '', 'confidence': 0.0} for _ in crops]
     try:
         import torch
@@ -188,7 +147,8 @@ def _crnn_read_batch(crops: list) -> list:
         with torch.no_grad():
             outputs = ocr.model(batch)
-        return _decode_ctc_with_confidence(outputs.cpu(), ocr.idx_to_char)
     except Exception as e:
         print(f'[template_matcher] CRNN batch error: {e}; falling back to serial')
@@ -1325,8 +1285,7 @@ def detect_form_type(image_path: str) -> str:
         img_l = Image.open(image_path).convert('L')
         w, h = img_l.size
         title_crop = img_l.crop((0, int(h * 0.04), w, int(h * 0.15)))
-        title_result = _crnn_read(title_crop)
-        title = (title_result.get('text', '') if isinstance(title_result, dict) else str(title_result)).upper()
         if title:
             if 'LIVE BIRTH' in title or ('BIRTH' in title and 'DEATH' not in title and 'MARRIAGE' not in title):
@@ -1406,21 +1365,12 @@ def extract_fields(image_path: str, form_type: str = None):
                     assist_text = _paddle_read(crop)
             assist_texts.append(assist_text)
-        crnn_results = _crnn_read_batch(crops)
-        confidence = {}
-        for field_name, crnn_res, assist_text in zip(field_names, crnn_results, assist_texts):
-            if isinstance(crnn_res, dict):
-                crnn_text = crnn_res.get('text', '')
-                crnn_conf = float(crnn_res.get('confidence', 0.0) or 0.0)
-            else:
-                crnn_text = str(crnn_res or '')
-                crnn_conf = 0.0
             final_text = _smart_merge(field_name, crnn_text, assist_text)
             if final_text:
                 fields[field_name] = final_text
-                confidence[field_name] = crnn_conf
         print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
         paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
@@ -1432,7 +1382,6 @@ def extract_fields(image_path: str, form_type: str = None):
         fields['_quality'] = quality
         fields['_corrections'] = corrections
-        fields['_confidence'] = confidence
         return fields
     except Exception as e:
         print(f'[template_matcher] extract_fields error: {e}')

     return _crnn_ocr
+def _crnn_read(crop_img: Image.Image) -> str:
+    """Run CRNN+CTC on a single PIL Image crop and return decoded text."""
     ocr = _get_crnn()
+    if ocr is None or _crnn_decode is None:
+        return ''
     try:
         import torch
         with torch.no_grad():
             outputs = ocr.model(tensor)
+        decoded = _crnn_decode(outputs.cpu(), ocr.idx_to_char, method='greedy')
+        return decoded[0].strip()
     except Exception as e:
         print(f'[template_matcher] CRNN+CTC read error: {e}')
+        return ''
 def _crnn_read_batch(crops: list) -> list:
     """
     Run CRNN+CTC on a list of PIL Image crops in one forward pass.
     """
     if not crops:
         return []
     ocr = _get_crnn()
+    if ocr is None or _crnn_decode is None:
+        return [''] * len(crops)
     try:
         import torch
         with torch.no_grad():
             outputs = ocr.model(batch)
+        decoded = _crnn_decode(outputs.cpu(), ocr.idx_to_char, method='greedy')
+        return [d.strip() for d in decoded]
     except Exception as e:
         print(f'[template_matcher] CRNN batch error: {e}; falling back to serial')
         img_l = Image.open(image_path).convert('L')
         w, h = img_l.size
         title_crop = img_l.crop((0, int(h * 0.04), w, int(h * 0.15)))
+        title = _crnn_read(title_crop).upper()
         if title:
             if 'LIVE BIRTH' in title or ('BIRTH' in title and 'DEATH' not in title and 'MARRIAGE' not in title):
                     assist_text = _paddle_read(crop)
             assist_texts.append(assist_text)
+        crnn_texts = _crnn_read_batch(crops)
+        for field_name, crnn_text, assist_text in zip(field_names, crnn_texts, assist_texts):
             final_text = _smart_merge(field_name, crnn_text, assist_text)
             if final_text:
                 fields[field_name] = final_text
         print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
         paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
         fields['_quality'] = quality
         fields['_corrections'] = corrections
         return fields
     except Exception as e:
         print(f'[template_matcher] extract_fields error: {e}')