update
Browse files
- app.py +30 -3
- template_matcher.py +5 -2
app.py
CHANGED
|
@@ -456,6 +456,9 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
| 456 |
|
| 457 |
raw = extract_fields(img_path, source_type)
|
| 458 |
|
|
|
|
|
|
|
|
|
|
| 459 |
if isinstance(raw, dict) and raw.get('status') == 'error':
|
| 460 |
print('[app.py] Blank page detected — skipping extraction')
|
| 461 |
raise ValueError('Blank page detected. Please try another file.')
|
|
@@ -471,6 +474,26 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
| 471 |
raw = {**raw, **raw2}
|
| 472 |
|
| 473 |
fields = _map_template_output(raw, form_hint)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
|
| 475 |
|
| 476 |
if _bridge is not None:
|
|
@@ -522,13 +545,17 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
| 522 |
|
| 523 |
ner_count = sum(1 for v in ner_fields.values() if v)
|
| 524 |
print(f'[app.py] NER enriched {ner_count} fields')
|
| 525 |
-
confidence = {k:
|
| 526 |
|
| 527 |
except Exception as _ner_err:
|
| 528 |
print(f'[app.py] NER error (using template only): {_ner_err}')
|
| 529 |
-
confidence = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
else:
|
| 531 |
-
confidence = {k: 0.
|
| 532 |
|
| 533 |
non_empty = {k: v for k, v in fields.items() if v}
|
| 534 |
print(f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
|
|
|
|
| 456 |
|
| 457 |
raw = extract_fields(img_path, source_type)
|
| 458 |
|
| 459 |
+
confidence = raw.get("_crnn_confidence", {})
|
| 460 |
+
fields = {k: v for k, v in raw.items() if not k.startswith("_")}
|
| 461 |
+
|
| 462 |
if isinstance(raw, dict) and raw.get('status') == 'error':
|
| 463 |
print('[app.py] Blank page detected — skipping extraction')
|
| 464 |
raise ValueError('Blank page detected. Please try another file.')
|
|
|
|
| 474 |
raw = {**raw, **raw2}
|
| 475 |
|
| 476 |
fields = _map_template_output(raw, form_hint)
|
| 477 |
+
confidence = {
|
| 478 |
+
"registry_no": raw_confidence.get("registry_no", 0.60),
|
| 479 |
+
"city_municipality": raw_confidence.get("city_municipality", 0.60),
|
| 480 |
+
"province": raw_confidence.get("province", 0.60),
|
| 481 |
+
"date_submitted": raw_confidence.get("registration_date", 0.60),
|
| 482 |
+
"child_first": raw_confidence.get("name_first", 0.60),
|
| 483 |
+
"child_middle": raw_confidence.get("name_middle", 0.60),
|
| 484 |
+
"child_last": raw_confidence.get("name_last", 0.60),
|
| 485 |
+
"sex": raw_confidence.get("sex", 0.60),
|
| 486 |
+
"dob_day": raw_confidence.get("dob_day", 0.60),
|
| 487 |
+
"dob_month": raw_confidence.get("dob_month", 0.60),
|
| 488 |
+
"dob_year": raw_confidence.get("dob_year", 0.60),
|
| 489 |
+
"pob_city": raw_confidence.get("place_of_birth", 0.60),
|
| 490 |
+
"mother_first": raw_confidence.get("mother_name", 0.60),
|
| 491 |
+
"mother_citizenship": raw_confidence.get("mother_citizenship", 0.60),
|
| 492 |
+
"father_first": raw_confidence.get("father_name", 0.60),
|
| 493 |
+
"father_citizenship": raw_confidence.get("father_citizenship", 0.60),
|
| 494 |
+
"parents_marriage_month": raw_confidence.get("marriage_date", 0.60),
|
| 495 |
+
"parents_marriage_city": raw_confidence.get("marriage_place", 0.60),
|
| 496 |
+
}
|
| 497 |
form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
|
| 498 |
|
| 499 |
if _bridge is not None:
|
|
|
|
| 545 |
|
| 546 |
ner_count = sum(1 for v in ner_fields.values() if v)
|
| 547 |
print(f'[app.py] NER enriched {ner_count} fields')
|
| 548 |
+
confidence = {k: confidence.get(k, 0.60) for k in fields}
|
| 549 |
|
| 550 |
except Exception as _ner_err:
|
| 551 |
print(f'[app.py] NER error (using template only): {_ner_err}')
|
| 552 |
+
confidence = {}
|
| 553 |
+
|
| 554 |
+
for key, value in fields.items():
|
| 555 |
+
# fallback if no confidence yet
|
| 556 |
+
confidence[key] = 0.60
|
| 557 |
else:
|
| 558 |
+
confidence = {k: confidence.get(k, 0.60) for k in fields}
|
| 559 |
|
| 560 |
non_empty = {k: v for k, v in fields.items() if v}
|
| 561 |
print(f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
|
template_matcher.py
CHANGED
|
@@ -1436,7 +1436,7 @@ def extract_fields(image_path: str, form_type: str = None):
|
|
| 1436 |
if final_text:
|
| 1437 |
fields[field_name] = final_text
|
| 1438 |
crnn_confidences[field_name] = crnn_conf
|
| 1439 |
-
|
| 1440 |
print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
|
| 1441 |
paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
|
| 1442 |
abs_count = sum(1 for m in debug_methods.values() if m == 'absolute')
|
|
@@ -1456,7 +1456,10 @@ def extract_fields(image_path: str, form_type: str = None):
|
|
| 1456 |
fields['_quality'] = quality
|
| 1457 |
fields['_corrections'] = corrections
|
| 1458 |
fields['_crnn_confidence'] = crnn_confidences
|
| 1459 |
-
return
|
|
|
|
|
|
|
|
|
|
| 1460 |
|
| 1461 |
except Exception as e:
|
| 1462 |
print(f'[template_matcher] extract_fields error: {e}')
|
|
|
|
| 1436 |
if final_text:
|
| 1437 |
fields[field_name] = final_text
|
| 1438 |
crnn_confidences[field_name] = crnn_conf
|
| 1439 |
+
|
| 1440 |
print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
|
| 1441 |
paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
|
| 1442 |
abs_count = sum(1 for m in debug_methods.values() if m == 'absolute')
|
|
|
|
| 1456 |
fields['_quality'] = quality
|
| 1457 |
fields['_corrections'] = corrections
|
| 1458 |
fields['_crnn_confidence'] = crnn_confidences
|
| 1459 |
+
return {
|
| 1460 |
+
**fields,
|
| 1461 |
+
"_crnn_confidence": crnn_confidences
|
| 1462 |
+
}
|
| 1463 |
|
| 1464 |
except Exception as e:
|
| 1465 |
print(f'[template_matcher] extract_fields error: {e}')
|