Update app.py
Browse files
app.py
CHANGED
|
@@ -133,11 +133,13 @@ def process_document():
|
|
| 133 |
except Exception as e:
|
| 134 |
tb = traceback.format_exc()
|
| 135 |
print(f"[app.py] β Processing error:\n{tb}")
|
|
|
|
|
|
|
| 136 |
return jsonify({
|
| 137 |
'status': 'error',
|
| 138 |
'message': str(e),
|
| 139 |
-
'trace': tb
|
| 140 |
-
}), 500
|
| 141 |
finally:
|
| 142 |
try:
|
| 143 |
os.remove(saved_path)
|
|
@@ -479,38 +481,40 @@ def _map_pipeline_output_form90(raw: dict, role: str):
|
|
| 479 |
# TEMPLATE MATCHING PIPELINE — coordinate crop + Tesseract OCR
|
| 480 |
# ─────────────────────────────────────────────────────────────
|
| 481 |
def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
| 482 |
-
"""
|
| 483 |
-
Use coordinate templates to crop and OCR each field region.
|
| 484 |
-
form_hint: '1A' / '2A' / '3A' / '90'
|
| 485 |
-
Maps form_hint → source form type (102/103/97/90).
|
| 486 |
-
"""
|
| 487 |
-
# Convert PDF to image if needed
|
| 488 |
img_path = file_path
|
| 489 |
if file_path.lower().endswith('.pdf'):
|
| 490 |
img_path = pdf_to_image(file_path) or file_path
|
| 491 |
|
| 492 |
-
# Auto-detect form type if hint is generic 'cert'
|
| 493 |
hint_to_source = {'1A': '102', '2A': '103', '3A': '97', '90': '90'}
|
| 494 |
if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
|
| 495 |
detected = detect_form_type(img_path)
|
| 496 |
source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
|
| 497 |
form_hint = source_map.get(detected, '1A')
|
| 498 |
source_type = detected
|
| 499 |
-
print(
|
| 500 |
-
f'[app.py] Auto-detected form type: {detected} β output: {form_hint}')
|
| 501 |
else:
|
| 502 |
source_type = hint_to_source.get(form_hint, '102')
|
| 503 |
|
| 504 |
# Extract fields from primary file
|
| 505 |
raw = extract_fields(img_path, source_type)
|
| 506 |
|
| 507 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 508 |
if form_hint == '90' and file2_path:
|
| 509 |
img_path2 = file2_path
|
| 510 |
if file2_path.lower().endswith('.pdf'):
|
| 511 |
img_path2 = pdf_to_image(file2_path) or file2_path
|
| 512 |
raw2 = extract_fields(img_path2, '90')
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
# Map source field names → output cert field names
|
| 516 |
fields = _map_template_output(raw, form_hint)
|
|
|
|
| 133 |
except Exception as e:
|
| 134 |
tb = traceback.format_exc()
|
| 135 |
print(f"[app.py] β Processing error:\n{tb}")
|
| 136 |
+
# Distinguish user-facing errors (blank page) from real crashes
|
| 137 |
+
is_user_error = isinstance(e, ValueError)
|
| 138 |
return jsonify({
|
| 139 |
'status': 'error',
|
| 140 |
'message': str(e),
|
| 141 |
+
'trace': '' if is_user_error else tb,
|
| 142 |
+
}), 200 if is_user_error else 500
|
| 143 |
finally:
|
| 144 |
try:
|
| 145 |
os.remove(saved_path)
|
|
|
|
| 481 |
# TEMPLATE MATCHING PIPELINE — coordinate crop + Tesseract OCR
|
| 482 |
# ─────────────────────────────────────────────────────────────
|
| 483 |
def _run_template_pipeline(file_path, form_hint, file2_path=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
img_path = file_path
|
| 485 |
if file_path.lower().endswith('.pdf'):
|
| 486 |
img_path = pdf_to_image(file_path) or file_path
|
| 487 |
|
|
|
|
| 488 |
hint_to_source = {'1A': '102', '2A': '103', '3A': '97', '90': '90'}
|
| 489 |
if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
|
| 490 |
detected = detect_form_type(img_path)
|
| 491 |
source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
|
| 492 |
form_hint = source_map.get(detected, '1A')
|
| 493 |
source_type = detected
|
| 494 |
+
print(f'[app.py] Auto-detected form type: {detected} β output: {form_hint}')
|
|
|
|
| 495 |
else:
|
| 496 |
source_type = hint_to_source.get(form_hint, '102')
|
| 497 |
|
| 498 |
# Extract fields from primary file
|
| 499 |
raw = extract_fields(img_path, source_type)
|
| 500 |
|
| 501 |
+
# ── EARLY EXIT: blank or unreadable image ─────────────────
|
| 502 |
+
if isinstance(raw, dict) and raw.get('status') == 'error':
|
| 503 |
+
print(f'[app.py] Extraction aborted: {raw["message"]}')
|
| 504 |
+
raise ValueError(raw['message']) # caught by the try/except in process_document()
|
| 505 |
+
|
| 506 |
+
# For Form 90, also process bride file if provided
|
| 507 |
if form_hint == '90' and file2_path:
|
| 508 |
img_path2 = file2_path
|
| 509 |
if file2_path.lower().endswith('.pdf'):
|
| 510 |
img_path2 = pdf_to_image(file2_path) or file2_path
|
| 511 |
raw2 = extract_fields(img_path2, '90')
|
| 512 |
+
# Guard bride file too
|
| 513 |
+
if isinstance(raw2, dict) and raw2.get('status') == 'error':
|
| 514 |
+
print(f'[app.py] Bride file aborted: {raw2["message"]}')
|
| 515 |
+
else:
|
| 516 |
+
raw = {**raw, **raw2}
|
| 517 |
+
|
| 518 |
|
| 519 |
# Map source field names → output cert field names
|
| 520 |
fields = _map_template_output(raw, form_hint)
|