hanz245 committed on
Commit
82f5b22
·
verified ·
1 Parent(s): 31c64c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -133,11 +133,13 @@ def process_document():
133
  except Exception as e:
134
  tb = traceback.format_exc()
135
  print(f"[app.py] ❌ Processing error:\n{tb}")
 
 
136
  return jsonify({
137
  'status': 'error',
138
  'message': str(e),
139
- 'trace': tb
140
- }), 500
141
  finally:
142
  try:
143
  os.remove(saved_path)
@@ -479,38 +481,40 @@ def _map_pipeline_output_form90(raw: dict, role: str):
479
  # TEMPLATE MATCHING PIPELINE — coordinate crop + Tesseract OCR
480
  # ═════════════════════════════════════════════════════════════
481
  def _run_template_pipeline(file_path, form_hint, file2_path=None):
482
- """
483
- Use coordinate templates to crop and OCR each field region.
484
- form_hint: '1A' / '2A' / '3A' / '90'
485
- Maps form_hint β†’ source form type (102/103/97/90).
486
- """
487
- # Convert PDF to image if needed
488
  img_path = file_path
489
  if file_path.lower().endswith('.pdf'):
490
  img_path = pdf_to_image(file_path) or file_path
491
 
492
- # Auto-detect form type if hint is generic 'cert'
493
  hint_to_source = {'1A': '102', '2A': '103', '3A': '97', '90': '90'}
494
  if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
495
  detected = detect_form_type(img_path)
496
  source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
497
  form_hint = source_map.get(detected, '1A')
498
  source_type = detected
499
- print(
500
- f'[app.py] Auto-detected form type: {detected} β†’ output: {form_hint}')
501
  else:
502
  source_type = hint_to_source.get(form_hint, '102')
503
 
504
  # Extract fields from primary file
505
  raw = extract_fields(img_path, source_type)
506
 
507
- # For Form 90 (marriage license), also process bride file if provided
 
 
 
 
 
508
  if form_hint == '90' and file2_path:
509
  img_path2 = file2_path
510
  if file2_path.lower().endswith('.pdf'):
511
  img_path2 = pdf_to_image(file2_path) or file2_path
512
  raw2 = extract_fields(img_path2, '90')
513
- raw = {**raw, **raw2} # merge β€” bride fields fill any gaps
 
 
 
 
 
514
 
515
  # Map source field names β†’ output cert field names
516
  fields = _map_template_output(raw, form_hint)
 
133
  except Exception as e:
134
  tb = traceback.format_exc()
135
  print(f"[app.py] ❌ Processing error:\n{tb}")
136
+ # Distinguish user-facing errors (blank page) from real crashes
137
+ is_user_error = isinstance(e, ValueError)
138
  return jsonify({
139
  'status': 'error',
140
  'message': str(e),
141
+ 'trace': '' if is_user_error else tb,
142
+ }), 200 if is_user_error else 500
143
  finally:
144
  try:
145
  os.remove(saved_path)
 
481
  # TEMPLATE MATCHING PIPELINE — coordinate crop + Tesseract OCR
482
  # ═════════════════════════════════════════════════════════════
483
  def _run_template_pipeline(file_path, form_hint, file2_path=None):
 
 
 
 
 
 
484
  img_path = file_path
485
  if file_path.lower().endswith('.pdf'):
486
  img_path = pdf_to_image(file_path) or file_path
487
 
 
488
  hint_to_source = {'1A': '102', '2A': '103', '3A': '97', '90': '90'}
489
  if form_hint == '1A' and hint_to_source.get(form_hint) == '102':
490
  detected = detect_form_type(img_path)
491
  source_map = {'102': '1A', '103': '2A', '97': '3A', '90': '90'}
492
  form_hint = source_map.get(detected, '1A')
493
  source_type = detected
494
+ print(f'[app.py] Auto-detected form type: {detected} β†’ output: {form_hint}')
 
495
  else:
496
  source_type = hint_to_source.get(form_hint, '102')
497
 
498
  # Extract fields from primary file
499
  raw = extract_fields(img_path, source_type)
500
 
501
+ # ── EARLY EXIT: blank or unreadable image ─────────────────
502
+ if isinstance(raw, dict) and raw.get('status') == 'error':
503
+ print(f'[app.py] Extraction aborted: {raw["message"]}')
504
+ raise ValueError(raw['message']) # caught by the try/except in process_document()
505
+
506
+ # For Form 90, also process bride file if provided
507
  if form_hint == '90' and file2_path:
508
  img_path2 = file2_path
509
  if file2_path.lower().endswith('.pdf'):
510
  img_path2 = pdf_to_image(file2_path) or file2_path
511
  raw2 = extract_fields(img_path2, '90')
512
+ # Guard bride file too
513
+ if isinstance(raw2, dict) and raw2.get('status') == 'error':
514
+ print(f'[app.py] Bride file aborted: {raw2["message"]}')
515
+ else:
516
+ raw = {**raw, **raw2}
517
+
518
 
519
  # Map source field names β†’ output cert field names
520
  fields = _map_template_output(raw, form_hint)