hanz245 committed on
Commit
e9ecccb
·
1 Parent(s): 808959b
Files changed (2) hide show
  1. app.py +30 -3
  2. template_matcher.py +5 -2
app.py CHANGED
@@ -456,6 +456,9 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
456
 
457
  raw = extract_fields(img_path, source_type)
458
 
 
 
 
459
  if isinstance(raw, dict) and raw.get('status') == 'error':
460
  print('[app.py] Blank page detected — skipping extraction')
461
  raise ValueError('Blank page detected. Please try another file.')
@@ -471,6 +474,26 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
471
  raw = {**raw, **raw2}
472
 
473
  fields = _map_template_output(raw, form_hint)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
475
 
476
  if _bridge is not None:
@@ -522,13 +545,17 @@ def _run_template_pipeline(file_path, form_hint, file2_path=None):
522
 
523
  ner_count = sum(1 for v in ner_fields.values() if v)
524
  print(f'[app.py] NER enriched {ner_count} fields')
525
- confidence = {k: mnb_result['confidence'] for k in fields}
526
 
527
  except Exception as _ner_err:
528
  print(f'[app.py] NER error (using template only): {_ner_err}')
529
- confidence = {k: 0.85 for k in fields}
 
 
 
 
530
  else:
531
- confidence = {k: 0.85 for k in fields}
532
 
533
  non_empty = {k: v for k, v in fields.items() if v}
534
  print(f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
 
456
 
457
  raw = extract_fields(img_path, source_type)
458
 
459
+ confidence = raw.get("_crnn_confidence", {})
460
+ fields = {k: v for k, v in raw.items() if not k.startswith("_")}
461
+
462
  if isinstance(raw, dict) and raw.get('status') == 'error':
463
  print('[app.py] Blank page detected — skipping extraction')
464
  raise ValueError('Blank page detected. Please try another file.')
 
474
  raw = {**raw, **raw2}
475
 
476
  fields = _map_template_output(raw, form_hint)
477
+ confidence = {
478
+ "registry_no": raw_confidence.get("registry_no", 0.60),
479
+ "city_municipality": raw_confidence.get("city_municipality", 0.60),
480
+ "province": raw_confidence.get("province", 0.60),
481
+ "date_submitted": raw_confidence.get("registration_date", 0.60),
482
+ "child_first": raw_confidence.get("name_first", 0.60),
483
+ "child_middle": raw_confidence.get("name_middle", 0.60),
484
+ "child_last": raw_confidence.get("name_last", 0.60),
485
+ "sex": raw_confidence.get("sex", 0.60),
486
+ "dob_day": raw_confidence.get("dob_day", 0.60),
487
+ "dob_month": raw_confidence.get("dob_month", 0.60),
488
+ "dob_year": raw_confidence.get("dob_year", 0.60),
489
+ "pob_city": raw_confidence.get("place_of_birth", 0.60),
490
+ "mother_first": raw_confidence.get("mother_name", 0.60),
491
+ "mother_citizenship": raw_confidence.get("mother_citizenship", 0.60),
492
+ "father_first": raw_confidence.get("father_name", 0.60),
493
+ "father_citizenship": raw_confidence.get("father_citizenship", 0.60),
494
+ "parents_marriage_month": raw_confidence.get("marriage_date", 0.60),
495
+ "parents_marriage_city": raw_confidence.get("marriage_place", 0.60),
496
+ }
497
  form_class = form_hint if form_hint in ('1A', '2A', '3A', '90') else '1A'
498
 
499
  if _bridge is not None:
 
545
 
546
  ner_count = sum(1 for v in ner_fields.values() if v)
547
  print(f'[app.py] NER enriched {ner_count} fields')
548
+ confidence = {k: confidence.get(k, 0.60) for k in fields}
549
 
550
  except Exception as _ner_err:
551
  print(f'[app.py] NER error (using template only): {_ner_err}')
552
+ confidence = {}
553
+
554
+ for key, value in fields.items():
555
+ # fallback if no confidence yet
556
+ confidence[key] = 0.60
557
  else:
558
+ confidence = {k: confidence.get(k, 0.60) for k in fields}
559
 
560
  non_empty = {k: v for k, v in fields.items() if v}
561
  print(f'[app.py] form_class={form_class}, {len(non_empty)}/{len(fields)} non-empty fields')
template_matcher.py CHANGED
@@ -1436,7 +1436,7 @@ def extract_fields(image_path: str, form_type: str = None):
1436
  if final_text:
1437
  fields[field_name] = final_text
1438
  crnn_confidences[field_name] = crnn_conf
1439
-
1440
  print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
1441
  paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
1442
  abs_count = sum(1 for m in debug_methods.values() if m == 'absolute')
@@ -1456,7 +1456,10 @@ def extract_fields(image_path: str, form_type: str = None):
1456
  fields['_quality'] = quality
1457
  fields['_corrections'] = corrections
1458
  fields['_crnn_confidence'] = crnn_confidences
1459
- return fields
 
 
 
1460
 
1461
  except Exception as e:
1462
  print(f'[template_matcher] extract_fields error: {e}')
 
1436
  if final_text:
1437
  fields[field_name] = final_text
1438
  crnn_confidences[field_name] = crnn_conf
1439
+
1440
  print(f'[template_matcher] Extracted: {len(fields)}/{len(template)} fields')
1441
  paddle_count = sum(1 for m in debug_methods.values() if m == 'paddle-detect')
1442
  abs_count = sum(1 for m in debug_methods.values() if m == 'absolute')
 
1456
  fields['_quality'] = quality
1457
  fields['_corrections'] = corrections
1458
  fields['_crnn_confidence'] = crnn_confidences
1459
+ return {
1460
+ **fields,
1461
+ "_crnn_confidence": crnn_confidences
1462
+ }
1463
 
1464
  except Exception as e:
1465
  print(f'[template_matcher] extract_fields error: {e}')