Sathvik-kota committed on
Commit
9548699
·
verified ·
1 Parent(s): 6a1f77f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +21 -0
app.py CHANGED
@@ -68,6 +68,24 @@ def vision_client():
68
  _vision_client = vision.ImageAnnotatorClient()
69
  return _vision_client
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # -------------------------------------------------------------------------
72
  # Enhanced Name Correction Dictionary
73
  # -------------------------------------------------------------------------
@@ -523,6 +541,9 @@ def detect_totals_in_rows(rows: List[List[Dict[str, Any]]]) -> Tuple[Optional[fl
523
  for row in rows:
524
  row_text = " ".join([c["text"] for c in row])
525
  row_lower = row_text.lower()
 
 
 
526
  tokens = row_text.split()
527
 
528
  amounts = []
 
68
  _vision_client = vision.ImageAnnotatorClient()
69
  return _vision_client
70
 
71
+ # -------------------------------------------------------------------------
72
+ # Header Detection for Tables
73
+ # -------------------------------------------------------------------------
74
+ HEADER_KEYWORDS = [
75
+ "description", "qty", "hrs", "rate", "discount", "net", "amt", "amount",
76
+ "consultation", "address", "sex", "age", "mobile", "patient", "category",
77
+ "doctor", "dr", "invoice", "bill", "subtotal", "total", "charges", "service"
78
+ ]
79
+
80
+ HEADER_PHRASES = [
81
+ "description qty / hrs consultation rate discount net amt",
82
+ "description qty / hrs rate discount net amt",
83
+ "description qty / hrs rate net amt",
84
+ "description qty hrs rate discount net amt",
85
+ "description qty / hrs rate discount net amt",
86
+ ]
87
+ HEADER_PHRASES = [h.lower() for h in HEADER_PHRASES]
88
+
89
  # -------------------------------------------------------------------------
90
  # Enhanced Name Correction Dictionary
91
  # -------------------------------------------------------------------------
 
541
  for row in rows:
542
  row_text = " ".join([c["text"] for c in row])
543
  row_lower = row_text.lower()
544
+ header_hit_count = sum(1 for h in HEADER_KEYWORDS if h in row_lower)
545
+ if row_lower in HEADER_PHRASES or header_hit_count >= 3:
546
+ continue
547
  tokens = row_text.split()
548
 
549
  amounts = []