Spaces:

Sathvik-kota
/

Datathon

Sleeping

Sathvik-kota committed on Nov 30, 2025

Commit

9548699

verified ·

1 Parent(s): 6a1f77f

Upload folder using huggingface_hub

Files changed (1) hide show

app.py CHANGED Viewed

@@ -68,6 +68,24 @@ def vision_client():
         _vision_client = vision.ImageAnnotatorClient()
     return _vision_client
 # -------------------------------------------------------------------------
 # Enhanced Name Correction Dictionary
 # -------------------------------------------------------------------------
@@ -523,6 +541,9 @@ def detect_totals_in_rows(rows: List[List[Dict[str, Any]]]) -> Tuple[Optional[fl
     for row in rows:
         row_text = " ".join([c["text"] for c in row])
         row_lower = row_text.lower()
         tokens = row_text.split()
         amounts = []

         _vision_client = vision.ImageAnnotatorClient()
     return _vision_client
+# -------------------------------------------------------------------------
+# Header Detection for Tables
+# -------------------------------------------------------------------------
+HEADER_KEYWORDS = [
+    "description", "qty", "hrs", "rate", "discount", "net", "amt", "amount",
+    "consultation", "address", "sex", "age", "mobile", "patient", "category",
+    "doctor", "dr", "invoice", "bill", "subtotal", "total", "charges", "service"
+]
+HEADER_PHRASES = [
+    "description qty / hrs consultation rate discount net amt",
+    "description qty / hrs rate discount net amt",
+    "description qty / hrs rate net amt",
+    "description qty hrs rate discount net amt",
+    "description qty / hrs rate discount net amt",
+]
+HEADER_PHRASES = [h.lower() for h in HEADER_PHRASES]
 # -------------------------------------------------------------------------
 # Enhanced Name Correction Dictionary
 # -------------------------------------------------------------------------
     for row in rows:
         row_text = " ".join([c["text"] for c in row])
         row_lower = row_text.lower()
+        header_hit_count = sum(1 for h in HEADER_KEYWORDS if h in row_lower)
+        if row_lower in HEADER_PHRASES or header_hit_count >= 3:
+            continue
         tokens = row_text.split()
         amounts = []