Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -68,6 +68,24 @@ def vision_client():
|
|
| 68 |
_vision_client = vision.ImageAnnotatorClient()
|
| 69 |
return _vision_client
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
# -------------------------------------------------------------------------
|
| 72 |
# Enhanced Name Correction Dictionary
|
| 73 |
# -------------------------------------------------------------------------
|
|
@@ -523,6 +541,9 @@ def detect_totals_in_rows(rows: List[List[Dict[str, Any]]]) -> Tuple[Optional[fl
|
|
| 523 |
for row in rows:
|
| 524 |
row_text = " ".join([c["text"] for c in row])
|
| 525 |
row_lower = row_text.lower()
|
|
|
|
|
|
|
|
|
|
| 526 |
tokens = row_text.split()
|
| 527 |
|
| 528 |
amounts = []
|
|
|
|
| 68 |
_vision_client = vision.ImageAnnotatorClient()
|
| 69 |
return _vision_client
|
| 70 |
|
| 71 |
+
# -------------------------------------------------------------------------
|
| 72 |
+
# Header Detection for Tables
|
| 73 |
+
# -------------------------------------------------------------------------
|
| 74 |
+
# Words that commonly appear in a table header row.  The totals scan counts
# how many of these occur in the lowercased row text and skips the row when
# three or more are present.
# NOTE(review): that count uses substring containment (`h in row_lower`), so
# short entries also match inside longer words -- "dr" inside "address",
# "total" inside "subtotal".  A real totals row can therefore reach the
# threshold; consider word-boundary matching at the use site.
HEADER_KEYWORDS = [
    "description", "qty", "hrs", "rate", "discount", "net", "amt", "amount",
    "consultation", "address", "sex", "age", "mobile", "patient", "category",
    "doctor", "dr", "invoice", "bill", "subtotal", "total", "charges", "service",
]

# Complete header rows, compared for equality against the whole lowercased
# row text (not as substrings).
HEADER_PHRASES = [
    "description qty / hrs consultation rate discount net amt",
    "description qty / hrs rate discount net amt",
    "description qty / hrs rate net amt",
    "description qty hrs rate discount net amt",
]
# Normalise to lowercase so entries compare equal to lowercased row text, and
# drop repeats (order preserved) so entries differing only in case collapse
# into a single phrase.
HEADER_PHRASES = list(dict.fromkeys(h.lower() for h in HEADER_PHRASES))
|
| 88 |
+
|
| 89 |
# -------------------------------------------------------------------------
|
| 90 |
# Enhanced Name Correction Dictionary
|
| 91 |
# -------------------------------------------------------------------------
|
|
|
|
| 541 |
for row in rows:
|
| 542 |
row_text = " ".join([c["text"] for c in row])
|
| 543 |
row_lower = row_text.lower()
|
| 544 |
+
header_hit_count = sum(1 for h in HEADER_KEYWORDS if h in row_lower)
|
| 545 |
+
if row_lower in HEADER_PHRASES or header_hit_count >= 3:
|
| 546 |
+
continue
|
| 547 |
tokens = row_text.split()
|
| 548 |
|
| 549 |
amounts = []
|