Spaces:

Sathvik-kota
/

Datathon

Sleeping

App Files Files Community

Sathvik-kota committed on Nov 29, 2025

Commit

b100b23

verified ·

1 Parent(s): 2eca474

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +43 -0

app.py CHANGED Viewed

@@ -244,7 +244,50 @@ def parse_row_to_item(cells_row: List[Dict[str, Any]]) -> Optional[Dict[str, Any
         "item_rate": float(round(rate_val, 2)) if rate_val else 0.0,
         "item_quantity": float(qty_val)
     }
 # ---------------- FALLBACK REGEX EXTRACTOR ----------------

         "item_rate": float(round(rate_val, 2)) if rate_val else 0.0,
         "item_quantity": float(qty_val)
     }
+# ---------------- Duplicate suppression & subtotal detection ----------------
def dedupe_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Remove obvious duplicate line items, keeping the first occurrence.

    Two items are duplicates when they share the same normalized name
    (lower-cased, whitespace runs collapsed, trimmed, first 80 characters)
    and the same amount rounded to 2 decimals.

    Args:
        items: item dicts; each is expected to carry "item_name" and
            "item_amount". A missing or None name is treated as an empty
            name. An item whose amount is missing or not convertible to
            float is always kept, because it cannot be shown to duplicate
            anything.

    Returns:
        A new list with the surviving items (same dict objects) in their
        original order. Empty or None input yields an empty list.
    """
    if not items:
        return []

    collapse_ws = re.compile(r"\s+")  # compiled once, reused for every item
    seen = set()
    out = []
    for it in items:
        try:
            amount = round(float(it.get("item_amount")), 2)
        except (TypeError, ValueError):
            # Amount is absent or non-numeric: keep the item rather than
            # crash or risk dropping a distinct row.
            out.append(it)
            continue

        raw_name = it.get("item_name")
        name = "" if raw_name is None else str(raw_name)
        name = collapse_ws.sub(" ", name.lower()).strip()

        # Only the first 80 characters of the name take part in the key.
        key = (name[:80], amount)
        if key in seen:
            continue
        seen.add(key)
        out.append(it)
    return out
def detect_subtotals_and_totals(rows_texts: List[str]) -> Dict[str, Any]:
    """
    Scan page-level row texts, last row first, for subtotal / total lines.

    A row is considered only if TOTAL_KEYWORDS matches it. The first NUM_RE
    match in that row is converted with normalize_num_str; rows with no
    number, or whose number normalizes to None, are ignored. A row that
    contains "sub" (case-insensitive) feeds the subtotal, any other matching
    row feeds the final total. For each of the two, the first value
    encountered in the bottom-up scan is kept.

    Returns:
        {"subtotal": float or None, "final_total": float or None}, with
        values rounded to 2 decimals.
    """
    found: Dict[str, Any] = {"subtotal": None, "final_total": None}

    for line in reversed(rows_texts):
        # Skip empty / whitespace-only rows.
        if not line or not line.strip():
            continue
        if not TOTAL_KEYWORDS.search(line):
            continue

        number_match = NUM_RE.search(line)
        if not number_match:
            continue

        amount = normalize_num_str(number_match.group(0))
        if amount is None:
            continue

        # "sub" anywhere in the row marks it as a subtotal line.
        slot = "subtotal" if re.search(r"sub", line, re.I) else "final_total"
        if found[slot] is None:
            found[slot] = float(round(amount, 2))

    return found
 # ---------------- FALLBACK REGEX EXTRACTOR ----------------