Sathvik-kota committed on
Commit
b100b23
·
verified ·
1 Parent(s): 2eca474

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py CHANGED
@@ -244,7 +244,50 @@ def parse_row_to_item(cells_row: List[Dict[str, Any]]) -> Optional[Dict[str, Any
244
  "item_rate": float(round(rate_val, 2)) if rate_val else 0.0,
245
  "item_quantity": float(qty_val)
246
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
 
250
  # ---------------- FALLBACK REGEX EXTRACTOR ----------------
 
244
  "item_rate": float(round(rate_val, 2)) if rate_val else 0.0,
245
  "item_quantity": float(qty_val)
246
  }
247
+ # ---------------- Duplicate suppression & subtotal detection ----------------
248
def dedupe_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Remove obvious duplicate line items, keeping the first occurrence.

    Two items are duplicates when they share:
      - the same normalized item_name (lower-cased, whitespace runs collapsed,
        trimmed, first 80 characters only), and
      - the same item_amount rounded to 2 decimals.

    Items whose item_amount is missing or cannot be converted to float are
    always kept: without a comparable amount a duplicate cannot be confirmed,
    and dropping a real line item is worse than keeping a repeated one.

    Args:
        items: item dicts; "item_name" and "item_amount" are the only keys read.
            None is treated as an empty list.

    Returns:
        A new list holding the surviving items (same dict objects, input order).
    """
    seen = set()
    out = []
    for it in items or []:
        try:
            amount = round(float(it.get("item_amount")), 2)
        except (TypeError, ValueError):
            # Amount is absent or non-numeric: keep the item instead of crashing.
            out.append(it)
            continue
        # A missing / None name normalizes to the empty string.
        name = re.sub(r"\s+", " ", str(it.get("item_name") or "").lower()).strip()
        key = (name[:80], amount)  # compare on the first 80 chars only
        if key in seen:
            continue
        seen.add(key)
        out.append(it)
    return out
264
 
265
def detect_subtotals_and_totals(rows_texts: List[str]) -> Dict[str, Any]:
    """
    Look through page-level row texts for subtotal/final total cues and values.

    Rows are scanned bottom-up. A row is considered only when it matches
    TOTAL_KEYWORDS; the first NUM_RE match in that row is parsed with
    normalize_num_str. Rows with no number, or whose number parses to None,
    are skipped. A row containing "sub" (case-insensitive) supplies the
    subtotal, any other keyword row supplies the final total. For each of the
    two, the qualifying row closest to the bottom wins.

    Args:
        rows_texts: row strings in page order; None is treated as empty.

    Returns:
        Dict with keys: subtotal (float or None), final_total (float or None).
    """
    subtotal = None
    final_total = None
    for rt in reversed(rows_texts or []):  # scan bottom-up without copying
        if subtotal is not None and final_total is not None:
            break  # both found; earlier rows cannot change the result
        if not rt or not rt.strip():
            continue
        if not TOTAL_KEYWORDS.search(rt):
            continue
        # NOTE(review): only the first number in the row is used; a row such
        # as "Total (3 items) 450.00" would yield 3 - confirm this is intended.
        m = NUM_RE.search(rt)
        if m is None:
            continue
        val = normalize_num_str(m.group(0))
        if val is None:
            continue
        # decide if subtotal or final_total based on keyword
        if re.search(r"sub", rt, re.I):
            if subtotal is None:
                subtotal = float(round(val, 2))
        elif final_total is None:
            # treat as final total likely
            final_total = float(round(val, 2))
    return {"subtotal": subtotal, "final_total": final_total}
291
 
292
 
293
  # ---------------- FALLBACK REGEX EXTRACTOR ----------------