Sathvik-kota committed on
Commit
6bc7302
·
verified ·
1 Parent(s): 03cff90

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +27 -10
app.py CHANGED
@@ -28,17 +28,34 @@ app = FastAPI(title="Bajaj Datathon - Bill Extractor")
28
 
29
  class BillRequest(BaseModel):
30
  document: str
31
- # ---------------- FASTAPI APP ----------------
32
-
33
- app = FastAPI()
34
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- class BillRequest(BaseModel):
37
- """
38
- Request body model.
39
- Expects a public URL to a bill document (image/PDF).
40
- """
41
- document: str
42
 
43
 
44
  # ---------------- FALLBACK REGEX EXTRACTOR ----------------
 
28
 
29
  class BillRequest(BaseModel):
30
  document: str
31
# ---------------- Helpers: number normalization & detection ----------------
# Matches one numeric token: optional sign, digits with optional comma-separated
# groups (Western "1,234,567" or Indian "1,23,456"), and an optional decimal part.
# Commas are only accepted *between* digits, so trailing punctuation such as the
# comma in "12, then" is not swallowed into the match.
NUM_RE = re.compile(r"[-+]?\d+(?:,\d+)*(?:\.\d+)?")
# Case-insensitive phrases treated as "total"-style labels (grand total,
# net payable, subtotal, ...); whitespace between the words is optional.
TOTAL_KEYWORDS = re.compile(
    r"(grand\s*total|net\s*payable|total\s*amount|amount\s*payable|bill\s*amount|final\s*amount|balance\s*due|sub\s*total|subtotal)",
    re.I,
)
34
+
35
def normalize_num_str(s: str) -> Optional[float]:
    """Parse a human-formatted amount string into a float.

    Strips currency symbols and labels, removes comma thousands separators,
    and treats a value wrapped in parentheses as negative.

    Args:
        s: Raw amount text, e.g. "Rs. 1,234.50" or "(500)". May be None.

    Returns:
        The parsed value, or None when ``s`` is None, empty, or does not
        reduce to a single valid number.
    """
    if s is None:
        return None
    # A dot that ends an abbreviation ("Rs.500") is punctuation, not a decimal
    # point; drop it first, otherwise the text collapses to ".500" -> 0.5.
    text = re.sub(r"(?<=[A-Za-z])\.", "", s.strip())
    # Keep only characters that can belong to a number.
    text = re.sub(r"[^\d\-\+\,\.()]", "", text)
    # Accounting notation: "(123.45)" means -123.45.
    negative = text.startswith("(") and text.endswith(")")
    if negative:
        text = text[1:-1]
    text = text.replace(",", "")
    if not text:
        return None
    try:
        value = float(text)
    except ValueError:
        # Leftovers such as "1.2.3" or "+-5" are not a single number.
        return None
    return -value if negative else value
54
 
55
def looks_like_amount_token(token: str) -> bool:
    """Report whether ``token`` contains something NUM_RE recognises as a number.

    Empty, None, and whitespace-only tokens are rejected up front.
    """
    has_content = bool(token) and bool(token.strip())
    return has_content and NUM_RE.search(token) is not None
 
 
59
 
60
 
61
  # ---------------- FALLBACK REGEX EXTRACTOR ----------------