Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -291,29 +291,31 @@ def extract_text_from_unstract(uploaded_file):
|
|
| 291 |
|
| 292 |
def clean_num(val):
|
| 293 |
"""
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
-
|
| 297 |
-
-
|
| 298 |
-
-
|
| 299 |
-
- Returns None if not found.
|
| 300 |
"""
|
| 301 |
if val is None:
|
| 302 |
return None
|
| 303 |
if isinstance(val, (int, float)):
|
| 304 |
return float(val)
|
| 305 |
-
#
|
| 306 |
matches = re.findall(r"[-+]?\d[\d,]*\.?\d*", str(val))
|
| 307 |
if matches:
|
| 308 |
-
#
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
|
|
|
|
|
|
| 314 |
return None
|
| 315 |
|
| 316 |
|
|
|
|
| 317 |
def normalize(s):
    """
    Return a comparison-friendly form of *s*: lowercased, with every run of
    non-word characters (whitespace, punctuation, symbols) removed.

    The value is converted with ``str()`` first, so non-string inputs are
    accepted. Underscores are kept because ``\\W`` does not match them.

    Returns "" when *s* is None or contains no word characters.
    """
    # Only None means "missing"; a plain truthiness test would also discard
    # legitimate falsy values such as the number 0.
    if s is None:
        return ""
    # No explicit strip() is needed: whitespace is matched by \W and removed.
    return re.sub(r"\W+", "", str(s).lower())
|
|
|
|
| 291 |
|
| 292 |
def clean_num(val):
    """
    Extract the most relevant numeric value from a string (currency, label, commas, etc.).

    ``int``/``float`` inputs are returned unchanged as ``float``. Any other
    value is converted with ``str()`` and scanned for numeric tokens (optional
    sign, digits, thousands commas, optional decimal part). Commas are removed
    and the largest value found is returned.

    Examples:
    - 'Invoice Total USD 9,070.26' -> 9070.26
    - '$194.41' -> 194.41
    - 194.41 -> 194.41
    - '-194.41' -> -194.41

    Returns None if *val* is None or no number is found.
    """
    if val is None:
        return None
    if isinstance(val, (int, float)):
        return float(val)

    # Find *all* numbers in the string (with sign, commas, decimals, etc.)
    matches = re.findall(r"[-+]?\d[\d,]*\.?\d*", str(val))

    candidates = []
    for token in matches:
        # Let float() validate the token so a leading sign captured by the
        # regex is honoured; an isdigit()-style check would discard it.
        try:
            candidates.append(float(token.replace(",", "")))
        except ValueError:
            continue

    if not candidates:
        return None
    # Pick the biggest one (most likely to be the invoice total)
    return max(candidates)
|
| 316 |
|
| 317 |
|
| 318 |
+
|
| 319 |
def normalize(s):
    """
    Return a comparison-friendly form of *s*: lowercased, with every run of
    non-word characters (whitespace, punctuation, symbols) removed.

    The value is converted with ``str()`` first, so non-string inputs are
    accepted. Underscores are kept because ``\\W`` does not match them.

    Returns "" when *s* is None or contains no word characters.
    """
    # Only None means "missing"; a plain truthiness test would also discard
    # legitimate falsy values such as the number 0.
    if s is None:
        return ""
    # No explicit strip() is needed: whitespace is matched by \W and removed.
    return re.sub(r"\W+", "", str(s).lower())
|