Seth0330 committed on
Commit
788110c
·
verified ·
1 Parent(s): 1d73f48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -290,12 +290,29 @@ def extract_text_from_unstract(uploaded_file):
290
  return r.text
291
 
292
  def clean_num(val):
293
- if not val: return None
294
- val = re.sub(r"[^0-9.\-]", "", str(val))
295
- try:
296
- return float(val)
297
- except Exception:
 
 
 
 
298
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
  def normalize(s):
301
  if not s: return ""
 
290
  return r.text
291
 
292
  def clean_num(val):
293
+ """
294
+ Extracts and converts a numeric value from a string.
295
+ Handles:
296
+ - Commas (e.g., "9,070.26")
297
+ - Currency symbols (e.g., "USD", "$")
298
+ - Words in the string (e.g., "Invoice Total USD 9,070.26")
299
+ - Returns None if not found.
300
+ """
301
+ if val is None:
302
  return None
303
+ if isinstance(val, (int, float)):
304
+ return float(val)
305
+ # Look for the last valid number in the string
306
+ matches = re.findall(r"[-+]?\d[\d,]*\.?\d*", str(val))
307
+ if matches:
308
+ # Remove commas and convert to float
309
+ num = matches[-1].replace(",", "")
310
+ try:
311
+ return float(num)
312
+ except Exception:
313
+ return None
314
+ return None
315
+
316
 
317
  def normalize(s):
318
  if not s: return ""