Qwen-UI

Running

App Files Community

Ankushbl6 committed on Jan 16

Commit

8d68d78

verified ·

1 Parent(s): 3b295dc

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +33 -20

src/streamlit_app.py CHANGED Viewed

@@ -229,14 +229,18 @@ def clean_tax_percentage(x) -> float:
     except ValueError:
         return 0.0
-def clean_float(x) -> float:
     """
     Parse a number string handling both US and European formats.
     Use this for MONETARY AMOUNTS only, NOT for tax percentages.
     US Format:      1,234,567.89  (comma = thousands, period = decimal)
     European:       1.234.567,89  (period = thousands, comma = decimal)
     Examples:
         "1,234.56"    → 1234.56  (US)
         "1.234,56"    → 1234.56  (European)
@@ -296,14 +300,26 @@ def clean_float(x) -> float:
         # Only commas present
         # Check what comes after the LAST comma
         after_last_comma = s[last_comma + 1:] if last_comma < len(s) - 1 else ""
-        if comma_count == 1 and len(after_last_comma) <= 4 and after_last_comma.isdigit():
-            # Single comma with 1-4 digits after → European decimal
             # "261,49" → 261.49, "1234,5678" → 1234.5678
             s = s.replace(',', '.')
         elif len(after_last_comma) == 3 and comma_count >= 1:
-            # 3 digits after comma(s) → likely thousands separator
-            # "1,234" → 1234, "1,234,567" → 1234567
             s = s.replace(',', '')
         else:
             # Multiple commas → thousands separator
@@ -611,7 +627,7 @@ def parse_date_to_object(date_str, currency=None):
         "%m-%d-%y", "%m/%d/%y", "%m.%d.%y", "%m %d %y",
         # ISO with 2-digit year
-        "%y-%m-%d", "%y/%m/%d", "%y.%m.%d", "%y %m %y",
         # Compact formats
         "%y%m%d", "%d%m%y", "%m%d%y", "%d%m%Y", "%m%d%Y", "%Y%d%m",
@@ -652,7 +668,6 @@ def run_inference_vllm(image: Image.Image):
     # Extraction prompt (JSON format)
     EXTRACTION_PROMPT = """Please carefully examine this invoice image and extract all the information into the following structured JSON format. Pay close attention to details and ensure accuracy in number formatting and text extraction.
 Extract the data into this exact JSON structure:
 {
   "header": {
     "invoice_no": "Invoice number or reference ID",
@@ -689,8 +704,6 @@ Extract the data into this exact JSON structure:
     "currency": "Currency code (USD, EUR, etc.)"
   }
 }
 IMPORTANT GUIDELINES:
 - Extract only the bank account details matching the invoice currency.
   Example:
@@ -808,17 +821,17 @@ def parse_vllm_json(raw_json_text):
         data = json.loads(text_to_parse)
-        def clean_amount(value):
-            """Parse monetary amounts using clean_float"""
-            return clean_float(value)
         header = data.get("header", {})
         summary = data.get("summary", {})
         items = data.get("items", [])
-        # Get currency first for date parsing (USD uses MM/DD/YYYY for numeric dates)
         currency = summary.get("currency", "")
         result = {
             "Invoice Number": header.get("invoice_no", ""),
             "Invoice Date": normalize_date(header.get("invoice_date", ""), currency),
@@ -1081,8 +1094,8 @@ def map_prediction_to_ui(pred):
         return None
     def clean_number(x):
-        """Parse monetary amounts - use clean_float"""
-        return clean_float(x)
     def collect_keys(obj, out):
         if isinstance(obj, dict):

     except ValueError:
         return 0.0
+def clean_float(x, currency=None) -> float:
     """
     Parse a number string handling both US and European formats.
     Use this for MONETARY AMOUNTS only, NOT for tax percentages.
     US Format:      1,234,567.89  (comma = thousands, period = decimal)
     European:       1.234.567,89  (period = thousands, comma = decimal)
+    Currency-aware for ambiguous cases (3 digits after comma):
+        - EUR: "10,000" → 10.0 (European decimal)
+        - USD/other: "10,000" → 10000 (thousands separator)
     Examples:
         "1,234.56"    → 1234.56  (US)
         "1.234,56"    → 1234.56  (European)
         # Only commas present
         # Check what comes after the LAST comma
         after_last_comma = s[last_comma + 1:] if last_comma < len(s) - 1 else ""
+        if comma_count == 1 and len(after_last_comma) == 3 and after_last_comma.isdigit():
+            # AMBIGUOUS CASE: Single comma with exactly 3 digits after
+            # Could be US thousands ("10,000" = 10000) or European decimal ("10,000" = 10.0)
+            # Use currency to decide:
+            if currency and currency.upper() == 'EUR':
+                # European: treat comma as decimal separator
+                # "10,000" → 10.000 → 10.0
+                s = s.replace(',', '.')
+            else:
+                # USD/other: treat comma as thousands separator
+                # "10,000" → 10000
+                s = s.replace(',', '')
+        elif comma_count == 1 and len(after_last_comma) <= 4 and after_last_comma.isdigit():
+            # Single comma with 1, 2, or 4 digits after → European decimal
             # "261,49" → 261.49, "1234,5678" → 1234.5678
             s = s.replace(',', '.')
         elif len(after_last_comma) == 3 and comma_count >= 1:
+            # Multiple commas with 3 digits after last → thousands separator
+            # "1,234,567" → 1234567
             s = s.replace(',', '')
         else:
             # Multiple commas → thousands separator
         "%m-%d-%y", "%m/%d/%y", "%m.%d.%y", "%m %d %y",
         # ISO with 2-digit year
+        "%y-%m-%d", "%y/%m/%d", "%y.%m.%d", "%y %m %d",
         # Compact formats
         "%y%m%d", "%d%m%y", "%m%d%y", "%d%m%Y", "%m%d%Y", "%Y%d%m",
     # Extraction prompt (JSON format)
     EXTRACTION_PROMPT = """Please carefully examine this invoice image and extract all the information into the following structured JSON format. Pay close attention to details and ensure accuracy in number formatting and text extraction.
 Extract the data into this exact JSON structure:
 {
   "header": {
     "invoice_no": "Invoice number or reference ID",
     "currency": "Currency code (USD, EUR, etc.)"
   }
 }
 IMPORTANT GUIDELINES:
 - Extract only the bank account details matching the invoice currency.
   Example:
         data = json.loads(text_to_parse)
         header = data.get("header", {})
         summary = data.get("summary", {})
         items = data.get("items", [])
+        # Get currency first for date parsing and amount parsing
         currency = summary.get("currency", "")
+        def clean_amount(value):
+            """Parse monetary amounts using clean_float with currency awareness"""
+            return clean_float(value, currency)
         result = {
             "Invoice Number": header.get("invoice_no", ""),
             "Invoice Date": normalize_date(header.get("invoice_date", ""), currency),
         return None
     def clean_number(x):
+        """Parse monetary amounts - use clean_float with currency awareness"""
+        return clean_float(x, currency)
     def collect_keys(obj, out):
         if isinstance(obj, dict):