Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -67,8 +67,7 @@ def extract_entities(text):
|
|
| 67 |
|
| 68 |
# Flexible regex patterns to handle variations
|
| 69 |
invoice_num_pattern = r"(?:Invoice\s*(?:Number|No\.?|#)\s*[:\-\s]*)([\w-]+)"
|
| 70 |
-
|
| 71 |
-
vendor_pattern = r"(?:Vendor\s*(?:Name|Company)?|Supplier|Company\s*Name|From)\s*[:\-\s]*([A-Za-z\s&]+)(?=\s*(?:Invoice|Date|$|\d))"
|
| 72 |
invoice_date_pattern = r"(?:Invoice\s*Date\s*[:\-\s]*|Date\s*[:\-\s]*)(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
|
| 73 |
total_amount_pattern = r"(?:Total\s*(?:Amount|Due)?\s*[:\-\s]*\$?)([\d,]+\.?\d*)"
|
| 74 |
|
|
@@ -93,7 +92,6 @@ def extract_entities(text):
|
|
| 93 |
elif entity['entity'].startswith('I-ORG') and org_name_parts:
|
| 94 |
org_name_parts.append(entity['word'])
|
| 95 |
if org_name_parts:
|
| 96 |
-
# Clean up NER output (remove ## from subword tokens)
|
| 97 |
vendor_name = " ".join(part.replace("##", "") for part in org_name_parts)
|
| 98 |
print(f"NER Matched Vendor Name: {vendor_name}") # Debug
|
| 99 |
|
|
|
|
| 67 |
|
| 68 |
# Flexible regex patterns to handle variations
|
| 69 |
invoice_num_pattern = r"(?:Invoice\s*(?:Number|No\.?|#)\s*[:\-\s]*)([\w-]+)"
|
| 70 |
+
vendor_pattern = r"(?:Vendor\s*(?:Name|Company)?|Supplier|Company\s*Name|From)\s*[:\-\s]*([A-Za-z\s&\.]+)(?=\s*(?:Invoice|No\.?|Date|$|\d))"
|
|
|
|
| 71 |
invoice_date_pattern = r"(?:Invoice\s*Date\s*[:\-\s]*|Date\s*[:\-\s]*)(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
|
| 72 |
total_amount_pattern = r"(?:Total\s*(?:Amount|Due)?\s*[:\-\s]*\$?)([\d,]+\.?\d*)"
|
| 73 |
|
|
|
|
| 92 |
elif entity['entity'].startswith('I-ORG') and org_name_parts:
|
| 93 |
org_name_parts.append(entity['word'])
|
| 94 |
if org_name_parts:
|
|
|
|
| 95 |
vendor_name = " ".join(part.replace("##", "") for part in org_name_parts)
|
| 96 |
print(f"NER Matched Vendor Name: {vendor_name}") # Debug
|
| 97 |
|