Abhisesh7 committed on
Commit
fc14cba
·
verified ·
1 Parent(s): d129694

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -3
app.py CHANGED
@@ -67,8 +67,7 @@ def extract_entities(text):
67
 
68
  # Flexible regex patterns to handle variations
69
  invoice_num_pattern = r"(?:Invoice\s*(?:Number|No\.?|#)\s*[:\-\s]*)([\w-]+)"
70
- # Simplified vendor pattern to capture only the name, stopping at specific patterns
71
- vendor_pattern = r"(?:Vendor\s*(?:Name|Company)?|Supplier|Company\s*Name|From)\s*[:\-\s]*([A-Za-z\s&]+)(?=\s*(?:Invoice|Date|$|\d))"
72
  invoice_date_pattern = r"(?:Invoice\s*Date\s*[:\-\s]*|Date\s*[:\-\s]*)(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
73
  total_amount_pattern = r"(?:Total\s*(?:Amount|Due)?\s*[:\-\s]*\$?)([\d,]+\.?\d*)"
74
 
@@ -93,7 +92,6 @@ def extract_entities(text):
93
  elif entity['entity'].startswith('I-ORG') and org_name_parts:
94
  org_name_parts.append(entity['word'])
95
  if org_name_parts:
96
- # Clean up NER output (remove ## from subword tokens)
97
  vendor_name = " ".join(part.replace("##", "") for part in org_name_parts)
98
  print(f"NER Matched Vendor Name: {vendor_name}") # Debug
99
 
 
67
 
68
  # Flexible regex patterns to handle variations
69
  invoice_num_pattern = r"(?:Invoice\s*(?:Number|No\.?|#)\s*[:\-\s]*)([\w-]+)"
70
+ vendor_pattern = r"(?:Vendor\s*(?:Name|Company)?|Supplier|Company\s*Name|From)\s*[:\-\s]*([A-Za-z\s&\.]+)(?=\s*(?:Invoice|No\.?|Date|$|\d))"
 
71
  invoice_date_pattern = r"(?:Invoice\s*Date\s*[:\-\s]*|Date\s*[:\-\s]*)(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
72
  total_amount_pattern = r"(?:Total\s*(?:Amount|Due)?\s*[:\-\s]*\$?)([\d,]+\.?\d*)"
73
 
 
92
  elif entity['entity'].startswith('I-ORG') and org_name_parts:
93
  org_name_parts.append(entity['word'])
94
  if org_name_parts:
 
95
  vendor_name = " ".join(part.replace("##", "") for part in org_name_parts)
96
  print(f"NER Matched Vendor Name: {vendor_name}") # Debug
97