pavansuresh committed on
Commit
305182c
·
verified ·
1 Parent(s): 17db344

Update ai_mapping.py

Browse files
Files changed (1) hide show
  1. ai_mapping.py +6 -7
ai_mapping.py CHANGED
@@ -31,14 +31,13 @@ def extract_key_values_with_layoutlm(page_data: list, pdf_path: str) -> Dict[str
31
  key_values = {}
32
  # Enhanced regex patterns with flexibility
33
  dates = re.findall(r'(Agreement\s+(?:Start|End)\s+Date(?:s)?)\s*[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{4})', text_data, re.IGNORECASE)
34
- # Capture date anywhere with context like "executed as of" or "Effective Date"
35
- date_context = re.findall(r'(?:executed\s+as\s+of|Effective\s+Date)\s*[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{4})', text_data, re.IGNORECASE)
36
  amounts = re.findall(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text_data)
37
- # Refined Agreement Name near Order Form or document start
38
- name_context = re.findall(r'(?:Order\s+Form|Agreement)\s*[:\s]*([A-Za-z0-9\s-]+)(?=\s*(?:Product|Quantity|List|Net|\Z))', text_data, re.IGNORECASE)
39
- # Prioritize first meaningful name, avoiding procedural text
40
  if name_context:
41
- key_values["Agreement Name"] = next((name.strip() for name in name_context if len(name.split()) > 1 and not name.lower().startswith("no")), "Unknown")
42
  # Update key_values with matched fields
43
  for key, value in dates:
44
  key_values[key] = value
@@ -128,7 +127,7 @@ def extract_clauses(page_data: list) -> Dict[str, str]:
128
  # Target exact "NO WAIVER" text only
129
  no_waiver_match = re.search(r'NO\s+WAIVER\s*[:\s]*(.*?)(?=\n\n|\Z)', text_data, re.IGNORECASE)
130
  if no_waiver_match:
131
- clauses["NO WAIVER"] = no_waiver_match.group(1).strip()
132
  return clauses if clauses else {}
133
 
134
  def run_ai_mapping_with_layoutlm(key_values: Dict[str, str], object_field_names: List[str], pdf_path: str) -> Dict:
 
31
  key_values = {}
32
  # Enhanced regex patterns with flexibility
33
  dates = re.findall(r'(Agreement\s+(?:Start|End)\s+Date(?:s)?)\s*[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{4})', text_data, re.IGNORECASE)
34
+ # Capture date with line break tolerance for "executed as of: 7/5/25"
35
+ date_context = re.findall(r'(?:executed\s+as\s+of|Effective\s+Date)[\s:]*(\d{1,2}[/-]\d{1,2}[/-]\d{4})', text_data, re.IGNORECASE)
36
  amounts = re.findall(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text_data)
37
+ # Refined Agreement Name near Order Form or document start, avoiding procedural text
38
+ name_context = re.findall(r'(?:Order\s+Form|Agreement)\s*[:\s]*([A-Za-z0-9\s-]+)(?=\s*(?:Product|Quantity|List|Net|\n\n|\Z))', text_data, re.IGNORECASE)
 
39
  if name_context:
40
+ key_values["Agreement Name"] = next((name.strip() for name in name_context if len(name.split()) > 1 and not name.lower().startswith("no purchase")), "Unknown")
41
  # Update key_values with matched fields
42
  for key, value in dates:
43
  key_values[key] = value
 
127
  # Target exact "NO WAIVER" text only
128
  no_waiver_match = re.search(r'NO\s+WAIVER\s*[:\s]*(.*?)(?=\n\n|\Z)', text_data, re.IGNORECASE)
129
  if no_waiver_match:
130
+ clauses["NO WAIVER"] = no_waiver_match.group(1).strip() if no_waiver_match.group(1).strip() else "NO WAIVER"
131
  return clauses if clauses else {}
132
 
133
  def run_ai_mapping_with_layoutlm(key_values: Dict[str, str], object_field_names: List[str], pdf_path: str) -> Dict: