Spaces:
Sleeping
Sleeping
Update ai_mapping.py
Browse files - ai_mapping.py +6 -7
ai_mapping.py
CHANGED
|
@@ -31,14 +31,13 @@ def extract_key_values_with_layoutlm(page_data: list, pdf_path: str) -> Dict[str
|
|
| 31 |
key_values = {}
|
| 32 |
# Enhanced regex patterns with flexibility
|
| 33 |
dates = re.findall(r'(Agreement\s+(?:Start|End)\s+Date(?:s)?)\s*[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{4})', text_data, re.IGNORECASE)
|
| 34 |
-
# Capture date
|
| 35 |
-
date_context = re.findall(r'(?:executed\s+as\s+of|Effective\s+Date)\s
|
| 36 |
amounts = re.findall(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text_data)
|
| 37 |
-
# Refined Agreement Name near Order Form or document start
|
| 38 |
-
name_context = re.findall(r'(?:Order\s+Form|Agreement)\s*[:\s]*([A-Za-z0-9\s-]+)(?=\s*(?:Product|Quantity|List|Net|\Z))', text_data, re.IGNORECASE)
|
| 39 |
-
# Prioritize first meaningful name, avoiding procedural text
|
| 40 |
if name_context:
|
| 41 |
-
key_values["Agreement Name"] = next((name.strip() for name in name_context if len(name.split()) > 1 and not name.lower().startswith("no")), "Unknown")
|
| 42 |
# Update key_values with matched fields
|
| 43 |
for key, value in dates:
|
| 44 |
key_values[key] = value
|
|
@@ -128,7 +127,7 @@ def extract_clauses(page_data: list) -> Dict[str, str]:
|
|
| 128 |
# Target exact "NO WAIVER" text only
|
| 129 |
no_waiver_match = re.search(r'NO\s+WAIVER\s*[:\s]*(.*?)(?=\n\n|\Z)', text_data, re.IGNORECASE)
|
| 130 |
if no_waiver_match:
|
| 131 |
-
clauses["NO WAIVER"] = no_waiver_match.group(1).strip()
|
| 132 |
return clauses if clauses else {}
|
| 133 |
|
| 134 |
def run_ai_mapping_with_layoutlm(key_values: Dict[str, str], object_field_names: List[str], pdf_path: str) -> Dict:
|
|
|
|
| 31 |
key_values = {}
|
| 32 |
# Enhanced regex patterns with flexibility
|
| 33 |
dates = re.findall(r'(Agreement\s+(?:Start|End)\s+Date(?:s)?)\s*[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{4})', text_data, re.IGNORECASE)
|
| 34 |
+
# Capture date with line break tolerance for "executed as of: 7/5/25"
|
| 35 |
+
date_context = re.findall(r'(?:executed\s+as\s+of|Effective\s+Date)[\s:]*(\d{1,2}[/-]\d{1,2}[/-]\d{4})', text_data, re.IGNORECASE)
|
| 36 |
amounts = re.findall(r'\$\d{1,3}(?:,\d{3})*(?:\.\d{2})?', text_data)
|
| 37 |
+
# Refined Agreement Name near Order Form or document start, avoiding procedural text
|
| 38 |
+
name_context = re.findall(r'(?:Order\s+Form|Agreement)\s*[:\s]*([A-Za-z0-9\s-]+)(?=\s*(?:Product|Quantity|List|Net|\n\n|\Z))', text_data, re.IGNORECASE)
|
|
|
|
| 39 |
if name_context:
|
| 40 |
+
key_values["Agreement Name"] = next((name.strip() for name in name_context if len(name.split()) > 1 and not name.lower().startswith("no purchase")), "Unknown")
|
| 41 |
# Update key_values with matched fields
|
| 42 |
for key, value in dates:
|
| 43 |
key_values[key] = value
|
|
|
|
| 127 |
# Target exact "NO WAIVER" text only
|
| 128 |
no_waiver_match = re.search(r'NO\s+WAIVER\s*[:\s]*(.*?)(?=\n\n|\Z)', text_data, re.IGNORECASE)
|
| 129 |
if no_waiver_match:
|
| 130 |
+
clauses["NO WAIVER"] = no_waiver_match.group(1).strip() if no_waiver_match.group(1).strip() else "NO WAIVER"
|
| 131 |
return clauses if clauses else {}
|
| 132 |
|
| 133 |
def run_ai_mapping_with_layoutlm(key_values: Dict[str, str], object_field_names: List[str], pdf_path: str) -> Dict:
|