Spaces:
Runtime error
Runtime error
| import re | |
def clean_section_title(raw_title):
    """
    Normalize a raw section heading into a short display title.

    Example: '2. PURCHASE PRICE (U.S. currency)' -> 'Purchase Price'

    Steps, in order:
      1. Strip surrounding whitespace.
      2. Drop one leading enumeration token such as "1." or "A.".
      3. Drop every parenthesised aside such as "(U.S. currency)".
      4. Capitalize each word.

    Args:
        raw_title: The heading text. May be None, empty, or a non-string.

    Returns:
        The cleaned title, or "General Information" when the input is
        missing, not a string, or reduces to nothing after cleaning.
    """
    fallback = "General Information"
    if not isinstance(raw_title, str):
        return fallback

    # Strip first so the '^'-anchored pattern below still sees the numbering
    # when the heading arrives with leading whitespace.
    clean = raw_title.strip()
    # Remove leading numbers/bullets (e.g., "1.", "A.")
    clean = re.sub(r'^[A-Z0-9]+\.\s*', '', clean)
    # Remove things in parentheses (e.g., "(U.S. Currency)")
    clean = re.sub(r'\s*\(.*?\)', '', clean).strip()

    # A heading made only of numbering/parentheses would otherwise produce an
    # empty title (and an anonymous group downstream).
    if not clean:
        return fallback

    # Title-case word by word. str.title() treats an apostrophe as a word
    # boundary ("BUYER'S" -> "Buyer'S"), so keep "word'suffix" together and
    # capitalize it as one unit instead.
    return re.sub(
        r"[^\W\d_]+(?:['\u2019][^\W\d_]+)?",
        lambda match: match.group(0).capitalize(),
        clean,
    )
# "semanticType" values that group_fields_by_section skips: fields of these
# types are never added to a group and never receive a "groupId".
UNGROUPABLE_TYPES = ["signature", "initial"]
def group_fields_by_section(fields):
    """
    Organize flat fields into logical groups based on each field's
    'section' value (per the original author's note, this is the context
    extracted by the Vision model).

    Fields whose "semanticType" is listed in UNGROUPABLE_TYPES are skipped.
    Every other field is assigned to the group derived from its cleaned
    section title and is mutated in place to carry a "groupId" key.

    Args:
        fields: Iterable of field dicts. Each grouped field must have an
            "id"; "section", "semanticType", "page" and "rect" are optional.

    Returns:
        A tuple ``(sorted_groups, fields)`` where ``sorted_groups`` is a list
        of ``{"id", "title", "fieldIds"}`` dicts ordered by the page and
        rect "y" of each group's first field, and ``fields`` is the same
        object that was passed in.

    Raises:
        KeyError: If a grouped field has no "id".
    """
    groups_map = {}
    # (page, y) of the first field placed in each group. Captured while
    # grouping so the final sort does not have to rescan `fields` per group,
    # and so it always uses the field that actually opened the group.
    first_position = {}

    for field in fields:
        if field.get("semanticType") in UNGROUPABLE_TYPES:
            continue

        # Get the raw section and normalize it into a display title.
        raw_section = field.get("section", "General Information")
        group_title = clean_section_title(raw_section)

        # Create a stable ID for the group.
        # NOTE(review): the slug is cut to 30 characters, so two long titles
        # sharing that prefix land in the same group — confirm this is intended.
        group_id = f"grp_{group_title.lower().replace(' ', '_')[:30]}"

        # Create group if not exists
        group = groups_map.get(group_id)
        if group is None:
            group = {
                "id": group_id,
                "title": group_title,
                "fieldIds": [],
            }
            groups_map[group_id] = group
            # Missing or null page/rect/y all sort as 0 rather than raising.
            rect = field.get("rect") or {}
            first_position[group_id] = (
                field.get("page") or 0,
                rect.get("y") or 0,
            )

        # Link field to group
        group["fieldIds"].append(field["id"])
        # Mutate the field object to include the link
        field["groupId"] = group_id

    # Sort groups by the page/y-position of their first field. sorted() is
    # stable, so groups with equal positions keep first-seen order.
    sorted_groups = sorted(
        groups_map.values(),
        key=lambda group: first_position[group["id"]],
    )
    return sorted_groups, fields