import re

def clean_section_title(raw_title):
    """
    Normalize a raw section heading into a short display title.

    Turns '2. PURCHASE PRICE (U.S. currency)' -> 'Purchase Price'

    Processing steps:
      1. Drop one leading number/letter bullet such as "1." or "A."
         (optionally preceded by whitespace).
      2. Drop every parenthesised aside, e.g. "(U.S. currency)".
      3. Trim surrounding whitespace and title-case the remainder.

    Args:
        raw_title: The heading text. May be None or empty.

    Returns:
        The cleaned title, or "General Information" when the input is
        falsy or nothing is left once bullets and asides are removed.
    """
    if not raw_title:
        return "General Information"

    # Remove leading numbers/bullets (e.g., "1.", "A."). Leading whitespace
    # is part of the pattern so indented headings are handled as well.
    clean = re.sub(r'^\s*[A-Z0-9]+\.\s*', '', raw_title)
    # Remove things in parentheses (e.g., "(U.S. Currency)")
    clean = re.sub(r'\s*\(.*?\)', '', clean)
    clean = clean.strip()

    # A heading made only of a bullet and/or an aside has no usable text;
    # use the same default as a missing heading rather than returning "".
    if not clean:
        return "General Information"

    # Title Case. str.title() treats an apostrophe as a word boundary
    # ("BUYER'S" -> "Buyer'S"), so lower-case the common possessive and
    # contraction suffixes afterwards. Names such as "O'Neil" are left
    # untouched because the suffix must end at a word boundary.
    titled = clean.title()
    return re.sub(
        r"(?<=[^\W\d_])(['\u2019])(S|T|D|M|Ll|Re|Ve)\b",
        lambda m: m.group(1) + m.group(2).lower(),
        titled,
    )

# Values of a field's "semanticType" that group_fields_by_section skips:
# such fields are never added to a group and receive no "groupId".
UNGROUPABLE_TYPES = [
    "signature",
    "initial",
]

def group_fields_by_section(fields):
    """
    Organizes flat fields into logical groups based on the
    'section' context extracted by the Vision model.

    Each field whose "semanticType" is not listed in UNGROUPABLE_TYPES is
    assigned to a group derived from its cleaned "section" title. The field
    dict is mutated in place to carry the resulting "groupId".

    Args:
        fields: Iterable of field dicts. Every grouped field must have an
            "id" key; "section", "semanticType", "page" and "rect" (a dict
            with a "y" entry) are optional.

    Returns:
        A tuple ``(groups, fields)`` where ``groups`` is a list of
        ``{"id", "title", "fieldIds"}`` dicts ordered by the page and y
        position of each group's first field, and ``fields`` is the same
        object that was passed in.

    Raises:
        KeyError: If a field that should be grouped has no "id".
    """
    groups_map = {}
    # group_id -> (page, y) of the field that created the group. Captured
    # during the single pass so sorting does not need to rescan `fields`.
    first_position = {}

    for field in fields:
        # Signatures/initials are deliberately left out of every group.
        if field.get("semanticType") in UNGROUPABLE_TYPES:
            continue

        # Section context attached to the field (missing -> default title).
        raw_section = field.get("section", "General Information")
        group_title = clean_section_title(raw_section)

        # Create a stable ID for the group. The slug is capped at 30
        # characters, so titles sharing that prefix land in the same group.
        group_id = f"grp_{group_title.lower().replace(' ', '_')[:30]}"

        # Create group if not exists
        group = groups_map.get(group_id)
        if group is None:
            group = {
                "id": group_id,
                "title": group_title,
                "fieldIds": []
            }
            groups_map[group_id] = group
            # `or` guards treat an explicit None page/rect/y like a missing
            # one, so sorting never compares None against a number.
            rect = field.get("rect") or {}
            first_position[group_id] = (
                field.get("page") or 0,
                rect.get("y") or 0,
            )

        # Link field to group
        group["fieldIds"].append(field["id"])

        # Mutate the field object to include the link
        field["groupId"] = group_id

    # Sort groups by the page/y-position of their first field. sorted() is
    # stable, so groups at the same position keep their creation order.
    sorted_groups = sorted(
        groups_map.values(),
        key=lambda g: first_position[g["id"]]
    )

    return sorted_groups, fields