| """
|
| backend/utils/layout_template.py
|
|
|
| INVOICE LAYOUT TEMPLATE MAPPER
|
| ================================
|
| Converts any VLM/OCR extraction result into a STANDARDIZED output where each field
|
| always appears at the same position in the document — regardless of the source invoice's
|
| language, style, or quality (including handwritten/unclear images).
|
|
|
| CONCEPT:
|
| - Real invoices all share the same SPATIAL ZONES (top-left=vendor, top-right=invoice#, etc.)
|
| - VLM extracts raw field values from image
|
| - This module maps those values onto a fixed positional template
|
| - Output is always consistent: same field names, same order, same structure
|
|
|
| ADVANTAGE FOR HANDWRITTEN/UNCLEAR IMAGES:
|
| - Even if OCR reads garbled text, VLM can infer fields from position
|
| - Template forces a complete output with all expected fields (empty if not found)
|
| - Downstream systems always receive the same schema regardless of image quality
|
|
|
| STANDARD INVOICE ZONES (spatial reference):
|
| ┌─────────────────────────────────────────────────┐
|
| │ [TOP-LEFT] │ [TOP-RIGHT] │
|
| │ Vendor Name │ Invoice Number │
|
| │ Vendor Address │ Invoice Date │
|
| │ Vendor GSTIN │ Due Date │
|
| ├─────────────────────────────────────────────────┤
|
| │ [MID-LEFT] │ [MID-RIGHT] │
|
| │ Bill To (Buyer) │ Ship To │
|
| │ Buyer Address │ PO Number │
|
| │ Buyer GSTIN │ │
|
| ├─────────────────────────────────────────────────┤
|
| │ [CENTER - TABLE] │
|
| │ Item | Qty | Rate | HSN | Tax | Amount │
|
| ├─────────────────────────────────────────────────┤
|
| │ [BOTTOM-RIGHT] │
|
| │ Subtotal / Taxable Amount │
|
| │ CGST / SGST / IGST │
|
| │ Total Amount │
|
| └─────────────────────────────────────────────────┘
|
| """
|
| from __future__ import annotations
|
| import logging
|
| from typing import Any
|
|
|
| logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
| INVOICE_TEMPLATE: list[tuple[str, str, list[str]]] = [
|
|
|
| ("vendor_name", "top_left", ["vendor/shop name", "vendor", "seller", "from", "company", "firm", "shop", "store", "biller", "issued_by", "supplier"]),
|
| ("vendor_address", "top_left", ["address", "vendor_address", "seller_address", "from_address", "office_address"]),
|
| ("vendor_gstin", "top_left", ["phone / gstin / tax id", "gstin", "gst", "gst_number", "gstin_number", "vendor_gstin", "seller_gstin", "tax_id"]),
|
| ("vendor_phone", "top_left", ["phone / gstin / tax id", "phone", "mobile", "contact", "tel", "telephone", "vendor_phone"]),
|
| ("vendor_email", "top_left", ["email", "e-mail", "vendor_email"]),
|
|
|
|
|
| ("invoice_number", "top_right", ["bill no / customer id", "invoice_number", "invoice_no", "bill_number", "bill_no", "receipt_no", "challan_no", "ref_no"]),
|
| ("invoice_date", "top_right", ["date", "invoice_date", "bill_date", "issued_date", "dated"]),
|
| ("due_date", "top_right", ["due_date", "payment_due", "pay_by"]),
|
| ("po_number", "top_right", ["po_number", "purchase_order", "order_number", "po_no"]),
|
|
|
|
|
| ("buyer_name", "mid_left", ["buyer", "bill_to", "customer", "client", "consignee", "to", "billed_to", "sold_to"]),
|
| ("buyer_address", "mid_left", ["buyer_address", "bill_to_address", "customer_address", "delivery_address"]),
|
| ("buyer_gstin", "mid_left", ["buyer_gstin", "customer_gstin", "bill_to_gstin"]),
|
|
|
|
|
| ("items", "center", ["items", "line_items", "products", "goods", "services", "description", "particulars"]),
|
|
|
|
|
| ("subtotal", "bottom_right", ["subtotal", "taxable_amount", "taxable_value", "net_amount", "before_tax"]),
|
| ("cgst", "bottom_right", ["cgst", "central_gst", "cgst_amount"]),
|
| ("sgst", "bottom_right", ["sgst", "state_gst", "sgst_amount"]),
|
| ("igst", "bottom_right", ["igst", "integrated_gst", "igst_amount"]),
|
| ("total_tax", "bottom_right", ["total_tax", "tax_total", "gst_total", "vat"]),
|
| ("total_amount", "bottom_right", ["total amount", "total", "grand_total", "total_amount", "net_total", "amount_due", "bill_total", "payable", "final_amount"]),
|
| ("amount_in_words", "bottom_right", ["amount_in_words", "in_words", "rupees_in_words", "total_words"]),
|
|
|
|
|
| ("bank_name", "bottom", ["bank", "bank_name"]),
|
| ("account_number", "bottom", ["account_number", "account_no", "acc_no", "bank_account"]),
|
| ("ifsc", "bottom", ["ifsc", "ifsc_code", "bank_ifsc"]),
|
| ("upi", "bottom", ["upi", "upi_id", "gpay", "phonepe"]),
|
|
|
|
|
| ("notes", "footer", ["notes", "terms", "conditions", "remarks", "note"]),
|
| ]
|
|
|
|
|
| ZONE_ORDER = ["top_left", "top_right", "mid_left", "center", "bottom_right", "bottom", "footer"]
|
| ZONE_LABELS = {
|
| "top_left": "VENDOR / SELLER",
|
| "top_right": "INVOICE DETAILS",
|
| "mid_left": "BUYER / BILL TO",
|
| "center": "LINE ITEMS",
|
| "bottom_right": "TAX & TOTALS",
|
| "bottom": "PAYMENT INFO",
|
| "footer": "NOTES & TERMS",
|
| }
|
|
|
|
|
| def _normalize_key(key: str) -> str:
|
| """Normalize a key for fuzzy alias matching."""
|
| return key.lower().strip().replace(" ", "_").replace("-", "_").replace(".", "")
|
|
|
|
|
| def _flatten_dict(d: dict[str, Any], parent_key: str = '') -> dict[str, Any]:
|
| items: list[tuple[str, Any]] = []
|
| if isinstance(d, list):
|
| return {parent_key: d}
|
| for k, v in d.items():
|
| new_key = f"{parent_key}_{k}" if parent_key else k
|
| if isinstance(v, dict):
|
| items.extend(_flatten_dict(v, new_key).items())
|
| else:
|
| items.append((new_key, v))
|
| return dict(items)
|
|
|
| def _find_value(raw: dict[str, Any], aliases: list[str]) -> Any:
|
| """
|
| Search the raw extraction dict for a value matching any of the given aliases.
|
| Handles nested dicts and case-insensitive keys.
|
| Returns the first match found, or None.
|
| """
|
| if not isinstance(raw, dict): return None
|
| flat_raw = _flatten_dict(raw)
|
|
|
|
|
| norm_map = {_normalize_key(k): v for k, v in flat_raw.items()}
|
|
|
| for alias in aliases:
|
| norm_alias = _normalize_key(alias)
|
|
|
| if norm_alias in norm_map:
|
| val = norm_map[norm_alias]
|
| if val and str(val).strip():
|
| return val
|
|
|
| for norm_k, v in norm_map.items():
|
| if (norm_alias in norm_k or norm_k in norm_alias) and v and str(v).strip():
|
| return v
|
|
|
|
|
| full_text = raw.get("full_extraction", "")
|
| return None
|
|
|
|
|
| def _extract_from_full_text(full_text: str, aliases: list[str]) -> str | None:
|
| """
|
| When raw extraction has a 'full_extraction' text blob (not structured),
|
| try to find a field value by looking for known label patterns.
|
| e.g., 'Invoice No: 1234' → '1234'
|
| """
|
| if not full_text:
|
| return None
|
| import re
|
| for alias in aliases:
|
|
|
| pattern = rf'(?i){re.escape(alias.replace("_", "[ _-]"))}[\s:.\-]+([^\n]+)'
|
| match = re.search(pattern, full_text)
|
| if match:
|
| val = match.group(1).strip().rstrip(",;")
|
| if val:
|
| return val
|
| return None
|
|
|
|
|
| def _parse_markdown_table(full_text: str) -> list[dict[str, str]]:
|
| """
|
| Parse a markdown table from the Digital Twin text into a list of dicts.
|
| Look for | Header | style lines.
|
| """
|
| import re
|
| lines = full_text.splitlines()
|
| table_lines = [l.strip() for l in lines if l.strip().startswith("|")]
|
|
|
| if len(table_lines) < 3:
|
| return []
|
|
|
|
|
| try:
|
| header_raw = table_lines[0].strip("|").split("|")
|
| headers = [h.strip().lower() for h in header_raw]
|
|
|
| rows = []
|
| for line in table_lines[2:]:
|
| if "---" in line: continue
|
| cells = [c.strip() for c in line.strip("|").split("|")]
|
| if len(cells) >= len(headers):
|
| row = {headers[i]: cells[i] for i in range(len(headers))}
|
| rows.append(row)
|
| return rows
|
| except Exception:
|
| return []
|
|
|
|
|
| def map_to_standard_template(raw_extraction: dict[str, Any]) -> dict[str, Any]:
|
| """
|
| Map any raw VLM extraction result to the standard invoice template.
|
|
|
| Args:
|
| raw_extraction: Raw dict from VLM (any key names, any structure)
|
|
|
| Returns:
|
| Standardized dict with canonical field names, always same structure.
|
| Empty string for fields not found.
|
| """
|
|
|
| full_text = raw_extraction.get("full_extraction", "")
|
| is_text_blob = bool(full_text) and len(raw_extraction) <= 4
|
|
|
| result: dict[str, Any] = {}
|
| matched_count = 0
|
|
|
| for field_name, zone, aliases in INVOICE_TEMPLATE:
|
| val = None
|
| if field_name == "items" and full_text:
|
|
|
| val = _parse_markdown_table(full_text)
|
|
|
| if not val and not is_text_blob:
|
| val = _find_value(raw_extraction, aliases)
|
|
|
| if not val and full_text:
|
| val = _extract_from_full_text(full_text, aliases + [field_name])
|
|
|
| result[field_name] = val or ""
|
| if val:
|
| matched_count += 1
|
|
|
|
|
| if matched_count == 0 and full_text:
|
| result["_raw_text"] = full_text
|
| logger.info("[layout_template] No structured fields found. Preserving raw text blob.")
|
| else:
|
| logger.info("[layout_template] Mapped %d/%d fields from extraction.", matched_count, len(INVOICE_TEMPLATE))
|
|
|
| result["_template_version"] = "invoice_v1"
|
| return result
|
|
|
|
|
| def format_standardized_output(mapped: dict[str, Any]) -> str:
|
| """
|
| Format the mapped fields into a human-readable standardized text output.
|
| Fields always appear in the same zone order — consistent across all invoices.
|
| Empty fields are shown as '—' to maintain the template structure.
|
| """
|
| lines = ["=" * 60, " STANDARDIZED INVOICE EXTRACTION", "=" * 60]
|
|
|
|
|
| zone_fields: dict[str, list[tuple[str, str, Any]]] = {z: [] for z in ZONE_ORDER}
|
| for field_name, zone, aliases in INVOICE_TEMPLATE:
|
| val = mapped.get(field_name, "")
|
| zone_fields[zone].append((field_name, aliases[0].replace("_", " ").title(), val))
|
|
|
| for zone in ZONE_ORDER:
|
| fields = zone_fields[zone]
|
| if not fields:
|
| continue
|
|
|
| if all(not v for _, _, v in fields):
|
| continue
|
| lines.append(f"\n[{ZONE_LABELS[zone]}]")
|
| lines.append("-" * 40)
|
| for field_name, label, val in fields:
|
| display_val = str(val).strip() if val else "\u2014"
|
| lines.append(f" {label:<25} {display_val}")
|
|
|
|
|
| raw_text = mapped.get("_raw_text", "")
|
| if raw_text:
|
| lines.append("\n[RAW EXTRACTED TEXT]")
|
| lines.append("-" * 40)
|
| lines.append(raw_text[:2000])
|
|
|
| lines.append("\n" + "=" * 60)
|
| return "\n".join(lines)
|
|
|