#!/usr/bin/env python3
"""Generate a DELIBERATELY HARD invoice image to stress the OCR backends:

  • rotated vertical "INVOICE" banner + sideways Terms block (orientation problems)
  • a diagonal translucent "ORIGINAL COPY" watermark overlapping text
  • scattered header fields (invoice #, dates, vendor, bill-to in offset boxes)
  • a misaligned line-item table (inconsistent column alignment)
  • totals scattered across corners (subtotal / tax / shipping / grand total / balance due)

This is the kind of layout where classic OCR (Tesseract) struggles but a vision LLM
(MiniCPM-V) reads it correctly.

Writes:
  backend/evals/datasets/complex_invoice_messy.png
  backend/evals/datasets/complex_invoice_messy.gt.json   (skip_eval — showcase only)

No .txt sidecar on purpose — this document REQUIRES a real OCR engine.
"""
from __future__ import annotations

import json
from functools import lru_cache
from pathlib import Path

from PIL import Image, ImageDraw, ImageFont

ROOT = Path(__file__).resolve().parent.parent
OUT = ROOT / "backend" / "evals" / "datasets"
W, H = 1240, 1600

# Ground truth written next to the image; the rendered header fields and
# line items are read from this dict so the two cannot drift apart.
GT = {
    "doc_type": "invoice",
    "invoice_number": "INV-9X-44821",
    "issue_date": "2026-04-18",
    "due_date": "2026-05-18",
    "vendor_name": "Meridian Industrial Components Ltd",
    "bill_to_name": "Aperture Retail Group",
    "currency": "USD",
    "subtotal": 9111.50,
    "tax_amount": 751.70,
    "total": 10043.20,
    "line_items": [
        {"description": "Hydraulic pump HX-220", "quantity": 4, "unit_price": 1250.00, "line_total": 5000.00},
        {"description": "Seal kit (set of 12)", "quantity": 10, "unit_price": 85.50, "line_total": 855.00},
        {"description": "Pressure gauge 0-300psi", "quantity": 6, "unit_price": 142.75, "line_total": 856.50},
        {"description": "Installation labor", "quantity": 1, "unit_price": 2400.00, "line_total": 2400.00},
    ],
    "_meta": {"doc_type": "invoice", "channel": "scanned", "difficulty": "complex_layout", "skip_eval": True},
}

# Nominal glyph size assumed for fonts that expose no ``size`` attribute
# (Pillow's legacy bitmap default font) -- an estimate, only used to size the
# scratch canvas in ``rotated``.
_FALLBACK_FONT_SIZE = 12


@lru_cache(maxsize=None)
def font(sz, bold=False):
    """Return a font of pixel size *sz*, preferring Arial/Helvetica.

    The candidate paths are macOS system font locations. When none of them can
    be loaded, Pillow's built-in default font is used instead; it is requested
    at *sz* where the installed Pillow supports that, so the layout keeps its
    proportions on machines without the macOS fonts.

    Results are cached per ``(sz, bold)`` because ``main`` asks for the same
    few fonts dozens of times and each load otherwise re-reads the font file.
    """
    candidates = (
        "/System/Library/Fonts/Supplemental/Arial Bold.ttf" if bold
        else "/System/Library/Fonts/Supplemental/Arial.ttf",
        "/System/Library/Fonts/Helvetica.ttc",
        "/Library/Fonts/Arial.ttf",
    )
    for path in candidates:
        try:
            return ImageFont.truetype(path, sz)
        except (OSError, ImportError):
            # OSError: file missing/unreadable; ImportError: Pillow built
            # without FreeType. Either way, try the next candidate.
            continue
    try:
        # Newer Pillow releases accept ``size`` and return a scalable font.
        return ImageFont.load_default(size=sz)
    except (TypeError, ImportError):
        # Older Pillow (no ``size`` parameter) or no FreeType support:
        # settle for the fixed-size bitmap font.
        return ImageFont.load_default()


def rotated(base, text, xy, angle, fnt, fill=(20, 20, 20)):
    """Paste *text*, rotated *angle* degrees counter-clockwise, onto *base*.

    The text is drawn on a transparent scratch layer, the layer is trimmed to
    the rendered text, rotated with ``expand=True`` and pasted at *xy* using
    its own alpha channel as the mask, so *xy* is the top-left corner of the
    rotated text (plus a 2 px margin).

    The scratch layer is deliberately over-wide (``len(text) * size``) so the
    text always fits. It must be trimmed before rotating: otherwise the unused
    width turns into blank space on the leading edge of the rotated layer and
    shifts the text away from *xy* (for 90 degrees: far below it).
    """
    pad = 2
    size = getattr(fnt, "size", _FALLBACK_FONT_SIZE)
    layer = Image.new("RGBA", (max(20, len(text) * size), size + 16), (0, 0, 0, 0))
    ImageDraw.Draw(layer).text((pad, pad), text, font=fnt, fill=fill)

    ink = layer.getbbox()
    if ink is None:
        return  # nothing was drawn (empty or whitespace-only text)

    # Trim only the unused right-hand side; keep the left/top margin and the
    # full height so the text's offset from *xy* stays a constant ``pad``.
    layer = layer.crop((0, 0, min(layer.width, ink[2] + pad), layer.height))
    layer = layer.rotate(angle, expand=True)
    base.paste(layer, xy, layer)


def main():
    """Render the messy invoice PNG and write it with its ground-truth JSON."""
    OUT.mkdir(parents=True, exist_ok=True)
    img = Image.new("RGB", (W, H), "white")
    d = ImageDraw.Draw(img)

    # --- diagonal translucent watermark overlapping content ---
    wm = Image.new("RGBA", (W, H), (0, 0, 0, 0))
    wd = ImageDraw.Draw(wm)
    wd.text((140, 120), "ORIGINAL COPY", font=font(120, True), fill=(150, 150, 150, 60))
    wm = wm.rotate(28, center=(W // 2, H // 2))
    img.paste(wm, (0, 0), wm)

    # --- left vertical INVOICE banner (rotated 90) ---
    d.rectangle([18, 60, 96, 760], fill=(28, 40, 80))
    rotated(img, "INVOICE", (24, 470), 90, font(54, True), fill=(255, 255, 255))

    # --- scattered header fields (offset boxes, inconsistent placement) ---
    d.text((900, 70), "Invoice No.", font=font(20, True), fill=(90, 90, 90))
    d.text((900, 98), GT["invoice_number"], font=font(26, True), fill=(10, 10, 10))
    d.text((690, 150), f"Issued: {GT['issue_date']}", font=font(20), fill=(10, 10, 10))
    d.text((980, 200), f"Due {GT['due_date']}", font=font(20), fill=(160, 30, 30))  # offset, different spot
    # Balance due repeated near top in red (inconsistent)
    d.text((620, 60), "BALANCE DUE $10,043.20", font=font(26, True), fill=(190, 20, 20))

    # vendor block (top-left, after banner) and bill-to (offset right-middle)
    d.rectangle([130, 90, 560, 220], outline=(120, 120, 120), width=2)
    d.text((145, 100), "FROM / Remit to:", font=font(18, True), fill=(70, 70, 70))
    d.text((145, 128), GT["vendor_name"], font=font(22, True), fill=(10, 10, 10))
    d.text((145, 160), "Unit 7, Kvaerner Estate", font=font(18), fill=(40, 40, 40))
    d.text((145, 184), "VAT GB-882-114", font=font(18), fill=(40, 40, 40))

    d.rectangle([640, 300, 1080, 420], outline=(120, 120, 120), width=2)
    d.text((655, 310), "Bill To", font=font(18, True), fill=(70, 70, 70))
    d.text((655, 338), GT["bill_to_name"], font=font(22, True), fill=(10, 10, 10))
    d.text((655, 372), "Accounts Payable, Floor 3", font=font(18), fill=(40, 40, 40))
    d.text((655, 396), "PO ref: PO-77-3391", font=font(18), fill=(40, 40, 40))

    # --- misaligned line-item table ---
    ty = 470
    d.line([130, ty - 10, 1110, ty - 10], fill=(40, 40, 40), width=2)
    # headers placed inconsistently (not above their columns)
    d.text((150, ty), "Item / Description", font=font(20, True), fill=(20, 20, 20))
    d.text((720, ty), "Unit", font=font(20, True), fill=(20, 20, 20))
    d.text((640, ty), "Qty", font=font(20, True), fill=(20, 20, 20))
    d.text((980, ty), "Amount", font=font(20, True), fill=(20, 20, 20))

    ry = ty + 44
    for i, it in enumerate(GT["line_items"], 1):
        d.text((150, ry), f"{i}. {it['description']}", font=font(20), fill=(15, 15, 15))
        # qty/unit/amount with inconsistent alignment (left vs right vs centered)
        d.text((648, ry), str(int(it["quantity"])), font=font(20), fill=(15, 15, 15))
        d.text((700, ry), f"${it['unit_price']:,.2f}", font=font(20), fill=(15, 15, 15))
        amt = f"${it['line_total']:,.2f}"
        # Amount column is right-aligned against x=1090.
        d.text((1090 - d.textlength(amt, font=font(20)), ry), amt, font=font(20), fill=(15, 15, 15))
        ry += 46
    d.line([130, ry + 6, 1110, ry + 6], fill=(40, 40, 40), width=1)

    # --- totals scattered across corners ---
    d.text((180, ry + 60), "Tax @ 8.25%", font=font(20), fill=(15, 15, 15))  # bottom-left
    d.text((320, ry + 60), "$751.70", font=font(20), fill=(15, 15, 15))
    d.text((640, ry + 30), "Sub-total", font=font(20), fill=(15, 15, 15))  # middle
    d.text((780, ry + 30), "$9,111.50", font=font(20), fill=(15, 15, 15))
    d.text((640, ry + 90), "Freight/Shipping", font=font(20), fill=(15, 15, 15))
    d.text((820, ry + 90), "$180.00", font=font(20), fill=(15, 15, 15))
    # grand total in a bold box, bottom-right
    d.rectangle([800, ry + 140, 1110, ry + 210], fill=(28, 40, 80))
    d.text((820, ry + 150), "GRAND TOTAL", font=font(20, True), fill=(255, 255, 255))
    d.text((820, ry + 178), "$10,043.20 USD", font=font(24, True), fill=(255, 255, 255))

    # --- sideways Terms & Conditions block (rotated 90) on the right edge ---
    rotated(img, "Terms: Net 30 days. Late fee 1.5%/mo. Goods remain property of seller until paid.",
            (1150, 360), 90, font(18), fill=(90, 90, 90))

    # footer note (two-column-ish)
    d.text((150, H - 120), "Notes: Partial back-order on item 3 may apply.", font=font(18), fill=(60, 60, 60))
    d.text((150, H - 92), "Remittance: SWIFT MERIGB2L / IBAN GB22 MERI 0099 8812", font=font(18), fill=(60, 60, 60))

    out_png = OUT / "complex_invoice_messy.png"
    img.save(out_png)
    # Explicit encoding so the output does not depend on the platform default.
    (OUT / "complex_invoice_messy.gt.json").write_text(json.dumps(GT, indent=2), encoding="utf-8")
    print(f"✓ wrote {out_png} ({W}x{H})")
    print(f"✓ wrote {OUT/'complex_invoice_messy.gt.json'}")


if __name__ == "__main__":
    main()