| |
"""Generate a DELIBERATELY HARD invoice image to stress the OCR backends:
  • rotated vertical "INVOICE" banner + sideways Terms block (orientation problems)
  • a diagonal translucent "ORIGINAL COPY" watermark overlapping text
  • scattered header fields (invoice #, dates, vendor, bill-to in offset boxes)
  • a misaligned line-item table (inconsistent column alignment)
  • totals scattered across corners (subtotal / tax / shipping / grand total / balance due)

This is the kind of layout where classic OCR (Tesseract) struggles but a vision
LLM (MiniCPM-V) reads it correctly. Writes:
  backend/evals/datasets/complex_invoice_messy.png
  backend/evals/datasets/complex_invoice_messy.gt.json   (skip_eval — showcase only)
No .txt sidecar on purpose — this document REQUIRES a real OCR engine.
"""
| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| from PIL import Image, ImageDraw, ImageFont |
|
|
# Two directory levels above this script's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Directory that receives the generated PNG and its ground-truth JSON.
OUT = ROOT.joinpath("backend", "evals", "datasets")
# Canvas size in pixels (width, height).
W = 1240
H = 1600
|
|
# Ground truth for the rendered invoice; dumped verbatim to the .gt.json file.
# Key order is significant because it is the order json.dumps emits.
GT = {
    "doc_type": "invoice",
    "invoice_number": "INV-9X-44821",
    "issue_date": "2026-04-18",
    "due_date": "2026-05-18",
    "vendor_name": "Meridian Industrial Components Ltd",
    "bill_to_name": "Aperture Retail Group",
    "currency": "USD",
    "subtotal": 9111.50,
    "tax_amount": 751.70,
    "total": 10043.20,
    # One (description, quantity, unit_price, line_total) tuple per table row.
    "line_items": [
        {
            "description": description,
            "quantity": quantity,
            "unit_price": unit_price,
            "line_total": line_total,
        }
        for description, quantity, unit_price, line_total in (
            ("Hydraulic pump HX-220", 4, 1250.00, 5000.00),
            ("Seal kit (set of 12)", 10, 85.50, 855.00),
            ("Pressure gauge 0-300psi", 6, 142.75, 856.50),
            ("Installation labor", 1, 2400.00, 2400.00),
        )
    ],
    "_meta": {
        "doc_type": "invoice",
        "channel": "scanned",
        "difficulty": "complex_layout",
        "skip_eval": True,
    },
}
|
|
|
|
def font(sz, bold=False):
    """Return a font of size ``sz``, preferring Arial/Helvetica-style faces.

    Candidates are tried in order and the first one that loads wins. The
    original macOS paths come first, so machines where they exist get the
    same font as before. Bare file names follow so that Pillow's own
    system-font-directory search can find an equivalent face on other
    platforms.

    Args:
        sz: Font size passed to ``ImageFont.truetype``.
        bold: Prefer the bold variant of a face where a separate file exists.

    Returns:
        The first loadable TrueType font, otherwise Pillow's default font
        (sized to ``sz`` when the installed Pillow supports it).
    """
    candidates = [
        "/System/Library/Fonts/Supplemental/Arial Bold.ttf" if bold else "/System/Library/Fonts/Supplemental/Arial.ttf",
        "/System/Library/Fonts/Helvetica.ttc",
        "/Library/Fonts/Arial.ttf",
        # Non-macOS fallbacks: bare names are resolved by Pillow against the
        # platform's font directories.
        "arialbd.ttf" if bold else "arial.ttf",
        "DejaVuSans-Bold.ttf" if bold else "DejaVuSans.ttf",
        "LiberationSans-Bold.ttf" if bold else "LiberationSans-Regular.ttf",
    ]
    for path in candidates:
        try:
            return ImageFont.truetype(path, sz)
        except (OSError, ImportError):
            # OSError: file missing/unreadable. ImportError: Pillow built
            # without FreeType. Either way, move on to the next candidate.
            continue

    # Last resort. Newer Pillow releases accept a size for the built-in font;
    # older ones only offer a fixed-size bitmap font.
    try:
        return ImageFont.load_default(size=sz)
    except (TypeError, ImportError, OSError):
        return ImageFont.load_default()
|
|
|
|
def rotated(base, text, xy, angle, fnt, fill=(20, 20, 20)):
    """Draw ``text`` rotated by ``angle`` degrees onto ``base`` at ``xy``.

    The text is rendered on a transparent RGBA scratch image, rotated with
    ``expand=True`` and pasted using its own alpha channel as the mask, so
    only the glyph pixels are composited onto ``base``.

    The scratch image is sized from the measured text extent. Sizing it from
    ``len(text) * fnt.size`` leaves a wide transparent margin after the last
    glyph; once rotated, that margin sits between ``xy`` and the text (for a
    90-degree turn it ends up above the text), pushing the text away from
    where the caller asked for it.

    Args:
        base: Image to paste onto; modified in place.
        text: String to render.
        xy: Top-left corner on ``base`` for the rotated scratch image.
        angle: Rotation in degrees, passed to ``Image.rotate``.
        fnt: Font used to render ``text``.
        fill: Text colour.
    """
    import math

    pad = 2  # same 2px inset the text is drawn at
    probe = ImageDraw.Draw(Image.new("RGBA", (1, 1)))
    _, _, right, bottom = probe.textbbox((pad, pad), text, font=fnt)

    width = max(1, math.ceil(right) + pad)
    # Keep the original cross-axis size (font size + 16) so placement across
    # the text direction is unchanged; fonts without a ``size`` attribute fall
    # back to the measured height.
    height = max(math.ceil(bottom) + pad, getattr(fnt, "size", 0) + 16)

    tmp = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    ImageDraw.Draw(tmp).text((pad, pad), text, font=fnt, fill=fill)
    tmp = tmp.rotate(angle, expand=True)
    base.paste(tmp, xy, tmp)
|
|
|
|
def main():
    """Render the messy invoice and write the PNG plus its ground-truth JSON.

    Creates ``OUT`` if needed, draws the invoice onto a white ``W`` x ``H``
    canvas, then writes ``complex_invoice_messy.png`` and
    ``complex_invoice_messy.gt.json`` (a dump of ``GT``) into ``OUT`` and
    prints both paths.

    Every amount shown on the page is formatted from ``GT`` so the image
    cannot drift from the ground truth written next to it. ``GT`` has no
    shipping field, so the freight line is the remainder
    ``total - subtotal - tax_amount``.
    """
    OUT.mkdir(parents=True, exist_ok=True)
    img = Image.new("RGB", (W, H), "white")
    d = ImageDraw.Draw(img)

    def money(value):
        """Format a number as dollars with thousands separators, e.g. $1,234.50."""
        return f"${value:,.2f}"

    # Load each font once instead of re-opening the font file per text call.
    regular18, regular20 = font(18), font(20)
    bold18, bold20, bold22, bold24, bold26 = (
        font(size, True) for size in (18, 20, 22, 24, 26)
    )

    total_text = money(GT["total"])
    shipping = round(GT["total"] - GT["subtotal"] - GT["tax_amount"], 2)

    # Diagonal translucent watermark. It is pasted first, so all text drawn
    # afterwards sits on top of it.
    wm = Image.new("RGBA", (W, H), (0, 0, 0, 0))
    wd = ImageDraw.Draw(wm)
    wd.text((140, 120), "ORIGINAL COPY", font=font(120, True), fill=(150, 150, 150, 60))
    wm = wm.rotate(28, center=(W // 2, H // 2))
    img.paste(wm, (0, 0), wm)

    # Vertical banner with sideways "INVOICE".
    d.rectangle([18, 60, 96, 760], fill=(28, 40, 80))
    rotated(img, "INVOICE", (24, 470), 90, font(54, True), fill=(255, 255, 255))

    # Header fields, deliberately scattered.
    d.text((900, 70), "Invoice No.", font=bold20, fill=(90, 90, 90))
    d.text((900, 98), GT["invoice_number"], font=bold26, fill=(10, 10, 10))
    d.text((690, 150), f"Issued: {GT['issue_date']}", font=regular20, fill=(10, 10, 10))
    d.text((980, 200), f"Due {GT['due_date']}", font=regular20, fill=(160, 30, 30))
    # Balance due sits at the top of the page, away from the other totals.
    d.text((620, 60), f"BALANCE DUE {total_text}", font=bold26, fill=(190, 20, 20))

    # Vendor box.
    d.rectangle([130, 90, 560, 220], outline=(120, 120, 120), width=2)
    d.text((145, 100), "FROM / Remit to:", font=bold18, fill=(70, 70, 70))
    d.text((145, 128), GT["vendor_name"], font=bold22, fill=(10, 10, 10))
    d.text((145, 160), "Unit 7, Kvaerner Estate", font=regular18, fill=(40, 40, 40))
    d.text((145, 184), "VAT GB-882-114", font=regular18, fill=(40, 40, 40))

    # Bill-to box, offset from the vendor box.
    d.rectangle([640, 300, 1080, 420], outline=(120, 120, 120), width=2)
    d.text((655, 310), "Bill To", font=bold18, fill=(70, 70, 70))
    d.text((655, 338), GT["bill_to_name"], font=bold22, fill=(10, 10, 10))
    d.text((655, 372), "Accounts Payable, Floor 3", font=regular18, fill=(40, 40, 40))
    d.text((655, 396), "PO ref: PO-77-3391", font=regular18, fill=(40, 40, 40))

    # Line-item table.
    ty = 470
    d.line([130, ty - 10, 1110, ty - 10], fill=(40, 40, 40), width=2)
    # Header x-positions intentionally do not line up with the row values.
    d.text((150, ty), "Item / Description", font=bold20, fill=(20, 20, 20))
    d.text((720, ty), "Unit", font=bold20, fill=(20, 20, 20))
    d.text((640, ty), "Qty", font=bold20, fill=(20, 20, 20))
    d.text((980, ty), "Amount", font=bold20, fill=(20, 20, 20))
    ry = ty + 44
    for i, it in enumerate(GT["line_items"], 1):
        d.text((150, ry), f"{i}. {it['description']}", font=regular20, fill=(15, 15, 15))
        d.text((648, ry), str(int(it["quantity"])), font=regular20, fill=(15, 15, 15))
        d.text((700, ry), money(it["unit_price"]), font=regular20, fill=(15, 15, 15))
        amt = money(it["line_total"])
        # Right-align the amount against x=1090.
        d.text((1090 - d.textlength(amt, font=regular20), ry), amt, font=regular20, fill=(15, 15, 15))
        ry += 46
    d.line([130, ry + 6, 1110, ry + 6], fill=(40, 40, 40), width=1)

    # Totals, scattered below the table.
    d.text((180, ry + 60), "Tax @ 8.25%", font=regular20, fill=(15, 15, 15))
    d.text((320, ry + 60), money(GT["tax_amount"]), font=regular20, fill=(15, 15, 15))
    d.text((640, ry + 30), "Sub-total", font=regular20, fill=(15, 15, 15))
    d.text((780, ry + 30), money(GT["subtotal"]), font=regular20, fill=(15, 15, 15))
    d.text((640, ry + 90), "Freight/Shipping", font=regular20, fill=(15, 15, 15))
    d.text((820, ry + 90), money(shipping), font=regular20, fill=(15, 15, 15))

    # Grand-total box.
    d.rectangle([800, ry + 140, 1110, ry + 210], fill=(28, 40, 80))
    d.text((820, ry + 150), "GRAND TOTAL", font=bold20, fill=(255, 255, 255))
    d.text((820, ry + 178), f"{total_text} {GT['currency']}", font=bold24, fill=(255, 255, 255))

    # Sideways terms block along the right edge.
    rotated(img, "Terms: Net 30 days. Late fee 1.5%/mo. Goods remain property of seller until paid.",
            (1150, 360), 90, regular18, fill=(90, 90, 90))

    # Footer notes.
    d.text((150, H - 120), "Notes: Partial back-order on item 3 may apply.", font=regular18, fill=(60, 60, 60))
    d.text((150, H - 92), "Remittance: SWIFT MERIGB2L / IBAN GB22 MERI 0099 8812", font=regular18, fill=(60, 60, 60))

    out_png = OUT / "complex_invoice_messy.png"
    out_json = OUT / "complex_invoice_messy.gt.json"
    img.save(out_png)
    out_json.write_text(json.dumps(GT, indent=2), encoding="utf-8")
    print(f"✓ wrote {out_png} ({W}x{H})")
    print(f"✓ wrote {out_json}")
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|