Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Generate a DELIBERATELY HARD invoice image to stress the OCR backends: | |
| • rotated vertical "INVOICE" banner + sideways Terms block (orientation problems) | |
| • a diagonal translucent "ORIGINAL COPY" watermark overlapping text | |
| • scattered header fields (invoice #, dates, vendor, bill-to in offset boxes) | |
| • a misaligned line-item table (inconsistent column alignment) | |
| • totals scattered across corners (subtotal / tax / shipping / grand total / balance due) | |
| This is the kind of layout where classic OCR (Tesseract) struggles but a vision | |
| LLM (MiniCPM-V) reads it correctly. Writes: | |
| backend/evals/datasets/complex_invoice_messy.png | |
| backend/evals/datasets/complex_invoice_messy.gt.json (skip_eval — showcase only) | |
| No .txt sidecar on purpose — this document REQUIRES a real OCR engine. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from PIL import Image, ImageDraw, ImageFont | |
# Directory that contains this script, resolved to an absolute path.
_SCRIPT_DIR = Path(__file__).resolve().parent
# One level above the script's directory (presumably the repository root --
# confirm against the project layout).
ROOT = _SCRIPT_DIR.parent
# Destination directory for the generated PNG and its ground-truth JSON.
OUT = ROOT.joinpath("backend", "evals", "datasets")
# Canvas size of the generated invoice image, in pixels.
W = 1240
H = 1600
# Field order of one line item; it is also the key order of each emitted dict.
_ITEM_FIELDS = ("description", "quantity", "unit_price", "line_total")

# One tuple per invoice row, in the order they are printed on the invoice.
_ITEM_ROWS = (
    ("Hydraulic pump HX-220", 4, 1250.00, 5000.00),
    ("Seal kit (set of 12)", 10, 85.50, 855.00),
    ("Pressure gauge 0-300psi", 6, 142.75, 856.50),
    ("Installation labor", 1, 2400.00, 2400.00),
)

# Ground truth for the rendered invoice. It is both the source of the values
# drawn on the image and the payload written to the .gt.json sidecar.
GT = {
    "doc_type": "invoice",
    "invoice_number": "INV-9X-44821",
    "issue_date": "2026-04-18",
    "due_date": "2026-05-18",
    "vendor_name": "Meridian Industrial Components Ltd",
    "bill_to_name": "Aperture Retail Group",
    "currency": "USD",
    "subtotal": 9111.50,
    "tax_amount": 751.70,
    "total": 10043.20,
    "line_items": [dict(zip(_ITEM_FIELDS, row)) for row in _ITEM_ROWS],
    "_meta": {
        "doc_type": "invoice",
        "channel": "scanned",
        "difficulty": "complex_layout",
        "skip_eval": True,
    },
}
# Fonts already loaded by font(), keyed by (size, bold), so repeated requests
# for the same variant do not re-read the font file.
_FONT_CACHE = {}


def font(sz, bold=False):
    """Return a font object of size *sz*, bold if *bold* is true and available.

    Candidates are tried in order and the first one that loads wins:

    1. the macOS Arial / Helvetica files (the original preference order),
    2. bare Arial and DejaVu Sans file names, which ``ImageFont.truetype``
       may resolve through its own system font directory search.

    If none can be loaded, Pillow's built-in default font is returned. It is
    requested at size *sz* where ``load_default`` accepts a ``size`` argument;
    on Pillow versions that do not, the fixed-size default is used.

    Only the first candidate is bold-specific among the macOS paths, so a
    bold request can still come back as a regular face when it falls through.
    """
    key = (sz, bool(bold))
    cached = _FONT_CACHE.get(key)
    if cached is not None:
        return cached

    candidates = [
        "/System/Library/Fonts/Supplemental/Arial Bold.ttf" if bold else "/System/Library/Fonts/Supplemental/Arial.ttf",
        "/System/Library/Fonts/Helvetica.ttc",
        "/Library/Fonts/Arial.ttf",
        # Bare file names: looked up by Pillow in the platform's font dirs.
        "arialbd.ttf" if bold else "arial.ttf",
        "DejaVuSans-Bold.ttf" if bold else "DejaVuSans.ttf",
    ]

    loaded = None
    for path in candidates:
        try:
            loaded = ImageFont.truetype(path, sz)
        except (OSError, ImportError):
            # OSError: file missing/unreadable. ImportError: Pillow built
            # without FreeType. Either way, move on to the next candidate.
            continue
        break

    if loaded is None:
        try:
            # Honour the requested size instead of silently shrinking all text.
            loaded = ImageFont.load_default(size=sz)
        except TypeError:
            # Pillow version whose load_default() takes no size argument.
            loaded = ImageFont.load_default()

    _FONT_CACHE[key] = loaded
    return loaded
def rotated(base, text, xy, angle, fnt, fill=(20, 20, 20)):
    """Draw single-line *text* rotated by *angle* degrees onto *base*, in place.

    The text is rendered on a transparent RGBA tile sized from the font's
    measured bounding box plus a small margin. The tile is then rotated with
    ``expand=True`` and pasted at *xy*, using its own alpha channel as the
    mask.

    Sizing the tile from the measured box (rather than ``len(text) *
    fnt.size``) matters: an oversized tile leaves a long empty strip that,
    after rotation, pushes the text far away from *xy* -- for long strings
    that moved the start of the text off the page. With a tight tile, *xy*
    is the top-left corner of the rotated text itself.

    Args:
        base: image to draw onto; modified in place.
        text: string to render (measured as a single line).
        xy: (x, y) paste position of the rotated tile's top-left corner.
        angle: rotation passed to ``Image.rotate`` (degrees, counter-clockwise).
        fnt: font object providing ``getbbox``.
        fill: text colour.
    """
    margin = 2
    left, top, right, bottom = fnt.getbbox(text)
    # +1 keeps the last pixel column/row inside the tile if the box is fractional.
    tile_w = max(1, int(right - left) + 1) + 2 * margin
    tile_h = max(1, int(bottom - top) + 1) + 2 * margin

    tile = Image.new("RGBA", (tile_w, tile_h), (0, 0, 0, 0))
    # Shift by the box origin so the inked area starts exactly at the margin.
    ImageDraw.Draw(tile).text((margin - left, margin - top), text, font=fnt, fill=fill)

    tile = tile.rotate(angle, expand=True)
    base.paste(tile, xy, tile)
def main():
    """Render the messy invoice image and write it with its ground-truth JSON.

    Drawing order on a white ``W`` x ``H`` canvas: rotated translucent
    watermark, vertical "INVOICE" banner, scattered header fields, vendor and
    bill-to boxes, the line-item table, the scattered totals, the sideways
    terms line and the footer notes.

    Writes ``complex_invoice_messy.png`` and ``complex_invoice_messy.gt.json``
    (the ``GT`` dict) into ``OUT``, creating the directory if needed, and
    prints both paths.
    """
    OUT.mkdir(parents=True, exist_ok=True)
    img = Image.new("RGB", (W, H), "white")
    d = ImageDraw.Draw(img)

    # Load each font variant once rather than on every draw call.
    f18, f18b = font(18), font(18, True)
    f20, f20b = font(20), font(20, True)
    f22b, f24b, f26b = font(22, True), font(24, True), font(26, True)

    # Colours used more than once.
    white = (255, 255, 255)
    navy = (28, 40, 80)
    black = (10, 10, 10)
    body = (15, 15, 15)
    heading = (20, 20, 20)
    detail = (40, 40, 40)
    note = (60, 60, 60)
    label = (70, 70, 70)
    muted = (90, 90, 90)
    frame = (120, 120, 120)

    # Printed amounts come from GT so the image cannot drift from the
    # ground truth written alongside it.
    subtotal_str = f"${GT['subtotal']:,.2f}"
    tax_str = f"${GT['tax_amount']:,.2f}"
    total_str = f"${GT['total']:,.2f}"

    # --- diagonal translucent watermark overlapping content ---
    wm = Image.new("RGBA", (W, H), (0, 0, 0, 0))
    ImageDraw.Draw(wm).text((140, 120), "ORIGINAL COPY", font=font(120, True), fill=(150, 150, 150, 60))
    wm = wm.rotate(28, center=(W // 2, H // 2))
    img.paste(wm, (0, 0), wm)

    # --- left vertical INVOICE banner (rotated 90) ---
    d.rectangle([18, 60, 96, 760], fill=navy)
    rotated(img, "INVOICE", (24, 470), 90, font(54, True), fill=white)

    # --- scattered header fields (offset boxes, inconsistent placement) ---
    d.text((900, 70), "Invoice No.", font=f20b, fill=muted)
    d.text((900, 98), GT["invoice_number"], font=f26b, fill=black)
    d.text((690, 150), f"Issued: {GT['issue_date']}", font=f20, fill=black)
    d.text((980, 200), f"Due {GT['due_date']}", font=f20, fill=(160, 30, 30))  # offset, different spot
    # Balance due repeated near top in red (inconsistent)
    d.text((620, 60), f"BALANCE DUE {total_str}", font=f26b, fill=(190, 20, 20))

    # vendor block (top-left, after banner)
    d.rectangle([130, 90, 560, 220], outline=frame, width=2)
    d.text((145, 100), "FROM / Remit to:", font=f18b, fill=label)
    d.text((145, 128), GT["vendor_name"], font=f22b, fill=black)
    d.text((145, 160), "Unit 7, Kvaerner Estate", font=f18, fill=detail)
    d.text((145, 184), "VAT GB-882-114", font=f18, fill=detail)

    # bill-to block (offset right-middle)
    d.rectangle([640, 300, 1080, 420], outline=frame, width=2)
    d.text((655, 310), "Bill To", font=f18b, fill=label)
    d.text((655, 338), GT["bill_to_name"], font=f22b, fill=black)
    d.text((655, 372), "Accounts Payable, Floor 3", font=f18, fill=detail)
    d.text((655, 396), "PO ref: PO-77-3391", font=f18, fill=detail)

    # --- misaligned line-item table ---
    ty = 470
    d.line([130, ty - 10, 1110, ty - 10], fill=detail, width=2)
    # headers placed inconsistently on purpose ("Unit" and "Qty" are swapped
    # relative to the columns below them)
    d.text((150, ty), "Item / Description", font=f20b, fill=heading)
    d.text((720, ty), "Unit", font=f20b, fill=heading)
    d.text((640, ty), "Qty", font=f20b, fill=heading)
    d.text((980, ty), "Amount", font=f20b, fill=heading)

    ry = ty + 44
    for i, it in enumerate(GT["line_items"], 1):
        d.text((150, ry), f"{i}. {it['description']}", font=f20, fill=body)
        # qty and unit price are left-aligned; the amount is right-aligned at x=1090
        d.text((648, ry), str(int(it["quantity"])), font=f20, fill=body)
        d.text((700, ry), f"${it['unit_price']:,.2f}", font=f20, fill=body)
        amt = f"${it['line_total']:,.2f}"
        d.text((1090 - d.textlength(amt, font=f20), ry), amt, font=f20, fill=body)
        ry += 46
    d.line([130, ry + 6, 1110, ry + 6], fill=detail, width=1)

    # --- totals scattered across corners ---
    d.text((180, ry + 60), "Tax @ 8.25%", font=f20, fill=body)  # bottom-left
    d.text((320, ry + 60), tax_str, font=f20, fill=body)
    d.text((640, ry + 30), "Sub-total", font=f20, fill=body)  # middle
    d.text((780, ry + 30), subtotal_str, font=f20, fill=body)
    # Freight has no field in GT; it is the 180.00 that makes
    # subtotal + tax add up to GT["total"].
    d.text((640, ry + 90), "Freight/Shipping", font=f20, fill=body)
    d.text((820, ry + 90), "$180.00", font=f20, fill=body)
    # grand total in a bold box, bottom-right
    d.rectangle([800, ry + 140, 1110, ry + 210], fill=navy)
    d.text((820, ry + 150), "GRAND TOTAL", font=f20b, fill=white)
    d.text((820, ry + 178), f"{total_str} {GT['currency']}", font=f24b, fill=white)

    # --- sideways Terms & Conditions block (rotated 90) on the right edge ---
    rotated(img, "Terms: Net 30 days. Late fee 1.5%/mo. Goods remain property of seller until paid.",
            (1150, 360), 90, f18, fill=muted)

    # footer notes
    d.text((150, H - 120), "Notes: Partial back-order on item 3 may apply.", font=f18, fill=note)
    d.text((150, H - 92), "Remittance: SWIFT MERIGB2L / IBAN GB22 MERI 0099 8812", font=f18, fill=note)

    out_png = OUT / "complex_invoice_messy.png"
    out_json = OUT / "complex_invoice_messy.gt.json"
    img.save(out_png)
    out_json.write_text(json.dumps(GT, indent=2), encoding="utf-8")
    print(f"✓ wrote {out_png} ({W}x{H})")
    print(f"✓ wrote {out_json}")
# Generate the files only when run as a script, not when imported.
if __name__ == "__main__":
    main()