File size: 7,490 Bytes
ae053b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python3
"""Generate a DELIBERATELY HARD invoice image to stress the OCR backends:
  • rotated vertical "INVOICE" banner + sideways Terms block (orientation problems)
  • a diagonal translucent "ORIGINAL COPY" watermark overlapping text
  • scattered header fields (invoice #, dates, vendor, bill-to in offset boxes)
  • a misaligned line-item table (inconsistent column alignment)
  • totals scattered across corners (subtotal / tax / shipping / grand total / balance due)

This is the kind of layout where classic OCR (Tesseract) struggles but a vision
LLM (MiniCPM-V) reads it correctly. Writes:
  backend/evals/datasets/complex_invoice_messy.png
  backend/evals/datasets/complex_invoice_messy.gt.json   (skip_eval — showcase only)
No .txt sidecar on purpose — this document REQUIRES a real OCR engine.
"""
from __future__ import annotations

import json
from pathlib import Path

from PIL import Image, ImageDraw, ImageFont

# Parent of the directory that contains this script (resolved to an absolute path).
ROOT = Path(__file__).resolve().parent.parent
# Directory that receives the generated PNG and its ground-truth JSON.
OUT = ROOT.joinpath("backend", "evals", "datasets")
# Canvas width and height in pixels.
W = 1240
H = 1600

# Ground truth for the rendered invoice; main() draws these values and dumps
# this dict verbatim as the .gt.json sidecar.
# NOTE: "total" equals subtotal + tax_amount + the 180.00 freight line that
# main() renders; freight has no field of its own here.
GT = {
    "doc_type": "invoice",
    "invoice_number": "INV-9X-44821",
    "issue_date": "2026-04-18",
    "due_date": "2026-05-18",
    "vendor_name": "Meridian Industrial Components Ltd",
    "bill_to_name": "Aperture Retail Group",
    "currency": "USD",
    "subtotal": 9111.50,
    "tax_amount": 751.70,
    "total": 10043.20,
    # One dict per table row, built from (description, quantity, unit price, line total).
    "line_items": [
        {
            "description": description,
            "quantity": quantity,
            "unit_price": unit_price,
            "line_total": line_total,
        }
        for description, quantity, unit_price, line_total in (
            ("Hydraulic pump HX-220", 4, 1250.00, 5000.00),
            ("Seal kit (set of 12)", 10, 85.50, 855.00),
            ("Pressure gauge 0-300psi", 6, 142.75, 856.50),
            ("Installation labor", 1, 2400.00, 2400.00),
        )
    ],
    # skip_eval marks this sample as showcase-only.
    "_meta": {
        "doc_type": "invoice",
        "channel": "scanned",
        "difficulty": "complex_layout",
        "skip_eval": True,
    },
}


def font(sz, bold=False):
    """Return a font at pixel size *sz*, preferring a system Arial/Helvetica face.

    The candidate files are tried in order and the first one that loads wins.
    When none of them can be loaded, Pillow's built-in default font is used,
    requested at *sz* where the installed Pillow supports a sized default, so
    the layout does not collapse to a fixed tiny font on machines that lack
    the listed files.

    Args:
        sz: Requested font size in pixels.
        bold: Prefer the bold Arial face as the first candidate. The later
            candidates are the same for both settings.

    Returns:
        The first loadable candidate font, otherwise Pillow's default font.

    Only font-availability failures are skipped; unrelated errors raised by
    Pillow (for example for an invalid size) propagate to the caller.
    """
    first_choice = (
        "/System/Library/Fonts/Supplemental/Arial Bold.ttf"
        if bold
        else "/System/Library/Fonts/Supplemental/Arial.ttf"
    )
    candidates = (
        first_choice,
        "/System/Library/Fonts/Helvetica.ttc",
        "/Library/Fonts/Arial.ttf",
    )
    for path in candidates:
        try:
            return ImageFont.truetype(path, sz)
        except (OSError, ImportError):
            # OSError: the file is missing or unreadable.
            # ImportError: presumably a Pillow build without FreeType support.
            continue
    try:
        # Newer Pillow releases accept a size for the built-in font.
        return ImageFont.load_default(size=sz)
    except (TypeError, ImportError):
        # Older Pillow (no ``size`` keyword) or no FreeType: fixed-size default.
        return ImageFont.load_default()


def rotated(base, text, xy, angle, fnt, fill=(20, 20, 20)):
    """Draw *text* rotated by *angle* degrees onto *base*, in place.

    The text is rendered on a transparent layer sized to the measured text
    box, the layer is rotated with ``expand=True``, and the result is pasted
    at *xy* using its own alpha channel as the mask. Because the layer fits
    the text, *xy* is the top-left corner of the rotated text's bounding box.
    Sizing the layer from a ``len(text) * fnt.size`` guess instead leaves
    unused width that turns into an offset after rotation, which pushed
    90-degree text down and away from *xy*.

    Args:
        base: Destination image; modified in place.
        text: String to render.
        xy: ``(x, y)`` paste position of the rotated layer's top-left corner.
        angle: Rotation in degrees, passed to ``Image.rotate``.
        fnt: Font used for both measuring and drawing.
        fill: Text colour.
    """
    pad = 2  # margin so anti-aliased glyph edges are not cut off
    probe = ImageDraw.Draw(Image.new("RGBA", (1, 1), (0, 0, 0, 0)))
    _, _, right, bottom = probe.textbbox((pad, pad), text, font=fnt)
    # int() because image dimensions must be integers; +1 covers any fraction.
    layer_size = (max(1, int(right)) + pad + 1, max(1, int(bottom)) + pad + 1)
    layer = Image.new("RGBA", layer_size, (0, 0, 0, 0))
    ImageDraw.Draw(layer).text((pad, pad), text, font=fnt, fill=fill)
    layer = layer.rotate(angle, expand=True)
    base.paste(layer, xy, layer)


def main():
    """Render the messy invoice PNG and write its ground-truth JSON sidecar.

    Draws, in order: the watermark, the vertical banner, the scattered header
    fields, the vendor and bill-to boxes, the line-item table, the scattered
    totals, the sideways terms line and the footer. Drawing order is kept
    because later elements paint over earlier ones where they overlap.

    Every amount that exists in ``GT`` is formatted from ``GT`` so the image
    and the ground truth cannot drift apart. Freight has no ``GT`` field and
    stays a literal. Both output files are written into ``OUT``, which is
    created if missing.
    """

    def money(value):
        # Dollar amount with thousands separators and two decimals.
        return f"${value:,.2f}"

    OUT.mkdir(parents=True, exist_ok=True)
    img = Image.new("RGB", (W, H), "white")
    d = ImageDraw.Draw(img)

    # Load each reused face once instead of once per draw call.
    regular_18 = font(18)
    regular_20 = font(20)
    bold_18 = font(18, True)
    bold_20 = font(20, True)
    bold_22 = font(22, True)
    bold_26 = font(26, True)

    total_text = money(GT["total"])

    # --- diagonal translucent watermark (pasted first, so later text draws over it) ---
    wm = Image.new("RGBA", (W, H), (0, 0, 0, 0))
    ImageDraw.Draw(wm).text(
        (140, 120), "ORIGINAL COPY", font=font(120, True), fill=(150, 150, 150, 60)
    )
    wm = wm.rotate(28, center=(W // 2, H // 2))
    img.paste(wm, (0, 0), wm)

    # --- left vertical INVOICE banner (rotated 90) ---
    d.rectangle([18, 60, 96, 760], fill=(28, 40, 80))
    rotated(img, "INVOICE", (24, 470), 90, font(54, True), fill=(255, 255, 255))

    # --- scattered header fields (offset boxes, inconsistent placement) ---
    d.text((900, 70), "Invoice No.", font=bold_20, fill=(90, 90, 90))
    d.text((900, 98), GT["invoice_number"], font=bold_26, fill=(10, 10, 10))
    d.text((690, 150), f"Issued: {GT['issue_date']}", font=regular_20, fill=(10, 10, 10))
    # due date sits in a different spot from the issue date on purpose
    d.text((980, 200), f"Due {GT['due_date']}", font=regular_20, fill=(160, 30, 30))
    # balance due repeated near the top in red (inconsistent on purpose)
    d.text((620, 60), f"BALANCE DUE  {total_text}", font=bold_26, fill=(190, 20, 20))

    # --- vendor block (top-left, after banner) ---
    d.rectangle([130, 90, 560, 220], outline=(120, 120, 120), width=2)
    d.text((145, 100), "FROM / Remit to:", font=bold_18, fill=(70, 70, 70))
    d.text((145, 128), GT["vendor_name"], font=bold_22, fill=(10, 10, 10))
    d.text((145, 160), "Unit 7, Kvaerner Estate", font=regular_18, fill=(40, 40, 40))
    d.text((145, 184), "VAT GB-882-114", font=regular_18, fill=(40, 40, 40))

    # --- bill-to block (offset right-middle) ---
    d.rectangle([640, 300, 1080, 420], outline=(120, 120, 120), width=2)
    d.text((655, 310), "Bill To", font=bold_18, fill=(70, 70, 70))
    d.text((655, 338), GT["bill_to_name"], font=bold_22, fill=(10, 10, 10))
    d.text((655, 372), "Accounts Payable, Floor 3", font=regular_18, fill=(40, 40, 40))
    d.text((655, 396), "PO ref: PO-77-3391", font=regular_18, fill=(40, 40, 40))

    # --- misaligned line-item table ---
    ty = 470
    d.line([130, ty - 10, 1110, ty - 10], fill=(40, 40, 40), width=2)
    # headers placed inconsistently (not above their columns)
    for x, label in (
        (150, "Item / Description"),
        (720, "Unit"),
        (640, "Qty"),
        (980, "Amount"),
    ):
        d.text((x, ty), label, font=bold_20, fill=(20, 20, 20))

    ry = ty + 44
    row_ink = (15, 15, 15)
    for i, it in enumerate(GT["line_items"], 1):
        d.text((150, ry), f"{i}.  {it['description']}", font=regular_20, fill=row_ink)
        # qty and unit price are left-anchored; the amount is right-aligned to x=1090
        d.text((648, ry), str(int(it["quantity"])), font=regular_20, fill=row_ink)
        d.text((700, ry), money(it["unit_price"]), font=regular_20, fill=row_ink)
        amt = money(it["line_total"])
        d.text((1090 - d.textlength(amt, font=regular_20), ry), amt, font=regular_20, fill=row_ink)
        ry += 46
    d.line([130, ry + 6, 1110, ry + 6], fill=(40, 40, 40), width=1)

    # --- totals scattered across corners (positions relative to the table bottom) ---
    for x, dy, label in (
        (180, 60, "Tax @ 8.25%"),             # bottom-left
        (320, 60, money(GT["tax_amount"])),
        (640, 30, "Sub-total"),               # middle
        (780, 30, money(GT["subtotal"])),
        (640, 90, "Freight/Shipping"),
        (820, 90, "$180.00"),                 # freight has no GT field
    ):
        d.text((x, ry + dy), label, font=regular_20, fill=row_ink)

    # grand total in a filled box, bottom-right
    d.rectangle([800, ry + 140, 1110, ry + 210], fill=(28, 40, 80))
    d.text((820, ry + 150), "GRAND TOTAL", font=bold_20, fill=(255, 255, 255))
    d.text(
        (820, ry + 178),
        f"{total_text}  {GT['currency']}",
        font=font(24, True),
        fill=(255, 255, 255),
    )

    # --- sideways Terms & Conditions block (rotated 90) on the right edge ---
    rotated(
        img,
        "Terms: Net 30 days. Late fee 1.5%/mo. Goods remain property of seller until paid.",
        (1150, 360),
        90,
        regular_18,
        fill=(90, 90, 90),
    )

    # --- footer notes ---
    d.text(
        (150, H - 120),
        "Notes: Partial back-order on item 3 may apply.",
        font=regular_18,
        fill=(60, 60, 60),
    )
    d.text(
        (150, H - 92),
        "Remittance: SWIFT MERIGB2L  /  IBAN GB22 MERI 0099 8812",
        font=regular_18,
        fill=(60, 60, 60),
    )

    # --- write the image and its ground-truth sidecar ---
    out_png = OUT / "complex_invoice_messy.png"
    out_json = OUT / "complex_invoice_messy.gt.json"
    img.save(out_png)
    # explicit encoding so the sidecar does not depend on the platform default
    out_json.write_text(json.dumps(GT, indent=2), encoding="utf-8")
    print(f"✓ wrote {out_png} ({W}x{H})")
    print(f"✓ wrote {out_json}")


# Generate the files only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()