File size: 20,638 Bytes
082d661
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
#!/usr/bin/env python3
"""Generate EXTREMELY hard OCR documents — embedded images + heavy degradation:

  1. extreme_receipt_photo   — thermal receipt PHOTOGRAPHED on a desk: perspective
     warp, uneven lighting, shadow, crinkle lines, faded thermal band, printed logo.
  2. extreme_po_collage      — image-heavy purchase order: product THUMBNAIL IMAGES
     in table rows, QR code, barcode, rotated APPROVED stamp over the table,
     signature scribble, misaligned columns.
  3. extreme_contract_fax    — dense two-column contract received BY FAX: low
     contrast, salt-and-pepper noise, skew, scanline streaks, punch-hole shadows,
     handwritten blue margin note, red RECEIVED stamp.

Each writes <id>.png + <id>.gt.json + <id>.txt (sidecar reference text, drawn from
the SAME strings as the image so CER/WER is fair). All are tagged skip_eval so the
main eval harness is unchanged; the OCR quality benchmark picks them up.
"""
from __future__ import annotations

import json
import math
import random
from pathlib import Path

import numpy as np
from PIL import Image, ImageDraw, ImageFilter, ImageFont

# Parent of the directory that contains this script (presumably the repository
# root -- confirm against the project layout).
ROOT = Path(__file__).resolve().parent.parent
# Directory that receives every generated .png / .gt.json / .txt file.
OUT = ROOT / "backend" / "evals" / "datasets"
# Module-wide PRNG with a fixed seed. signature() and gen_receipt() both draw from
# it, so their output depends on the order in which the generators are called.
rng = random.Random(42)


def font(sz, bold=False, mono=False):
    """Return a Pillow font of size ``sz``, preferring macOS system fonts.

    Args:
        sz: Font size handed to ``ImageFont.truetype``.
        bold: Pick Arial Bold instead of Arial as the first proportional
            candidate. It has no effect on the monospace candidates.
        mono: Try Courier New (bold first, then regular) before the
            proportional candidates.

    Returns:
        The first candidate that loads. If none does, Pillow's built-in
        default font, scaled to ``sz`` when the installed Pillow accepts a
        size for it, otherwise at its fixed default size.
    """
    candidates = []
    if mono:
        candidates += [
            "/System/Library/Fonts/Supplemental/Courier New Bold.ttf",
            "/System/Library/Fonts/Supplemental/Courier New.ttf",
        ]
    candidates += [
        "/System/Library/Fonts/Supplemental/Arial Bold.ttf" if bold else "/System/Library/Fonts/Supplemental/Arial.ttf",
        "/System/Library/Fonts/Helvetica.ttc",
        "/Library/Fonts/Arial.ttf",
    ]
    for path in candidates:
        try:
            return ImageFont.truetype(path, sz)
        except Exception:
            # Deliberate best-effort: whatever went wrong with this candidate
            # (missing file, unreadable font, ...), move on to the next one.
            continue
    # None of the hard-coded paths exist on this host (e.g. not macOS). Ask for
    # the default font at the requested size so text is not rendered tiny;
    # Pillow releases whose load_default() takes no size, or builds that cannot
    # scale the default font, fall through to the legacy call.
    try:
        return ImageFont.load_default(size=sz)
    except Exception:
        return ImageFont.load_default()


def _find_coeffs(dst, src):
    """Perspective coefficients so that src corners land on dst corners."""
    A, B = [], []
    for (x, y), (u, v) in zip(dst, src):
        A.append([x, y, 1, 0, 0, 0, -u * x, -u * y])
        A.append([0, 0, 0, x, y, 1, -v * x, -v * y])
        B.extend([u, v])
    res, *_ = np.linalg.lstsq(np.array(A, float), np.array(B, float), rcond=None)
    return res.tolist()


def stamp(text, color, angle, size=(360, 120), fsz=34):
    """Build a rotated, semi-transparent rubber-stamp overlay.

    Args:
        text: Caption centred horizontally inside the frame.
        color: RGB tuple; alpha 190 is appended for both frame and caption.
        angle: Rotation in degrees passed to ``Image.rotate``.
        size: ``(width, height)`` of the stamp before rotation.
        fsz: Font size of the caption (bold face).

    Returns:
        An RGBA image; it is larger than ``size`` once rotated because the
        rotation expands the canvas.
    """
    width, height = size
    ink = color + (190,)
    canvas = Image.new("RGBA", size, (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    pen.rounded_rectangle([4, 4, width - 4, height - 4], radius=16, outline=ink, width=5)
    face = font(fsz, bold=True)
    text_x = (width - pen.textlength(text, font=face)) / 2
    text_y = (height - fsz) / 2 - 6
    pen.text((text_x, text_y), text, font=face, fill=ink)
    return canvas.rotate(angle, expand=True, resample=Image.BICUBIC)


def signature(w=220, h=60, color=(25, 30, 120)):
    """Draw a wavy pen stroke that stands in for a handwritten signature.

    The stroke is a damped sine wave sampled at 60 points, each nudged
    vertically by a value drawn from the module-level ``rng``.

    Args:
        w: Canvas width in pixels.
        h: Canvas height in pixels.
        color: RGB tuple of the stroke; alpha 230 is appended.

    Returns:
        A transparent RGBA image of size ``(w, h)`` containing the stroke.
    """

    def point_at(t):
        # Amplitude shrinks from h/3 to half of that as t runs from 0 to 1.
        wobble = math.sin(t * 14 + 1.3) * (h / 3) * (1 - 0.5 * t)
        return 8 + t * (w - 16), h / 2 + wobble + rng.uniform(-2, 2)

    canvas = Image.new("RGBA", (w, h), (0, 0, 0, 0))
    stroke = [point_at(step / 59) for step in range(60)]
    ImageDraw.Draw(canvas).line(stroke, fill=color + (230,), width=3, joint="curve")
    return canvas


# ── 1. extreme_receipt_photo ──────────────────────────────────────────────────
# Text lines printed on the receipt, top to bottom. gen_receipt() draws them and
# writes the same list, newline-joined, to the .txt sidecar.
R_LINES = [
    "BREW & BEAN COFFEE Co.",
    "412 Harbor Lane, Portland OR",
    "Receipt #R-88341  Reg 02",
    "Date: 2026-06-02  14:37",
    "Currency: USD",
    "--------------------------------",
    "Flat White         2 x 4.75   9.50",
    "Butter Croissant   3 x 3.25   9.75",
    "Cold Brew Growler  1 x 14.00 14.00",
    "Loyalty discount             -2.50",
    "--------------------------------",
    "Subtotal                     30.75",
    "Tax 8.8%                      2.71",
    "TOTAL                        33.46",
    "Payment: VISA ****4421",
    "--------------------------------",
    "Thank you! brewandbean.example",
]
# Structured ground truth that gen_receipt() dumps to the .gt.json file.
# The "Loyalty discount" line above has no dedicated field here; the subtotal
# already includes it (9.50 + 9.75 + 14.00 - 2.50 = 30.75).
R_GT = {
    "doc_type": "receipt",
    "merchant": "BREW & BEAN COFFEE Co.",
    "date": "2026-06-02",
    "currency": "USD",
    "subtotal": 30.75,
    "tax_amount": 2.71,
    "total": 33.46,
    "payment_method": "VISA ****4421",
    "line_items": [
        {"description": "Flat White", "quantity": 2, "unit_price": 4.75, "line_total": 9.50},
        {"description": "Butter Croissant", "quantity": 3, "unit_price": 3.25, "line_total": 9.75},
        {"description": "Cold Brew Growler", "quantity": 1, "unit_price": 14.00, "line_total": 14.00},
    ],
    # Generation metadata; skip_eval is the tag the module docstring refers to.
    "_meta": {"doc_type": "receipt", "channel": "photo", "difficulty": "extreme", "skip_eval": True},
}


def gen_receipt():
    """Render the photographed-receipt sample and write its three output files.

    A 560x1010 receipt is drawn (logo, the ``R_LINES`` text, crinkle lines, a
    faded band), perspective-warped onto a 1000x1400 wood-grain desk with a
    blurred shadow, then lit unevenly, vignetted, given Gaussian noise and a
    light blur. Writes ``extreme_receipt_photo.png`` / ``.txt`` / ``.gt.json``
    into ``OUT`` and returns nothing.

    Crinkle and wood-grain jitter come from the module-level ``rng``; the pixel
    noise uses its own NumPy generator seeded with 7.
    """
    name = "extreme_receipt_photo"
    paper_w, paper_h = 560, 1010
    logo_ink = (60, 50, 45)
    sheet = Image.new("RGBA", (paper_w, paper_h), (250, 248, 242, 255))
    pen = ImageDraw.Draw(sheet)

    # printed logo: filled coffee-cup glyph in a ring
    logo_x, logo_y = paper_w // 2, 64
    pen.ellipse([logo_x - 44, logo_y - 44, logo_x + 44, logo_y + 44], outline=logo_ink, width=4)
    pen.rounded_rectangle([logo_x - 20, logo_y - 14, logo_x + 14, logo_y + 22], radius=5, fill=logo_ink)
    pen.arc([logo_x + 8, logo_y - 8, logo_x + 30, logo_y + 14], 270, 90, fill=logo_ink, width=4)

    # item / totals / payment lines hug the left margin, everything else is centred
    mono_face = font(24, mono=True)
    left_aligned = ("Flat", "Butter", "Cold", "Loyal", "Subt", "Tax", "TOTAL", "Paym")
    for row, text in enumerate(R_LINES):
        if text.startswith(left_aligned):
            text_x = 28
        else:
            text_x = (paper_w - pen.textlength(text, font=mono_face)) / 2
        pen.text((text_x, 130 + 36 * row), text, font=mono_face, fill=(40, 38, 36))
    pen.line([(0, paper_h - 14), (paper_w, paper_h - 6)], fill=(250, 248, 242, 0))  # keep bottom edge clean

    # crinkle lines: seven near-vertical strokes across the full paper height
    for _ in range(7):
        start_x = rng.randint(0, paper_w)
        end_x = start_x + rng.randint(-90, 90)
        pen.line([(start_x, 0), (end_x, paper_h)], fill=(208, 204, 196, 90), width=2)

    # faded thermal band: blend the colour channels of rows 430-559 toward white
    pixels = np.asarray(sheet).astype(np.float32)
    band = pixels[430:560, :, :3]
    pixels[430:560, :, :3] = band + (255 - band) * 0.55
    sheet = Image.fromarray(pixels.astype(np.uint8))

    # desk background with wood grain
    desk_w, desk_h = 1000, 1400
    desk = Image.new("RGB", (desk_w, desk_h), (96, 74, 54))
    grain = ImageDraw.Draw(desk)
    for row_y in range(0, desk_h, 7):
        drift = rng.randint(-3, 3)
        shade = (96 + rng.randint(-10, 8), 74 + rng.randint(-8, 6), 54 + rng.randint(-6, 6))
        grain.line([(0, row_y), (desk_w, row_y + drift)], fill=shade, width=3)

    # shadow under receipt: black pasted through a blurred quadrilateral mask
    shadow = Image.new("RGBA", (desk_w, desk_h), (0, 0, 0, 0))
    ImageDraw.Draw(shadow).polygon([(232, 152), (798, 198), (742, 1292), (172, 1232)], fill=(0, 0, 0, 110))
    soft_shadow = shadow.filter(ImageFilter.GaussianBlur(18))
    desk.paste(Image.new("RGB", (desk_w, desk_h), 0), (0, 0), soft_shadow)

    # perspective-warp the receipt onto the desk
    corners_on_desk = [(248, 138), (786, 186), (730, 1276), (188, 1218)]
    corners_on_paper = [(0, 0), (paper_w, 0), (paper_w, paper_h), (0, paper_h)]
    warp = _find_coeffs(corners_on_desk, corners_on_paper)
    warped = sheet.transform((desk_w, desk_h), Image.PERSPECTIVE, warp, Image.BICUBIC)
    desk.paste(warped, (0, 0), warped)

    # uneven lighting: bright top-left, dim bottom-right + vignette
    photo = np.asarray(desk).astype(np.float32)
    yy, xx = np.mgrid[0:desk_h, 0:desk_w]
    light = 1.12 - 0.32 * ((xx / desk_w) * 0.6 + (yy / desk_h) * 0.4)
    r2 = ((xx - desk_w / 2) / (desk_w / 2)) ** 2 + ((yy - desk_h / 2) / (desk_h / 2)) ** 2
    light *= 1 - 0.18 * np.clip(r2 - 0.45, 0, 1)
    # In-place updates keep the working buffer float32 throughout.
    photo *= light[..., None]
    photo += np.random.default_rng(7).normal(0, 4.5, photo.shape)
    shot = Image.fromarray(np.clip(photo, 0, 255).astype(np.uint8))
    shot = shot.filter(ImageFilter.GaussianBlur(0.6))

    shot.save(OUT / f"{name}.png")
    (OUT / f"{name}.txt").write_text("\n".join(R_LINES) + "\n")
    (OUT / f"{name}.gt.json").write_text(json.dumps(R_GT, indent=2))


# ── 2. extreme_po_collage ─────────────────────────────────────────────────────
# Purchase-order rows as (description, quantity, unit_price, line_total).
# gen_po() draws one table row per tuple and PO_GT reuses them as line_items.
PO_ITEMS = [
    ("SHELF UNIT S-200 heavy gauge", 24, 189.00, 4536.00),
    ("LED STRIP 2m retail white", 60, 22.40, 1344.00),
    ("ENDCAP DISPLAY birch finish", 12, 310.00, 3720.00),
]
# Structured ground truth that gen_po() dumps to the .gt.json file.
PO_GT = {
    "doc_type": "purchase_order",
    "order_number": "PO-77RX-3309",
    "order_date": "2026-05-21",
    "delivery_date": "2026-06-15",
    "vendor_name": "Nordic Fixture Works AB",
    "buyer_name": "Aperture Retail Group",
    "ship_to": "DC-7, 4420 Logistics Pkwy, Columbus OH",
    "currency": "USD",
    "payment_terms": "Net 45",
    # 4536.00 + 1344.00 + 3720.00 = 9600.00; 792.00 is 8.25% of that.
    "subtotal": 9600.00,
    "tax_amount": 792.00,
    "total": 10392.00,
    "line_items": [{"description": d_, "quantity": q, "unit_price": u, "line_total": t}
                   for d_, q, u, t in PO_ITEMS],
    # Generation metadata; skip_eval is the tag the module docstring refers to.
    "_meta": {"doc_type": "purchase_order", "channel": "scanned", "difficulty": "extreme", "skip_eval": True},
}


def _thumb(kind):
    """Return a 76x76 RGB product thumbnail with a grey one-pixel frame.

    Args:
        kind: ``0`` draws a shelf unit, ``1`` an LED strip; any other value
            draws an endcap display.

    Returns:
        The thumbnail as a new ``PIL.Image.Image``.
    """
    tile = Image.new("RGB", (76, 76), (235, 238, 242))
    pen = ImageDraw.Draw(tile)
    if kind == 0:  # shelf unit: four boards, then the two uprights on top
        for board_top in range(12, 72, 15):
            pen.rectangle([10, board_top, 66, board_top + 6], fill=(120, 128, 140))
        for post_x in (12, 64):
            pen.line([(post_x, 12), (post_x, 66)], fill=(80, 86, 96), width=3)
    elif kind == 1:  # LED strip: dark housing with a row of warm dots
        pen.rounded_rectangle([8, 30, 68, 46], radius=8, fill=(60, 64, 70))
        for led_x in range(14, 66, 9):
            pen.ellipse([led_x, 34, led_x + 6, 42], fill=(255, 240, 160))
    else:  # endcap display: tapered body with two shelves
        shelf_fill = (160, 128, 88)
        pen.polygon([(14, 64), (26, 14), (50, 14), (62, 64)], fill=(196, 164, 120))
        pen.rectangle([20, 40, 56, 46], fill=shelf_fill)
        pen.rectangle([24, 26, 52, 32], fill=shelf_fill)
    pen.rectangle([0, 0, 75, 75], outline=(150, 150, 150))
    return tile


def _qr(d, x, y, n=21, cell=5):
    g = random.Random(9)
    for r in range(n):
        for c in range(n):
            if g.random() < 0.45:
                d.rectangle([x + c * cell, y + r * cell, x + c * cell + cell - 1, y + r * cell + cell - 1], fill=0)
    for fx, fy in [(0, 0), (n - 7, 0), (0, n - 7)]:  # finder squares
        d.rectangle([x + fx * cell, y + fy * cell, x + (fx + 7) * cell, y + (fy + 7) * cell], outline=0, width=3)
        d.rectangle([x + (fx + 2) * cell, y + (fy + 2) * cell, x + (fx + 5) * cell, y + (fy + 5) * cell], fill=0)


def gen_po():
    """Render the image-heavy purchase-order sample and write its three files.

    Draws, in this order: logo and vendor header, a QR-like block, the meta
    box, buyer / ship-to lines, the item table (one thumbnail per row, columns
    nudged sideways per row), totals, a pseudo-barcode, a signature, and a
    rotated green stamp pasted over the table. Gaussian noise and a -0.7 degree
    rotation are applied last. Writes ``extreme_po_collage.png`` / ``.txt`` /
    ``.gt.json`` into ``OUT`` and returns nothing.

    Every string that appears both on the page and in the ``.txt`` sidecar is
    defined once below and reused for both, so the two cannot drift apart. The
    text files are written as UTF-8 explicitly because the sidecar contains
    non-ASCII punctuation (middle dot, em dash).
    """
    W, H = 1240, 1600
    paper, brand = (252, 252, 250), (30, 90, 160)
    dark, grey = (20, 20, 30), (90, 90, 100)
    row_ink, foot_ink = (25, 25, 30), (60, 60, 60)

    # Strings shared by the rendered page and the sidecar reference text.
    title = "PURCHASE ORDER"
    vendor = "Nordic Fixture Works AB"
    address = "Industrigatan 14, Malmo SE"
    meta = [("PO Number:", "PO-77RX-3309"), ("Order Date:", "2026-05-21"),
            ("Delivery Date:", "2026-06-15"), ("Payment Terms:", "Net 45"), ("Currency:", "USD")]
    buyer_line = "Buyer: Aperture Retail Group"
    ship_line = "Ship To: DC-7, 4420 Logistics Pkwy, Columbus OH"
    columns = [(56, "IMG"), (160, "DESCRIPTION"), (700, "QTY"), (840, "UNIT USD"), (1040, "AMOUNT")]
    totals = [("Subtotal:", "9,600.00"), ("Tax 8.25%:", "792.00")]
    grand_total = ("TOTAL:", "10,392.00 USD")
    barcode_caption = "*PO77RX3309*"
    approval = "APPROVED · OPS DESK"
    sign_off = "Authorized — K. Lindqvist, Procurement"

    im = Image.new("RGB", (W, H), paper)
    d = ImageDraw.Draw(im)
    h1, h2, h3, body, small = font(40, True), font(22, True), font(18, True), font(19), font(15)

    # header: drawn logo + vendor (left), meta box (right), QR top-right corner
    d.rectangle([40, 40, 120, 120], fill=brand)
    d.polygon([(52, 108), (80, 52), (108, 108)], fill=paper)
    d.text((136, 48), vendor, font=h2, fill=dark)
    # NOTE: the VAT suffix is drawn on the page but is not part of the sidecar line.
    d.text((136, 80), f"{address}  ·  VAT SE5566778899", font=small, fill=grey)
    d.text((40, 150), title, font=h1, fill=brand)
    _qr(d, 1060, 40)
    d.rounded_rectangle([720, 150, 1200, 320], radius=10, outline=brand, width=2)
    for i, (label, value) in enumerate(meta):
        d.text((740, 165 + i * 30), label, font=h3, fill=grey)
        d.text((920, 165 + i * 30), value, font=body, fill=dark)
    d.text((40, 230), buyer_line, font=body, fill=dark)
    d.text((40, 260), ship_line, font=body, fill=dark)

    # table with thumbnails + deliberately misaligned columns
    d.rectangle([40, 360, 1200, 404], fill=brand)
    for x, heading in columns:
        d.text((x, 370), heading, font=h3, fill=(255, 255, 255))
    nudges = (-14, 22, 6)  # column misalignment per row
    y = 420
    for i, (desc, qty, unit, tot) in enumerate(PO_ITEMS):
        # Cycle through the nudges so a longer PO_ITEMS list cannot index past them.
        off = nudges[i % len(nudges)]
        im.paste(_thumb(i), (52, y))
        d.text((160 + off, y + 24), desc, font=body, fill=row_ink)
        d.text((706 + off // 2, y + 24), str(qty), font=body, fill=row_ink)
        d.text((846 - off, y + 24), f"{unit:,.2f}", font=body, fill=row_ink)
        d.text((1042 + off, y + 24), f"{tot:,.2f}", font=body, fill=row_ink)
        d.line([(40, y + 88), (1200, y + 88)], fill=(210, 210, 215))
        y += 96

    # totals (right) + barcode (left) + signature
    for dy, (label, amount) in zip((24, 58), totals):
        d.text((840, y + dy), label, font=h3, fill=grey)
        d.text((1042, y + dy), amount, font=body, fill=dark)
    d.rectangle([820, y + 92, 1200, y + 134], fill=(240, 244, 250))
    d.text((840, y + 100), grand_total[0], font=h2, fill=brand)
    d.text((1042, y + 100), grand_total[1], font=h2, fill=brand)
    bx = 40
    g = random.Random(5)  # private seed: bar pattern is identical on every run
    for _ in range(60):
        wbar = g.choice((2, 2, 3, 5))
        d.rectangle([bx, y + 40, bx + wbar, y + 110], fill=0)
        bx += wbar + g.choice((2, 3))
    d.text((40, y + 116), barcode_caption, font=small, fill=foot_ink)
    sig = signature()
    im.paste(sig, (760, H - 220), sig)
    d.line([(740, H - 160), (1010, H - 160)], fill=foot_ink, width=2)
    d.text((740, H - 150), sign_off, font=small, fill=foot_ink)

    # green APPROVED stamp overlapping the table
    st = stamp(approval, (20, 130, 60), 12)
    im.paste(st, (430, 560), st)

    # mild scan noise + tiny skew
    a = np.asarray(im).astype(np.float32) + np.random.default_rng(3).normal(0, 5, (H, W, 3))
    im = Image.fromarray(np.clip(a, 0, 255).astype(np.uint8)).rotate(-0.7, expand=False, fillcolor=paper)
    im.save(OUT / "extreme_po_collage.png")

    txt = [
        title, vendor, address,
        *(f"{label} {value}" for label, value in meta),
        buyer_line, ship_line,
        " ".join(heading for _, heading in columns),
        *(f"{desc} {q} {u:,.2f} {t:,.2f}" for desc, q, u, t in PO_ITEMS),
        *(f"{label} {amount}" for label, amount in totals),
        f"{grand_total[0]} {grand_total[1]}",
        barcode_caption, approval, sign_off,
    ]
    (OUT / "extreme_po_collage.txt").write_text("\n".join(txt) + "\n", encoding="utf-8")
    (OUT / "extreme_po_collage.gt.json").write_text(json.dumps(PO_GT, indent=2), encoding="utf-8")


# ── 3. extreme_contract_fax ───────────────────────────────────────────────────
# Structured ground truth that gen_contract() dumps to the .gt.json file.
C_GT = {
    "doc_type": "contract",
    "contract_number": "MSA-2026-0481",
    "title": "Master Services Agreement - Store Fit-Out Program",
    "party_a": "Aperture Retail Group",
    "party_b": "Halcyon Build Partners LLC",
    "effective_date": "2026-03-01",
    "expiration_date": "2029-02-28",
    "contract_value": 1250000.00,
    "currency": "USD",
    "governing_law": "State of Ohio",
    "auto_renew": False,
    "termination_notice_days": 60,
    # Generation metadata; skip_eval is the tag the module docstring refers to.
    "_meta": {"doc_type": "contract", "channel": "fax", "difficulty": "extreme", "skip_eval": True},
}
# Header block: the first entry is drawn as the centred title, the remaining
# entries as key/value lines beneath it.
C_HEAD = [
    "MASTER SERVICES AGREEMENT - STORE FIT-OUT PROGRAM",
    "Contract No: MSA-2026-0481",
    "Party A: Aperture Retail Group   Party B: Halcyon Build Partners LLC",
    "Effective Date: 2026-03-01   Expiration Date: 2029-02-28",
    "Total Contract Value: USD 1,250,000.00   Governing Law: State of Ohio",
    "Auto-Renewal: NO   Termination Notice: 60 days written notice",
]
# Numbered clauses, pre-wrapped one entry per printed line. gen_contract() splits
# the list in half to fill the two body columns; C_HEAD + C_BODY is the sidecar text.
C_BODY = [
    "1. SCOPE. Contractor shall furnish all labor, materials, supervision and",
    "equipment required for the fit-out of retail premises identified in each",
    "Statement of Work executed under this Agreement.",
    "2. TERM. This Agreement commences on the Effective Date and continues",
    "until the Expiration Date unless terminated earlier per Section 9.",
    "3. COMPENSATION. Client shall pay Contractor fees not to exceed the",
    "Total Contract Value, payable per approved milestone invoices Net 30.",
    "4. CHANGE ORDERS. No variation is binding unless documented in a",
    "written change order signed by both parties' authorized representatives.",
    "5. WARRANTIES. Contractor warrants workmanship free of defects for",
    "twenty-four (24) months following practical completion of each site.",
    "6. INSURANCE. Contractor shall maintain commercial general liability",
    "coverage of not less than USD 5,000,000 per occurrence.",
    "7. CONFIDENTIALITY. Each party shall protect Confidential Information",
    "with no less than reasonable care and use it solely for this Agreement.",
    "8. LIABILITY. Neither party is liable for indirect or consequential",
    "damages; aggregate liability is capped at the Total Contract Value.",
    "9. TERMINATION. Either party may terminate for convenience upon sixty",
    "(60) days written notice, or immediately for uncured material breach.",
    "10. GOVERNING LAW. This Agreement is governed by the laws of the",
    "State of Ohio, excluding its conflict of law provisions.",
]


def gen_contract():
    """Render the faxed two-column contract sample and write its three files.

    Draws a fax transmission header, the ``C_HEAD`` title and key/value lines,
    ``C_BODY`` split across two columns (each padded with 14 filler lines), a
    two-party signature block, a rotated blue margin note and a rotated red
    stamp. The page is then converted to greyscale and degraded: washed-out
    contrast, Gaussian plus salt-and-pepper noise, brightened scanline rows
    every 90 px, a 1.3 degree rotation and two punch-hole marks. Writes
    ``extreme_contract_fax.png`` / ``.txt`` / ``.gt.json`` into ``OUT`` and
    returns nothing.

    The ``.txt`` sidecar holds only ``C_HEAD + C_BODY``; the fax header, filler
    lines, signature captions, margin note and stamp text are drawn on the page
    but are not part of the reference text.
    """
    W, H = 1240, 1600
    im = Image.new("RGB", (W, H), (255, 255, 255))
    d = ImageDraw.Draw(im)
    fh, fb, fs = font(26, True), font(17), font(14)
    d.text((30, 18), "FAX  TX 06/12/2026 14:22  FROM HALCYON BUILD +1 614 555 0188  P.01/07", font=fs, fill=(60, 60, 60))
    d.line([(30, 44), (1210, 44)], fill=(60, 60, 60), width=2)
    tw = d.textlength(C_HEAD[0], font=fh)
    d.text(((W - tw) / 2, 70), C_HEAD[0], font=fh, fill=(15, 15, 15))
    y = 130
    for ln in C_HEAD[1:]:
        d.text((80, y), ln, font=fb, fill=(20, 20, 20))
        y += 30
    d.line([(60, y + 8), (1180, y + 8)], fill=(120, 120, 120), width=2)

    # dense two-column body; the left column takes the extra line when odd
    filler = "WHEREAS the parties acknowledge the recitals set forth herein;"
    half = (len(C_BODY) + 1) // 2
    for col, lines in enumerate((C_BODY[:half], C_BODY[half:])):
        x = 70 + col * 590
        yy = y + 34
        for ln in lines:
            d.text((x, yy), ln, font=fs, fill=(25, 25, 25))
            yy += 24
        for extra in range(14):  # filler legalese to densify
            # Truncate to 58 / 54 / 50 characters in rotation for a ragged right edge.
            d.text((x, yy), filler[: 58 - (extra % 3) * 4], font=fs, fill=(45, 45, 45))
            yy += 24

    # signature block
    sy = H - 300
    signers = [("M. Okafor — Aperture Retail Group", "Chief Procurement Officer"),
               ("D. Reyes — Halcyon Build Partners LLC", "Managing Partner")]
    for col, (name, role) in enumerate(signers):
        x = 90 + col * 600
        sig = signature(color=(20, 20, 20))
        im.paste(sig, (x, sy), sig)
        d.line([(x, sy + 70), (x + 420, sy + 70)], fill=(40, 40, 40), width=2)
        d.text((x, sy + 80), name, font=fs, fill=(30, 30, 30))
        d.text((x, sy + 102), role, font=fs, fill=(90, 90, 90))

    # handwritten blue margin note + red stamp
    note = Image.new("RGBA", (430, 60), (0, 0, 0, 0))
    ImageDraw.Draw(note).text((0, 8), "legal OK -> route to CFO  (June 5)", font=font(24), fill=(28, 40, 160, 220))
    note = note.rotate(-3, expand=True, resample=Image.BICUBIC)
    im.paste(note, (700, 360), note)
    st = stamp("RECEIVED JUN 05 2026", (180, 30, 30), -14, size=(420, 110), fsz=30)
    im.paste(st, (90, 430), st)

    # fax degradation: low contrast, salt & pepper, scanline streaks, skew, punch holes
    g = im.convert("L")
    a = np.asarray(g).astype(np.float32)
    a = 255 - (255 - a) * 0.62                      # washed-out toner
    nz = np.random.default_rng(11)
    a += nz.normal(0, 9, a.shape)
    pepper = nz.random(a.shape)
    a[pepper < 0.004] = 30                          # pepper
    a[pepper > 0.997] = 245                         # salt
    for yy in range(0, H, 90):                      # scanline streaks
        a[yy:yy + 2, :] = np.clip(a[yy:yy + 2, :] * 1.25, 0, 255)
    img = Image.fromarray(np.clip(a, 0, 255).astype(np.uint8)).rotate(1.3, expand=False, fillcolor=235)
    d2 = ImageDraw.Draw(img)
    for hy in (H // 4, 3 * H // 4):                 # punch-hole shadows
        d2.ellipse([18, hy - 22, 62, hy + 22], fill=246, outline=140, width=3)
    img.convert("RGB").save(OUT / "extreme_contract_fax.png")
    # Explicit UTF-8 so the files do not depend on the host's locale encoding.
    (OUT / "extreme_contract_fax.txt").write_text("\n".join(C_HEAD + C_BODY) + "\n", encoding="utf-8")
    (OUT / "extreme_contract_fax.gt.json").write_text(json.dumps(C_GT, indent=2), encoding="utf-8")


if __name__ == "__main__":
    # Script entry point: make sure the dataset directory exists, build the three
    # samples in a fixed order (they share the module-level rng), then report.
    OUT.mkdir(parents=True, exist_ok=True)
    for build in (gen_receipt, gen_po, gen_contract):
        build()
    sample_ids = ("extreme_receipt_photo", "extreme_po_collage", "extreme_contract_fax")
    for sid in sample_ids:
        print(f"  wrote {OUT / sid}.png (+ .gt.json + .txt)")