"""
Generate a synthetic UK motor insurance PDF pack for demos and tests.
The PDFs are intentionally fictional: invented insurer, logo, names, address,
policy number, vehicle registration, and risk details. They are text-based PDFs
so Docling can parse them without OCR.
Run from the repository root:
python scripts/generate_synthetic_policy_pack.py
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Iterable
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import mm
from reportlab.platypus import (
Paragraph,
SimpleDocTemplate,
Spacer,
Table,
TableStyle,
)
# Destination directory for the generated pack. The path is relative, so it
# resolves against the current working directory (the module docstring says to
# run the script from the repository root).
OUT_DIR = Path("sample_data/policytrace_demo_pack")
# Brand palette for the fictional insurer, shared by the page header, the
# paragraph styles and the table styling below.
BRAND_DARK = colors.HexColor("#1F2937")
BRAND_BLUE = colors.HexColor("#2563EB")
BRAND_TEAL = colors.HexColor("#008080")
# Fill colours used by _table: pink for alternating body rows, light for row 0.
BRAND_PINK = colors.HexColor("#FCE7F3")
BRAND_LIGHT = colors.HexColor("#F8FAFC")
# Single source of truth for the fictional policy. Every value is already a
# display-ready string; the build_* functions place these values into the PDFs
# and write_manifest copies the policy number, VRM and insurer into
# manifest.json.
POLICY = {
    # --- Insurer, product and period of cover ---
    "insurer": "Northbridge Mutual Motor Insurance Ltd",
    "product_name": "PolicyTrace Comprehensive Plus",
    "policy_number": "NBM-DEMO-0427",
    "issue_date": "18/03/2026",
    "start_date": "15/04/2026 at 00:00 hours",
    "expiry_date": "14/04/2027 at 23:59 hours",
    # --- Policyholder (also listed as the main driver) ---
    "policyholder": "Alex Morgan",
    "address": "14 Demo Crescent, Sampleton, West Yorkshire, ZZ1 1ZZ",
    "dob": "14/03/1991",
    "occupation": "Product Manager",
    # --- Named drivers ---
    "second_driver": "Priya Shah",
    "second_driver_dob": "07/08/1995",
    "second_driver_occupation": "Business Analyst",
    "third_driver": "Jordan Reed",
    "third_driver_dob": "11/10/1985",
    "third_driver_occupation": "Data Administrator",
    # --- Vehicle and risk details ---
    "vrm": "ZX24 DEM",
    "make": "Skoda",
    "model": "Enyaq iV 60 62kWh 177.0 bhp",
    "fuel_type": "Electric",
    "transmission": "Automatic",
    "estimated_value": "Market Value",
    "annual_mileage": "7,000 miles",
    "overnight_postcode": "ZZ1 1ZZ",
    "kept_location": "Drive",
    "security_device": "Yes",
    "tracker_fitted": "No",
    "modifications": "No",
    # --- Cover and no claims discount ---
    "cover_type": "Comprehensive",
    "class_of_use": (
        "Use for social, domestic and pleasure purposes including commuting "
        "to and from a permanent place of work."
    ),
    "driving_other_cars": "No",
    "ncb_years": "2 years",
    "ncb_protected": "No",
    # --- Excesses ---
    # total_accidental_damage equals standard_compulsory + voluntary
    # (395.00 + 200.00 = 595.00); keep the three in step when editing.
    "standard_compulsory": "GBP 395.00",
    "voluntary": "GBP 200.00",
    "total_accidental_damage": "GBP 595.00",
    "fire": "GBP 395.00",
    "theft": "GBP 445.00",
    "windscreen_repair": "GBP 15.00",
    "windscreen_replacement": "GBP 200.00",
    "own_repairer": "GBP 200.00",
    # --- Premium and optional extras shown in the financial summary ---
    "total_premium": "GBP 703.28",
    "legal": "GBP 25.40",
    "breakdown": "GBP 28.07",
    "personal_accident": "GBP 20.00",
    "hire_car": "Not selected",
    "key_cover": "Not selected",
}
def _styles() -> dict[str, ParagraphStyle]:
    """Return the paragraph styles used across the demo PDFs.

    A new mapping is built on every call. Its keys are ``"title"``,
    ``"subtitle"``, ``"h2"``, ``"body"`` and ``"small"``; each style inherits
    from an entry of ReportLab's sample stylesheet and overrides font, size,
    colour and spacing.
    """
    sample = getSampleStyleSheet()

    # (style name, parent key in the sample stylesheet, attribute overrides)
    specs = (
        (
            "title",
            "Title",
            dict(
                fontName="Helvetica-Bold",
                fontSize=22,
                textColor=BRAND_DARK,
                spaceAfter=14,
            ),
        ),
        (
            "subtitle",
            "Normal",
            dict(
                fontName="Helvetica",
                fontSize=10,
                leading=14,
                textColor=colors.HexColor("#475569"),
                spaceAfter=10,
            ),
        ),
        (
            "h2",
            "Heading2",
            dict(
                fontName="Helvetica-Bold",
                fontSize=13,
                textColor=BRAND_TEAL,
                spaceBefore=12,
                spaceAfter=7,
            ),
        ),
        (
            "body",
            "BodyText",
            dict(
                fontName="Helvetica",
                fontSize=9,
                leading=12,
                textColor=BRAND_DARK,
                spaceAfter=6,
            ),
        ),
        (
            "small",
            "BodyText",
            dict(
                fontName="Helvetica",
                fontSize=7,
                leading=9,
                textColor=colors.HexColor("#64748B"),
            ),
        ),
    )

    return {
        style_name: ParagraphStyle(style_name, parent=sample[parent_key], **overrides)
        for style_name, parent_key, overrides in specs
    }
def _draw_header(canvas, doc, title: str) -> None:
    """Paint the page furniture (logo, title, rule, footer) onto ``canvas``.

    Args:
        canvas: ReportLab canvas for the page being rendered.
        doc: Document being built; only ``doc.page`` is read, for the footer.
        title: Text drawn right-aligned in the header.

    The canvas graphics state is saved on entry and restored on exit, so the
    colours and fonts set here do not leak into the page content.
    """
    page_width, page_height = A4
    left_edge = 16 * mm
    right_edge = page_width - 16 * mm
    disc_y = page_height - 18.5 * mm
    disc_radius = 2.6 * mm
    rule_y = page_height - 28 * mm
    footer_y = 11 * mm

    canvas.saveState()

    # Logo: dark rounded plate, two coloured discs, then the wordmark.
    canvas.setFillColor(BRAND_DARK)
    canvas.roundRect(
        left_edge,
        page_height - 24 * mm,
        42 * mm,
        11 * mm,
        2 * mm,
        fill=1,
        stroke=0,
    )
    for disc_colour, disc_x in ((BRAND_TEAL, 22 * mm), (BRAND_BLUE, 29 * mm)):
        canvas.setFillColor(disc_colour)
        canvas.circle(disc_x, disc_y, disc_radius, fill=1, stroke=0)
    canvas.setFillColor(colors.white)
    canvas.setFont("Helvetica-Bold", 6)
    canvas.drawString(36 * mm, page_height - 19.5 * mm, "NORTHBRIDGE")

    # Document title on the right, with a thin rule underneath the header.
    canvas.setFillColor(colors.HexColor("#64748B"))
    canvas.setFont("Helvetica", 7)
    canvas.drawRightString(right_edge, page_height - 18 * mm, title)
    canvas.setStrokeColor(colors.HexColor("#E2E8F0"))
    canvas.line(left_edge, rule_y, right_edge, rule_y)

    # Footer: synthetic-data disclaimer on the left, page number on the right.
    canvas.setFont("Helvetica", 6)
    canvas.setFillColor(colors.HexColor("#94A3B8"))
    disclaimer = (
        "Synthetic demo document generated for AI Tool Stack PolicyTrace. "
        "No real customer or insurer data."
    )
    canvas.drawString(left_edge, footer_y, disclaimer)
    canvas.drawRightString(right_edge, footer_y, f"Page {doc.page}")

    canvas.restoreState()
def _table(rows: Iterable[Iterable[str]], col_widths: list[float] | None = None) -> Table:
    """Build a left-aligned, gridded table whose cells are wrapped paragraphs.

    Args:
        rows: Table content, one iterable of cell values per row. Each value is
            converted with ``str()`` and wrapped in a ``Paragraph`` so long
            text wraps inside its column.
        col_widths: Optional column widths in points, passed to ``Table`` as
            ``colWidths``. ``None`` leaves sizing to ReportLab.

    Returns:
        The styled ``Table``: row 0 gets the light background, later rows
        alternate white and pink, and every cell gets a thin grid and padding.
    """
    # Look the body style up once. Calling _styles() inside the comprehension
    # rebuilt the whole sample stylesheet and all five styles for every cell.
    cell_style = _styles()["body"]
    data = [[Paragraph(str(cell), cell_style) for cell in row] for row in rows]

    table = Table(data, colWidths=col_widths, hAlign="LEFT")
    # NOTE(review): every cell is a Paragraph, and ReportLab appears to apply
    # TEXTCOLOR/FONTNAME table commands to plain-string cells only, so the
    # header row probably renders in the regular body font. Confirm whether a
    # bold header paragraph style is wanted.
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), BRAND_LIGHT),
                ("TEXTCOLOR", (0, 0), (-1, 0), BRAND_DARK),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("GRID", (0, 0), (-1, -1), 0.35, colors.HexColor("#CBD5E1")),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
                ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, BRAND_PINK]),
                ("LEFTPADDING", (0, 0), (-1, -1), 6),
                ("RIGHTPADDING", (0, 0), (-1, -1), 6),
                ("TOPPADDING", (0, 0), (-1, -1), 5),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
            ]
        )
    )
    return table
def _doc(path: Path, title: str):
    """Create the A4 ``SimpleDocTemplate`` used for every PDF in the pack.

    Args:
        path: Output file; converted to ``str`` for ReportLab.
        title: Value stored as the PDF document title.

    Returns:
        A template with 18 mm left/right/bottom margins, a 32 mm top margin
        and "AI Tool Stack" as the author.
    """
    page_setup = {
        "pagesize": A4,
        "leftMargin": 18 * mm,
        "rightMargin": 18 * mm,
        "topMargin": 32 * mm,
        "bottomMargin": 18 * mm,
    }
    metadata = {"title": title, "author": "AI Tool Stack"}
    return SimpleDocTemplate(str(path), **page_setup, **metadata)
def build_schedule() -> None:
    """Write "Schedule of Insurance - Demo.pdf" into ``OUT_DIR``.

    The schedule holds, in order: a policy overview table, then policyholder,
    vehicle, cover, excess, driver and financial sections, each introduced by
    an ``h2`` heading. Values come from ``POLICY`` except for a few literals
    written directly in the rows below.
    """
    styles = _styles()
    heading_style = styles["h2"]
    target = OUT_DIR / "Schedule of Insurance - Demo.pdf"

    def lookup_rows(pairs):
        # Turn (label, POLICY key) pairs into two-cell table rows.
        return [[label, POLICY[key]] for label, key in pairs]

    def draw_page(canvas, doc):
        # Same page furniture on the first and on every later page.
        _draw_header(canvas, doc, "Schedule of Insurance")

    story = [
        Paragraph("Car insurance schedule", styles["title"]),
        Paragraph(
            "This schedule is a synthetic text-based PDF for the PolicyTrace demo. "
            "Please check all details carefully and contact Northbridge Mutual if anything is incorrect.",
            styles["subtitle"],
        ),
    ]

    # Policy overview (no heading).
    cover_period = f"{POLICY['start_date']} - {POLICY['expiry_date']}"
    overview_rows = [
        ["Policy number", POLICY["policy_number"], "Date of issue", POLICY["issue_date"]],
        ["Insurer", POLICY["insurer"], "Product", POLICY["product_name"]],
        ["Period of cover", cover_period, "Cover type", POLICY["cover_type"]],
    ]
    story.append(_table(overview_rows, [33 * mm, 52 * mm, 33 * mm, 52 * mm]))

    # Policyholder details.
    holder_rows = lookup_rows(
        [
            ("Name", "policyholder"),
            ("Address", "address"),
            ("Date of birth", "dob"),
            ("Occupation", "occupation"),
        ]
    )
    holder_rows += [
        ["Children under 16", "Yes"],
        ["Home ownership status", "Not a Homeowner"],
        ["Number of cars in household", "1"],
        ["Access to other vehicles", "No access to any other vehicles"],
    ]
    story.append(Paragraph("Policyholder details", heading_style))
    story.append(_table(holder_rows, [55 * mm, 115 * mm]))

    # Vehicle details: two label/value pairs per row.
    vehicle_fields = [
        ("Registration number", "vrm"),
        ("Make", "make"),
        ("Model", "model"),
        ("Fuel type", "fuel_type"),
        ("Transmission", "transmission"),
        ("Estimated value", "estimated_value"),
        ("Annual mileage", "annual_mileage"),
        ("Overnight postcode", "overnight_postcode"),
        ("Kept location", "kept_location"),
        ("Security device fitted", "security_device"),
        ("Tracker fitted", "tracker_fitted"),
        ("Modifications", "modifications"),
    ]
    vehicle_rows = [
        [left_label, POLICY[left_key], right_label, POLICY[right_key]]
        for (left_label, left_key), (right_label, right_key) in zip(
            vehicle_fields[::2], vehicle_fields[1::2]
        )
    ]
    story.append(Paragraph("Vehicle details", heading_style))
    story.append(_table(vehicle_rows, [38 * mm, 48 * mm, 38 * mm, 48 * mm]))

    # Cover and no claims discount.
    cover_rows = lookup_rows(
        [
            ("Class of use", "class_of_use"),
            ("Driving other cars", "driving_other_cars"),
            ("No claims discount", "ncb_years"),
            ("Protected no claims discount", "ncb_protected"),
        ]
    )
    story.append(Paragraph("Cover and no claims discount", heading_style))
    story.append(_table(cover_rows, [55 * mm, 115 * mm]))

    # Excess breakdown.
    excess_rows = [["Excess type", "Amount"]] + lookup_rows(
        [
            ("Standard compulsory excess", "standard_compulsory"),
            ("Voluntary excess", "voluntary"),
            ("Total accidental damage excess", "total_accidental_damage"),
            ("Fire excess", "fire"),
            ("Theft excess", "theft"),
            ("Windscreen repair excess", "windscreen_repair"),
            ("Windscreen replacement excess", "windscreen_replacement"),
            ("Own repairer additional excess", "own_repairer"),
        ]
    )
    story.append(Paragraph("Excess breakdown", heading_style))
    story.append(_table(excess_rows, [90 * mm, 50 * mm]))

    # Driver details.
    driver_header = [
        "Driver name",
        "Date of birth",
        "Relationship",
        "Occupation",
        "Licence type",
        "Main driver",
        "Specific excess",
    ]
    main_driver = [
        POLICY["policyholder"],
        POLICY["dob"],
        "Policyholder",
        POLICY["occupation"],
        "Full Licence UK / 2/1 / No",
        "Yes",
        "",
    ]
    second_driver = [
        POLICY["second_driver"],
        POLICY["second_driver_dob"],
        "Named Driver",
        POLICY["second_driver_occupation"],
        "UK Provisional / 1/4 / No",
        "No",
        "GBP 200.00",
    ]
    third_driver = [
        POLICY["third_driver"],
        POLICY["third_driver_dob"],
        "Named Driver",
        POLICY["third_driver_occupation"],
        "Full Licence UK / 5/0 / No",
        "No",
        "",
    ]
    # NOTE(review): these widths total 180 mm, wider than the 174 mm between
    # the 18 mm side margins set in _doc on an A4 page. Confirm the overflow
    # past the right margin is acceptable.
    driver_widths = [30 * mm, 24 * mm, 24 * mm, 31 * mm, 31 * mm, 18 * mm, 22 * mm]
    story.append(Paragraph("Driver details", heading_style))
    story.append(
        _table([driver_header, main_driver, second_driver, third_driver], driver_widths)
    )

    # Financial summary.
    premium_rows = [["Item", "Premium"]] + lookup_rows(
        [
            ("Total annual premium", "total_premium"),
            ("Motor legal protection", "legal"),
            ("Breakdown roadside assistance", "breakdown"),
            ("Enhanced personal accident", "personal_accident"),
            ("Hire car", "hire_car"),
            ("Key cover", "key_cover"),
        ]
    )
    story.append(Paragraph("Financial summary", heading_style))
    story.append(_table(premium_rows, [90 * mm, 50 * mm]))

    document = _doc(target, "Schedule of Insurance - Demo")
    document.build(story, onFirstPage=draw_page, onLaterPages=draw_page)
def build_certificate() -> None:
    """Write "Certificate of Motor Insurance - Demo.pdf" into ``OUT_DIR``.

    The certificate lists the policy identifiers, the three people entitled to
    drive, the limitations as to use, and a closing note that the document is
    fictional.
    """
    styles = _styles()
    target = OUT_DIR / "Certificate of Motor Insurance - Demo.pdf"

    def draw_page(canvas, doc):
        # Same page furniture on the first and on every later page.
        _draw_header(canvas, doc, "Certificate of Motor Insurance")

    identity_fields = (
        ("Policy number", "policy_number"),
        ("Insurer", "insurer"),
        ("Effective from", "start_date"),
        ("Expires", "expiry_date"),
        ("Registration number", "vrm"),
    )
    identity_rows = [[label, POLICY[key]] for label, key in identity_fields]

    named_driver_text = "Named driver may drive the insured vehicle."
    entitlement_rows = [
        ["Name", "Entitlement"],
        [POLICY["policyholder"], "The policyholder may drive the insured vehicle."],
        [POLICY["second_driver"], named_driver_text],
        [POLICY["third_driver"], named_driver_text],
    ]

    story = []
    story.append(Paragraph("Certificate of Motor Insurance", styles["title"]))
    story.append(
        Paragraph(
            "This is to certify that a policy of insurance has been issued for the purposes of the Road Traffic Act.",
            styles["subtitle"],
        )
    )
    story.append(_table(identity_rows, [55 * mm, 115 * mm]))

    story.append(Paragraph("Persons entitled to drive", styles["h2"]))
    story.append(_table(entitlement_rows, [55 * mm, 115 * mm]))

    story.append(Paragraph("Limitations as to use", styles["h2"]))
    story.append(Paragraph(POLICY["class_of_use"], styles["body"]))
    story.append(
        Paragraph("The policy does not provide cover for driving other cars.", styles["body"])
    )
    story.append(Spacer(1, 8))
    story.append(
        Paragraph(
            "This certificate is fictional and is provided only as a safe demonstration fixture for the PolicyTrace project.",
            styles["small"],
        )
    )

    document = _doc(target, "Certificate of Motor Insurance - Demo")
    document.build(story, onFirstPage=draw_page, onLaterPages=draw_page)
def build_statement_of_fact() -> None:
    """Write "Statement of Fact - Demo.pdf" into ``OUT_DIR``.

    The document is a title, a one-line subtitle and a single two-column table
    of rating facts: eight values from ``POLICY`` followed by three fixed
    declarations.
    """
    styles = _styles()
    target = OUT_DIR / "Statement of Fact - Demo.pdf"

    def draw_page(canvas, doc):
        # Same page furniture on the first and on every later page.
        _draw_header(canvas, doc, "Statement of Fact")

    policy_backed = (
        ("Policy number", "policy_number"),
        ("Main driver", "policyholder"),
        ("Annual mileage", "annual_mileage"),
        ("Vehicle kept overnight", "kept_location"),
        ("Overnight postcode", "overnight_postcode"),
        ("Security device fitted", "security_device"),
        ("Tracker fitted", "tracker_fitted"),
        ("Modifications", "modifications"),
    )
    fact_rows = [[label, POLICY[key]] for label, key in policy_backed]
    # Declarations that have no POLICY entry.
    fact_rows.extend(
        [
            ["Non-motoring convictions", "No"],
            ["Endorsements", "None"],
            ["Claims in last five years", "None"],
        ]
    )

    title = Paragraph("Statement of Fact", styles["title"])
    intro = Paragraph(
        "These fictional facts were used to calculate the demo insurance premium.",
        styles["subtitle"],
    )
    facts = _table(fact_rows, [58 * mm, 112 * mm])

    document = _doc(target, "Statement of Fact - Demo")
    document.build([title, intro, facts], onFirstPage=draw_page, onLaterPages=draw_page)
def build_policy_booklet() -> None:
    """Write "Policy Booklet - Demo.pdf" into ``OUT_DIR``.

    The booklet is text only: a title and subtitle followed by four sections
    (cover, claims, general exclusions, complaints), each an ``h2`` heading
    plus one body paragraph.
    """
    styles = _styles()
    target = OUT_DIR / "Policy Booklet - Demo.pdf"

    def draw_page(canvas, doc):
        # Same page furniture on the first and on every later page.
        _draw_header(canvas, doc, "Policy Booklet")

    # (section heading, section body) in reading order.
    sections = (
        (
            "What is covered",
            "Comprehensive cover may include damage to your vehicle, fire, theft, windscreen cover, "
            "and third-party liability, subject to the terms and exclusions in this booklet.",
        ),
        (
            "Claims",
            "You must tell Northbridge Mutual Motor Insurance Ltd about any accident or loss as soon as possible. "
            "We may ask for evidence, photographs, repair estimates, or further information.",
        ),
        (
            "General exclusions",
            "No cover is provided where the vehicle is used outside the permitted class of use, "
            "where the driver is not entitled to drive, or where policy information is materially incorrect.",
        ),
        (
            "Complaints",
            "If you are unhappy with our service, contact the fictional complaints team at Northbridge Mutual.",
        ),
    )

    story = [
        Paragraph("Motor Insurance Policy Booklet", styles["title"]),
        Paragraph(
            "This booklet describes generic terms for a fictional motor insurance product. "
            "It intentionally contains little policyholder-specific data.",
            styles["subtitle"],
        ),
    ]
    for section_heading, section_text in sections:
        story.append(Paragraph(section_heading, styles["h2"]))
        story.append(Paragraph(section_text, styles["body"]))

    document = _doc(target, "Policy Booklet - Demo")
    document.build(story, onFirstPage=draw_page, onLaterPages=draw_page)
def write_manifest() -> None:
    """Write ``manifest.json`` describing the pack into ``OUT_DIR``.

    The manifest records the purpose and a no-real-data warning, the four PDF
    file names, and the policy number, VRM and insurer taken from ``POLICY``.
    It is serialised as UTF-8 JSON with a two-space indent.
    """
    pdf_names = [
        "Schedule of Insurance - Demo.pdf",
        "Certificate of Motor Insurance - Demo.pdf",
        "Statement of Fact - Demo.pdf",
        "Policy Booklet - Demo.pdf",
    ]

    manifest = {
        "purpose": "Synthetic demo data for AI Tool Stack PolicyTrace.",
        "warning": "No real customer, insurer, vehicle, or policy data is included.",
        "files": pdf_names,
    }
    # Expected values, appended after the descriptive fields.
    for field in ("policy_number", "vrm", "insurer"):
        manifest[f"expected_{field}"] = POLICY[field]

    payload = json.dumps(manifest, indent=2)
    manifest_path = OUT_DIR / "manifest.json"
    manifest_path.write_text(payload, encoding="utf-8")
def main() -> None:
    """Create ``OUT_DIR`` if needed, generate the four PDFs and the manifest.

    Prints the resolved output directory once everything has been written.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # The four PDFs first, then the manifest that lists them.
    pipeline = (
        build_schedule,
        build_certificate,
        build_statement_of_fact,
        build_policy_booklet,
        write_manifest,
    )
    for step in pipeline:
        step()

    print(f"Synthetic demo pack written to {OUT_DIR.resolve()}")


if __name__ == "__main__":
    main()
|