document-ocr / data /samples /index.json
fm1320's picture
Initial: document-ocr demo for HF Spaces
ffe59ba
raw
history blame
1.86 kB
[
{
"id": "receipt",
"filename": "receipt.png",
"label": "Grocery receipt",
"description": "Printed receipt with line items, subtotal, tax, total. Clean text, simple table layout.",
"labels": [
"merchant",
"date",
"line_item",
"subtotal",
"tax",
"total",
"payment_method"
]
},
{
"id": "invoice",
"filename": "invoice.png",
"label": "Vendor invoice",
"description": "Multi-column invoice with billing party, line items, subtotal, tax, total. Form-style layout.",
"labels": [
"vendor",
"invoice_number",
"date",
"due_date",
"billing_party",
"line_item",
"total"
]
},
{
"id": "business-card",
"filename": "business-card.png",
"label": "Business card",
"description": "Tight layout, mixed text sizes, multiple contact fields.",
"labels": [
"company",
"person",
"role",
"email",
"phone",
"address",
"website"
]
},
{
"id": "table",
"filename": "table.png",
"label": "Quarterly table",
"description": "Dense numerical table with totals row. Tests table-structure recognition.",
"labels": [
"department",
"headcount",
"amount",
"category"
]
},
{
"id": "handwritten",
"filename": "handwritten.png",
"label": "Casual notes",
"description": "Jittered text simulating informal handwriting; non-template content.",
"labels": [
"task",
"person",
"place",
"amount"
]
},
{
"id": "multi-column",
"filename": "multi-column.png",
"label": "Newspaper page",
"description": "Two-column newspaper-style layout. Reading order matters.",
"labels": [
"headline",
"person",
"organization",
"place",
"date"
]
}
]