Spaces:
Running
Running
| """Dataset adapters mapping public benchmark labels onto the ``Document`` schema. | |
| Each adapter yields :class:`~eval.datasets.base.GoldExample` records: an id, the | |
| input (a PIL image or a file path), and a gold dict keyed by ``Document`` field | |
| names. Only the fields a dataset actually labels appear in ``labeled_fields``; | |
| the scorer restricts every metric to that set (an unlabeled field is neither a | |
| false positive nor a miss -- there is simply no ground truth for it). | |
| SROIE is wired end-to-end first (T10). CORD and the invoice-JSON set are | |
| scaffolded as adapters with their intended field mappings documented, but are | |
| intentionally not wired yet -- calling ``load`` on them raises. | |
| """ | |
| from __future__ import annotations | |
| from eval.datasets.base import DatasetAdapter, GoldExample | |
| from eval.datasets.cord import CordAdapter | |
| from eval.datasets.invoice_json import InvoiceJsonAdapter | |
| from eval.datasets.sroie import SroieAdapter | |
# Every known adapter class, indexed by the stable ``name`` the class declares.
# Insertion order follows the tuple below.
ADAPTERS: dict[str, type[DatasetAdapter]] = {
    adapter_cls.name: adapter_cls
    for adapter_cls in (SroieAdapter, CordAdapter, InvoiceJsonAdapter)
}

# Names of the adapters that have been proven end-to-end, i.e. the ones the
# predict phase may safely be run against. Everything else in ``ADAPTERS`` is a
# scaffold: ``get_adapter`` will still hand it out (so its metadata can be
# inspected), but ``eval.predict`` refuses to run a dataset that is not listed
# here.
WIRED_DATASETS: frozenset[str] = frozenset((SroieAdapter.name,))
def get_adapter(name: str) -> DatasetAdapter:
    """Build a fresh instance of the dataset adapter registered as ``name``.

    Args:
        name: Stable registry key of the adapter (e.g. "sroie").

    Returns:
        A newly constructed adapter; the registered class is called with no
        arguments.

    Raises:
        KeyError: If ``name`` is not a key of ``ADAPTERS``. The message lists
            every registered name in sorted order.
    """
    # Happy path first: a registered name maps straight to its class.
    if name in ADAPTERS:
        return ADAPTERS[name]()

    # Unknown name: tell the caller which names would have been accepted.
    known_names = ", ".join(sorted(ADAPTERS))
    raise KeyError(f"Unknown dataset {name!r}; available: {known_names}")
# Public API of this package. Order is kept as originally declared.
__all__ = [
    # Registry constants.
    "ADAPTERS", "WIRED_DATASETS",
    # Base types shared by every adapter.
    "DatasetAdapter", "GoldExample",
    # Concrete adapters.
    "SroieAdapter", "CordAdapter", "InvoiceJsonAdapter",
    # Factory helper.
    "get_adapter",
]