Spaces:

MikeTrizna
/

doctr_demo_fork

Running

App Files Files Community

doctr_demo_fork / src /python-doctr /doctr /datasets /doc_artefacts.py

MikeTrizna

Upload folder using huggingface_hub

f3270e6 verified 5 months ago

raw

history blame contribute delete

3.23 kB

	# Copyright (C) 2021-2025, Mindee.

	# This program is licensed under the Apache License 2.0.
	# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

	import json
	import os
	from typing import Any

	import numpy as np

	from .datasets import VisionDataset

	__all__ = ["DocArtefacts"]


	class DocArtefacts(VisionDataset):
	    """Object detection dataset for non-textual elements in documents.

	    The dataset includes a variety of synthetic document pages with non-textual elements.

	    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/artefacts-grid.png&src=0
	        :align: center

	    >>> from doctr.datasets import DocArtefacts
	    >>> train_set = DocArtefacts(train=True, download=True)
	    >>> img, target = train_set[0]

	    Each entry of ``self.data`` is a ``(img_name, target)`` tuple where ``target`` is a dict with:

	    - ``boxes``: float32 array, one row per annotated object, either ``(xmin, ymin, xmax, ymax)`` or,
	      when ``use_polygons`` is set, the 4 ``(x, y)`` corners of the box
	    - ``labels``: int64 array holding the index of each object's label in ``CLASSES``

	    Args:
	        train: whether the subset should be the training one
	        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
	        **kwargs: keyword arguments from `VisionDataset`.

	    Raises:
	        AssertionError: if ``labels.json`` and the ``images`` folder do not hold the same number of entries
	        FileNotFoundError: if an image listed in ``labels.json`` is missing from the ``images`` folder
	        ValueError: if an annotation carries a label that is not listed in ``CLASSES``
	    """

	    URL = "https://doctr-static.mindee.com/models?id=v0.4.0/artefact_detection-13fab8ce.zip&src=0"
	    SHA256 = "13fab8ced7f84583d9dccd0c634f046c3417e62a11fe1dea6efbbaba5052471b"
	    CLASSES = ["background", "qr_code", "bar_code", "logo", "photo"]

	    def __init__(
	        self,
	        train: bool = True,
	        use_polygons: bool = False,
	        **kwargs: Any,
	    ) -> None:
	        super().__init__(self.URL, None, self.SHA256, True, **kwargs)
	        self.train = train

	        # Update root: the subset lives in a "train" or "val" sub-folder
	        self.root = os.path.join(self.root, "train" if train else "val")
	        # List images
	        tmp_root = os.path.join(self.root, "images")
	        with open(os.path.join(self.root, "labels.json"), "rb") as f:
	            labels = json.load(f)
	        self.data: list[tuple[str, dict[str, Any]]] = []
	        img_list = os.listdir(tmp_root)
	        if len(labels) != len(img_list):
	            raise AssertionError("the number of images and labels do not match")
	        for img_name, label in labels.items():
	            # File existence check
	            img_path = os.path.join(tmp_root, img_name)
	            if not os.path.exists(img_path):
	                raise FileNotFoundError(f"unable to locate {img_path}")

	            # xmin, ymin, xmax, ymax
	            boxes: np.ndarray = np.asarray([obj["geometry"] for obj in label], dtype=np.float32)
	            if boxes.size == 0:
	                # A page without any annotation yields a 1-D empty array; give it an explicit
	                # (0, 4) shape so the column indexing below (and downstream consumers) still work
	                boxes = boxes.reshape(0, 4)
	            classes: np.ndarray = np.asarray([self.CLASSES.index(obj["label"]) for obj in label], dtype=np.int64)
	            if use_polygons:
	                # (x, y) coordinates of top left, top right, bottom right, bottom left corners
	                boxes = np.stack(
	                    [
	                        np.stack([boxes[:, 0], boxes[:, 1]], axis=-1),
	                        np.stack([boxes[:, 2], boxes[:, 1]], axis=-1),
	                        np.stack([boxes[:, 2], boxes[:, 3]], axis=-1),
	                        np.stack([boxes[:, 0], boxes[:, 3]], axis=-1),
	                    ],
	                    axis=1,
	                )
	            self.data.append((img_name, {"boxes": boxes, "labels": classes}))
	        # From now on, the root points at the folder holding the images themselves
	        self.root = tmp_root

	    def extra_repr(self) -> str:
	        """Return the subset flag for display in the dataset's representation."""
	        return f"train={self.train}"