| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import unittest |
|
|
| import datasets |
| from huggingface_hub import ObjectDetectionOutputElement |
|
|
| from transformers import ( |
| MODEL_FOR_OBJECT_DETECTION_MAPPING, |
| AutoFeatureExtractor, |
| AutoModelForObjectDetection, |
| ObjectDetectionPipeline, |
| is_vision_available, |
| pipeline, |
| ) |
| from transformers.testing_utils import ( |
| compare_pipeline_output_to_hub_spec, |
| is_pipeline_test, |
| nested_simplify, |
| require_pytesseract, |
| require_timm, |
| require_torch, |
| require_vision, |
| slow, |
| ) |
|
|
| from .test_pipelines_common import ANY |
|
|
|
|
if not is_vision_available():

    class Image:
        """Stand-in for ``PIL.Image`` used when the vision extras are not installed.

        It only exists so that module-level references to ``Image.open`` resolve;
        the stub performs no work.
        """

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return ``None``."""
            return None

else:
    from PIL import Image
|
|
|
|
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class ObjectDetectionPipelineTests(unittest.TestCase):
    """Tests for ``ObjectDetectionPipeline``.

    ``get_test_pipeline`` and ``run_pipeline_test`` build a pipeline around a
    supplied model and check the structure of its output; the ``test_*`` methods
    pin exact detections for specific checkpoints.
    """

    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
    # Cached image fixtures; populated on first use by ``_load_dataset``.
    _dataset = None

    @classmethod
    def _load_dataset(cls):
        """Load the image fixtures dataset once and keep it on the class."""
        if cls._dataset is not None:
            # Already loaded by an earlier call; nothing to do.
            return
        cls._dataset = datasets.load_dataset(
            "hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
        )

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        torch_dtype="float32",
    ):
        """Build an ``ObjectDetectionPipeline`` from the given components.

        Returns:
            A ``(pipeline, examples)`` tuple, where ``examples`` is a one-element
            list holding the path of the local COCO sample image.
        """
        pipeline_kwargs = {
            "model": model,
            "tokenizer": tokenizer,
            "feature_extractor": feature_extractor,
            "image_processor": image_processor,
            "processor": processor,
            "torch_dtype": torch_dtype,
        }
        examples = ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
        return ObjectDetectionPipeline(**pipeline_kwargs), examples

    def run_pipeline_test(self, object_detector, examples):
        """Check the output structure for a single image and for a mixed batch.

        ``examples`` is accepted but not read; the inputs are built here.
        """
        sample_path = "./tests/fixtures/tests_samples/COCO/000000039769.png"

        def assert_detection_schema(detection):
            # Each detection must be exactly {score: float, label: str, box: {4 ints}}.
            self.assertEqual(
                detection,
                {
                    "score": ANY(float),
                    "label": ANY(str),
                    "box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
                },
            )

        self._load_dataset()

        # Single image given as a local path; threshold=0.0 is passed and at
        # least one detection is required.
        single_result = object_detector(sample_path, threshold=0.0)
        self.assertGreater(len(single_result), 0)
        for detection in single_result:
            assert_detection_schema(detection)

        # Batch mixing an opened image, a URL and three images taken from the
        # cached fixtures dataset (indices 0 to 2).
        batch = [
            Image.open(sample_path),
            "http://images.cocodataset.org/val2017/000000039769.jpg",
            self._dataset[0]["image"],
            self._dataset[1]["image"],
            self._dataset[2]["image"],
        ]
        batch_results = object_detector(batch, threshold=0.0)

        # One list of detections is expected per input.
        self.assertEqual(len(batch), len(batch_results))
        for image_detections in batch_results:
            self.assertGreater(len(image_detections), 0)
            for detection in image_detections:
                assert_detection_schema(detection)
                compare_pipeline_output_to_hub_spec(detection, ObjectDetectionOutputElement)

    # Tiny checkpoint: pins the exact detections for one image and for a batch of
    # two copies of the same image.
    @require_torch
    def test_small_model_pt(self):
        checkpoint = "hf-internal-testing/tiny-detr-mobilenetsv3"
        image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        expected = [
            {"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
            {"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
        ]

        detector = ObjectDetectionPipeline(
            model=AutoModelForObjectDetection.from_pretrained(checkpoint),
            feature_extractor=AutoFeatureExtractor.from_pretrained(checkpoint),
        )

        single_result = detector(image_url, threshold=0.0)
        self.assertEqual(nested_simplify(single_result, decimals=4), expected)

        # The same image twice must give the same detections twice.
        batch_result = detector([image_url, image_url], threshold=0.0)
        self.assertEqual(nested_simplify(batch_result, decimals=4), [expected, expected])

    # Full-size checkpoint loaded explicitly through the Auto classes.
    @require_torch
    @slow
    def test_large_model_pt(self):
        checkpoint = "facebook/detr-resnet-50"
        image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        expected = [
            {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
            {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
            {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]

        detector = ObjectDetectionPipeline(
            model=AutoModelForObjectDetection.from_pretrained(checkpoint),
            feature_extractor=AutoFeatureExtractor.from_pretrained(checkpoint),
        )

        single_result = detector(image_url)
        self.assertEqual(nested_simplify(single_result, decimals=4), expected)

        batch_result = detector([image_url, image_url])
        self.assertEqual(nested_simplify(batch_result, decimals=4), [expected, expected])

    # Same checkpoint and expectations as test_large_model_pt, but built through
    # the ``pipeline`` factory.
    @require_torch
    @slow
    def test_integration_torch_object_detection(self):
        checkpoint = "facebook/detr-resnet-50"
        image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        expected = [
            {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
            {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
            {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]

        detector = pipeline("object-detection", model=checkpoint)

        single_result = detector(image_url)
        self.assertEqual(nested_simplify(single_result, decimals=4), expected)

        batch_result = detector([image_url, image_url])
        self.assertEqual(nested_simplify(batch_result, decimals=4), [expected, expected])

    # A call-time threshold of 0.9985 must leave only the two detections whose
    # scores (0.9988 and 0.9987) exceed it.
    @require_torch
    @slow
    def test_threshold(self):
        checkpoint = "facebook/detr-resnet-50"
        min_score = 0.9985
        expected = [
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]

        detector = pipeline("object-detection", model=checkpoint)
        result = detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=min_score)

        self.assertEqual(nested_simplify(result, decimals=4), expected)

    # LayoutLMv3 checkpoint on an invoice image; here the threshold is supplied
    # when the pipeline is created rather than at call time.
    @require_torch
    @require_pytesseract
    @slow
    def test_layoutlm(self):
        checkpoint = "Narsil/layoutlmv3-finetuned-funsd"
        min_score = 0.9993
        expected = [
            {"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
            {"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
        ]

        detector = pipeline("object-detection", model=checkpoint, threshold=min_score)
        result = detector(
            "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
        )

        self.assertEqual(nested_simplify(result, decimals=4), expected)
|
|