| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import unittest |
|
|
| from huggingface_hub import ZeroShotImageClassificationOutputElement |
|
|
| from transformers import is_vision_available |
| from transformers.pipelines import pipeline |
| from transformers.testing_utils import ( |
| compare_pipeline_output_to_hub_spec, |
| is_pipeline_test, |
| nested_simplify, |
| require_torch, |
| require_vision, |
| slow, |
| ) |
|
|
| from .test_pipelines_common import ANY |
|
|
|
|
if not is_vision_available():

    class Image:
        """Stand-in for ``PIL.Image`` so module collection succeeds without Pillow.

        Tests that would actually open an image are gated by ``@require_vision``,
        so this stub is never exercised — it only keeps the name resolvable.
        """

        @staticmethod
        def open(*args, **kwargs):
            return None

else:
    from PIL import Image
|
|
|
|
@is_pipeline_test
@require_vision
class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
    """Tests for the ``zero-shot-image-classification`` pipeline.

    Covers a tiny random CLIP checkpoint (fast smoke test, fp32 and fp16)
    plus ``@slow`` tests against real CLIP, SigLIP, and BLIP-2 checkpoints.
    Each test checks both a single-image call and a batched call
    (``[image] * 5`` with ``batch_size=2``). All tests download models from
    the Hub and read a local COCO fixture image.
    """

    @require_torch
    def test_small_model_pt(self, torch_dtype="float32"):
        """Smoke-test the pipeline with a tiny random CLIP model.

        ``torch_dtype`` is a parameter (not a test fixture) so that
        ``test_small_model_pt_fp16`` below can reuse this body with
        half precision.
        """
        image_classifier = pipeline(
            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", torch_dtype=torch_dtype
        )
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        output = image_classifier(image, candidate_labels=["a", "b", "c"])

        # The checkpoint has random weights, so all three labels score ~1/3
        # and their relative order is not deterministic — hence assertIn over
        # the orderings observed in practice rather than a single assertEqual.
        self.assertIn(
            nested_simplify(output),
            [
                [{"score": 0.333, "label": "a"}, {"score": 0.333, "label": "b"}, {"score": 0.333, "label": "c"}],
                [{"score": 0.333, "label": "a"}, {"score": 0.333, "label": "c"}, {"score": 0.333, "label": "b"}],
                [{"score": 0.333, "label": "b"}, {"score": 0.333, "label": "a"}, {"score": 0.333, "label": "c"}],
            ],
        )

        output = image_classifier([image] * 5, candidate_labels=["A", "B", "C"], batch_size=2)
        self.assertEqual(
            nested_simplify(output),
            # Batched call: one result list per input image. Label ordering is
            # unstable for the random model, so only the score and the label
            # type are pinned here (ANY(str) matches any string).
            [
                [
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                ],
                [
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                ],
                [
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                ],
                [
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                ],
                [
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                    {"score": 0.333, "label": ANY(str)},
                ],
            ],
        )

        # Each per-image result must also conform to the Hub's published
        # output schema for this task.
        for single_output in output:
            compare_pipeline_output_to_hub_spec(single_output, ZeroShotImageClassificationOutputElement)

    @require_torch
    def test_small_model_pt_fp16(self):
        """Re-run the tiny-model smoke test in float16."""
        self.test_small_model_pt(torch_dtype="float16")

    @slow
    @require_torch
    def test_large_model_pt(self):
        """Check exact (simplified) scores against the real CLIP ViT-B/32 checkpoint."""
        image_classifier = pipeline(
            task="zero-shot-image-classification",
            model="openai/clip-vit-base-patch32",
        )
        # This is an image of 2 cats with remotes and no planes
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        output = image_classifier(image, candidate_labels=["cat", "plane", "remote"])

        self.assertEqual(
            nested_simplify(output),
            [
                {"score": 0.511, "label": "remote"},
                {"score": 0.485, "label": "cat"},
                {"score": 0.004, "label": "plane"},
            ],
        )

        # Batched call on 5 copies of the same image: every per-image result
        # must match the single-image result above.
        output = image_classifier([image] * 5, candidate_labels=["cat", "plane", "remote"], batch_size=2)
        self.assertEqual(
            nested_simplify(output),
            [
                [
                    {"score": 0.511, "label": "remote"},
                    {"score": 0.485, "label": "cat"},
                    {"score": 0.004, "label": "plane"},
                ],
            ]
            * 5,
        )

    @slow
    @require_torch
    def test_siglip_model_pt(self):
        """Check exact (simplified) scores against the SigLIP base checkpoint.

        Unlike CLIP's softmax over labels, the scores here do not sum to 1
        (SigLIP scores each label independently), hence the near-zero values.
        """
        image_classifier = pipeline(
            task="zero-shot-image-classification",
            model="google/siglip-base-patch16-224",
        )
        # This is an image of 2 cats with remotes and no planes
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        output = image_classifier(image, candidate_labels=["2 cats", "a plane", "a remote"])

        self.assertEqual(
            nested_simplify(output),
            [
                {"score": 0.198, "label": "2 cats"},
                {"score": 0.0, "label": "a remote"},
                {"score": 0.0, "label": "a plane"},
            ],
        )

        output = image_classifier([image] * 5, candidate_labels=["2 cats", "a plane", "a remote"], batch_size=2)

        # Batched results must be identical to the single-image result.
        self.assertEqual(
            nested_simplify(output),
            [
                [
                    {"score": 0.198, "label": "2 cats"},
                    {"score": 0.0, "label": "a remote"},
                    {"score": 0.0, "label": "a plane"},
                ]
            ]
            * 5,
        )

    @slow
    @require_torch
    def test_blip2_model_pt(self):
        """Check exact (simplified) scores against the BLIP-2 ITM checkpoint.

        BLIP-2's tokenizer emits ``token_type_ids`` that the model does not
        accept, so the test passes ``tokenizer_kwargs`` to suppress them.
        """
        image_classifier = pipeline(
            task="zero-shot-image-classification",
            model="Salesforce/blip2-itm-vit-g",
        )
        # This is an image of 2 cats with remotes and no planes
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        output = image_classifier(
            image,
            candidate_labels=["2 cats", "a plane", "a remote"],
            tokenizer_kwargs={"return_token_type_ids": False},
        )

        self.assertEqual(
            nested_simplify(output),
            [
                {"score": 0.369, "label": "2 cats"},
                {"score": 0.333, "label": "a remote"},
                {"score": 0.297, "label": "a plane"},
            ],
        )

        output = image_classifier(
            [image] * 5,
            candidate_labels=["2 cats", "a plane", "a remote"],
            batch_size=2,
            tokenizer_kwargs={"return_token_type_ids": False},
        )

        # Batched results must be identical to the single-image result.
        self.assertEqual(
            nested_simplify(output),
            [
                [
                    {"score": 0.369, "label": "2 cats"},
                    {"score": 0.333, "label": "a remote"},
                    {"score": 0.297, "label": "a plane"},
                ]
            ]
            * 5,
        )
|
|