| | import torch |
| | from PIL import Image |
| | from open_clip.factory import get_tokenizer |
| | import pytest |
| | import open_clip |
| | import os |
| | os.environ["CUDA_VISIBLE_DEVICES"] = "" |
| |
|
| | if hasattr(torch._C, '_jit_set_profiling_executor'): |
| | |
| | |
| | torch._C._jit_set_profiling_executor(True) |
| | torch._C._jit_set_profiling_mode(False) |
| |
|
| |
|
| | test_simple_models = [ |
| | |
| | ("ViT-B-32", "laion2b_s34b_b79k", False, False), |
| | ("ViT-B-32", "laion2b_s34b_b79k", True, False), |
| | ("ViT-B-32", "laion2b_s34b_b79k", True, True), |
| | ("roberta-ViT-B-32", "laion2b_s12b_b32k", False, False), |
| | ] |
| |
|
| |
|
| | @pytest.mark.parametrize("model_type,pretrained,jit,force_custom_text", test_simple_models) |
| | def test_inference_simple( |
| | model_type, |
| | pretrained, |
| | jit, |
| | force_custom_text, |
| | ): |
| | model, _, preprocess = open_clip.create_model_and_transforms( |
| | model_type, |
| | pretrained=pretrained, |
| | jit=jit, |
| | force_custom_text=force_custom_text, |
| | ) |
| | tokenizer = get_tokenizer(model_type) |
| |
|
| | current_dir = os.path.dirname(os.path.realpath(__file__)) |
| |
|
| | image = preprocess(Image.open(current_dir + "/../docs/CLIP.png")).unsqueeze(0) |
| | text = tokenizer(["a diagram", "a dog", "a cat"]) |
| |
|
| | with torch.no_grad(): |
| | image_features = model.encode_image(image) |
| | text_features = model.encode_text(text) |
| |
|
| | text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1) |
| |
|
| | assert torch.allclose(text_probs.cpu()[0], torch.tensor([1.0, 0.0, 0.0])) |
| |
|