#!/usr/bin/env python3
"""Load a CLIP model with a custom (penultimate-layer) text config, convert it
to BetterTransformer, and compile the text encoder with torch.compile."""
from transformers import CLIPTextConfig, CLIPModel
import torch

# Text config taken from the openMUSE checkpoint: it exposes the penultimate
# hidden layer of the text encoder rather than the final one.
config = CLIPTextConfig.from_pretrained(
    "openMUSE/CLIP-ViT-L-14-DataComp.XL-s13B-b90K-penultimate"
)

# Load the full CLIP model weights, overriding only the text-side config.
model = CLIPModel.from_pretrained(
    "laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K", text_config=config
)

# to_bettertransformer() RETURNS the converted model; the original code
# discarded the return value, so the conversion may not have taken effect.
model = model.to_bettertransformer()

# Compile only the text encoder for faster repeated inference.
text_encoder = model.text_model
text_encoder = torch.compile(text_encoder)