import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel
|
|
# Checkpoint identifier shared by the model and its processor. Naming it once
# keeps the two from drifting apart if the checkpoint is ever swapped.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"

# Loaded once at module level so every call to the scoring function reuses the
# same model and processor objects.
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
|
|
|
|
def calculate_score(image, text):
    """Score how well each semicolon-separated caption in *text* matches *image*.

    Args:
        image: The image to score, passed straight to the module-level
            ``processor``. ``None`` is treated as "no image supplied".
        text: Candidate captions separated by ``;``. Whitespace around each
            caption is stripped; empty and repeated captions are ignored.
            ``None`` is treated like an empty string.

    Returns:
        A dict mapping each caption to its image-text logit divided by 100,
        as a plain Python float. The dict is empty when there is no image or
        no usable caption.
    """
    # Without an image there is nothing to score; mirror the empty-caption
    # result instead of letting the model fail on missing pixel inputs.
    if image is None:
        return {}

    stripped = (part.strip() for part in (text or "").split(";"))
    # dict.fromkeys drops repeated captions while keeping first-seen order, so
    # identical text is not encoded more than once.
    labels = list(dict.fromkeys(label for label in stripped if label))
    if not labels:
        return {}

    # truncation=True clips captions that exceed the tokenizer's maximum
    # length rather than letting an over-long sequence reach the model.
    inputs = processor(
        text=labels,
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Row 0 holds the scores for the single input image; tolist() yields
    # plain Python floats rather than NumPy/torch scalars.
    scores = outputs.logits_per_image[0].tolist()

    # NOTE(review): the division by 100 presumably undoes CLIP's logit scale
    # so values are roughly cosine similarities -- confirm against the model.
    return {label: score / 100.0 for label, score in zip(labels, scores)}
|
|
|
|
if __name__ == "__main__":
    # Captions for the bundled example, written as adjacent literals so the
    # semicolon-separated string stays readable; the value is one string.
    example_captions = (
        "a cat stuck in a door; a cat in the air; a cat sitting; "
        "a cat standing; a cat is entering the matrix; "
        "a cat is entering the void"
    )
    # One example row: values in the same order as the interface inputs.
    example_row = ["cat.jpg", example_captions]

    # Interface configuration gathered in one place before construction.
    interface_options = dict(
        fn=calculate_score,
        inputs=["image", "text"],
        outputs="label",
        examples=[example_row],
        cache_examples=True,
        allow_flagging="never",
        description="# CLIP Score",
        article="Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text",
    )

    demo = gr.Interface(**interface_options)
    demo.launch()