File size: 1,840 Bytes
e36e780
8afe594
 
 
e36e780
72f5b82
c6ab637
 
72f5b82
 
 
 
e36e780
 
 
 
 
 
 
 
 
 
 
c6ab637
e36e780
885dda4
c6ab637
e36e780
c6ab637
e36e780
 
 
 
 
 
 
 
 
 
 
 
 
 
c6ab637
e36e780
 
 
c6ab637
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
import torch
import logging
...
from transformers import pipeline
 
# Build both Hugging Face pipelines once, at import time, and keep them in
# module-level names so they are created a single time for the whole process.
vit_classifier = pipeline(
    task="image-classification",
    model="dewiri/vit-base-oxford-iiit-pets",
)
clip_detector = pipeline(
    task="zero-shot-image-classification",
    model="openai/clip-vit-large-patch14",
)

# Candidate breed labels (37 entries). The strings are handed verbatim to the
# zero-shot classifier, so spelling, casing and order are kept exactly as-is.
labels_oxford_pets = [
    "Siamese",
    "Birman",
    "shiba inu",
    "staffordshire bull terrier",
    "basset hound",
    "Bombay",
    "japanese chin",
    "chihuahua",
    "german shorthaired",
    "pomeranian",
    "beagle",
    "english cocker spaniel",
    "american pit bull terrier",
    "Ragdoll",
    "Persian",
    "Egyptian Mau",
    "miniature pinscher",
    "Sphynx",
    "Maine Coon",
    "keeshond",
    "yorkshire terrier",
    "havanese",
    "leonberger",
    "wheaten terrier",
    "american bulldog",
    "english setter",
    "boxer",
    "newfoundland",
    "Bengal",
    "samoyed",
    "British Shorthair",
    "great pyrenees",
    "Abyssinian",
    "pug",
    "saint bernard",
    "Russian Blue",
    "scottish terrier",
]

def classify_pet(image):
    """Run one image through both classifiers and return their scores together.

    Args:
        image: The image to classify, forwarded unchanged to both pipelines.
            The UI input is declared with ``type="filepath"``, so this is
            expected to be a path string; it is ``None`` when the form is
            submitted without an image.

    Returns:
        dict: Two entries, ``"ViT Classification"`` and
        ``"CLIP Zero-Shot Classification"``, each mapping a predicted label
        to its score. Both mappings are empty when no image was supplied.
    """
    # Submitting the form with an empty image field yields None. Passing that
    # on to the pipelines fails with an opaque image-loading error, so answer
    # with the usual structure and no predictions instead.
    if image is None:
        return {"ViT Classification": {}, "CLIP Zero-Shot Classification": {}}

    # Each pipeline returns a list of {"label": ..., "score": ...} records;
    # flatten them into label -> score mappings for display.
    vit_results = vit_classifier(image)
    vit_output = {result['label']: result['score'] for result in vit_results}

    # CLIP is zero-shot: it scores the image against the supplied breed names.
    clip_results = clip_detector(image, candidate_labels=labels_oxford_pets)
    clip_output = {result['label']: result['score'] for result in clip_results}

    return {"ViT Classification": vit_output, "CLIP Zero-Shot Classification": clip_output}

# Sample inputs offered in the UI. Each inner list holds the arguments for one
# example call: a single image path under the example_images/ directory.
example_images = [
    [f"example_images/{file_name}"]
    for file_name in (
        "dog1.jpeg",
        "dog2.jpeg",
        "leonberger.jpg",
        "snow_leopard.jpeg",
        "cat.jpg",
    )
]

# Wire the classifier into a single-function Gradio UI: one image input
# (delivered to the callback as a file path) and one JSON output panel.
iface = gr.Interface(
    title="Pet Classification Comparison",
    description=(
        "Upload an image of a pet, and compare results from a trained ViT model "
        "and a zero-shot CLIP model."
    ),
    fn=classify_pet,
    inputs=gr.Image(type="filepath"),
    outputs=gr.JSON(),
    examples=example_images,
)

# Start the web server as soon as the script is executed.
iface.launch()