thoeppner committed on
Commit
91219fd
·
verified ·
1 Parent(s): 8bec17b

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ example_images/cat.jpg filter=lfs diff=lfs merge=lfs -text
37
+ example_images/dog2.jpeg filter=lfs diff=lfs merge=lfs -text
38
+ example_images/leonberger.jpg filter=lfs diff=lfs merge=lfs -text
39
+ example_images/snow_leopard.jpeg filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
+ # Load models
5
+ vit_classifier = pipeline("image-classification", model="chrisis2/vit-base-oxford-iiit-pets")
6
+ clip_detector = pipeline(model="openai/clip-vit-large-patch14", task="zero-shot-image-classification")
7
+
8
+ labels_oxford_pets = [
9
+ 'Siamese', 'Birman', 'shiba inu', 'staffordshire bull terrier', 'basset hound', 'Bombay', 'japanese chin',
10
+ 'chihuahua', 'german shorthaired', 'pomeranian', 'beagle', 'english cocker spaniel', 'american pit bull terrier',
11
+ 'Ragdoll', 'Persian', 'Egyptian Mau', 'miniature pinscher', 'Sphynx', 'Maine Coon', 'keeshond', 'yorkshire terrier',
12
+ 'havanese', 'leonberger', 'wheaten terrier', 'american bulldog', 'english setter', 'boxer', 'newfoundland', 'Bengal',
13
+ 'samoyed', 'British Shorthair', 'great pyrenees', 'Abyssinian', 'pug', 'saint bernard', 'Russian Blue', 'scottish terrier'
14
+ ]
15
+
16
+ def classify_pet(image):
17
+ vit_results = vit_classifier(image)
18
+ vit_output = {result['label']: result['score'] for result in vit_results}
19
+
20
+ clip_results = clip_detector(image, candidate_labels=labels_oxford_pets)
21
+ clip_output = {result['label']: result['score'] for result in clip_results}
22
+
23
+ return {"ViT Classification": vit_output, "CLIP Zero-Shot Classification": clip_output}
24
+
25
+ example_images = [
26
+ ["example_images/dog1.jpeg"],
27
+ ["example_images/dog2.jpeg"],
28
+ ["example_images/leonberger.jpg"],
29
+ ["example_images/snow_leopard.jpeg"],
30
+ ["example_images/cat.jpg"]
31
+ ]
32
+
33
+ iface = gr.Interface(
34
+ fn=classify_pet,
35
+ inputs=gr.Image(type="filepath"),
36
+ outputs=gr.JSON(),
37
+ title="Pet Classification Comparison",
38
+ description="Upload an image of a pet, and compare results from a trained ViT model and a zero-shot CLIP model.",
39
+ examples=example_images
40
+ )
41
+
42
+ iface.launch()
example_images/cat.jpg ADDED

Git LFS Details

  • SHA256: 881d621af6147d8559f9d5c51856874733ca264e61473741affc4bd804643049
  • Pointer size: 131 Bytes
  • Size of remote file: 128 kB
example_images/dog1.jpeg ADDED
example_images/dog2.jpeg ADDED

Git LFS Details

  • SHA256: 384da43ccf516d9b07da8d449a721088ebe9d863f8bcc5288903eaaff965c81c
  • Pointer size: 131 Bytes
  • Size of remote file: 530 kB
example_images/leonberger.jpg ADDED

Git LFS Details

  • SHA256: dbc6037e9b4cbbbd77dcb3825e0f265920833196aaf7ed91925f5824f825f107
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB
example_images/snow_leopard.jpeg ADDED

Git LFS Details

  • SHA256: da158f89f95dea3bb521de362c8e3bfa5e44d3341949831db740c521a1702a55
  • Pointer size: 131 Bytes
  • Size of remote file: 247 kB
oxford_pets_zero_shot.ipynb ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from transformers import pipeline\n",
10
+ "\n",
11
+ "checkpoint = \"openai/clip-vit-large-patch14\"\n",
12
+ "detector = pipeline(model=checkpoint, task=\"zero-shot-image-classification\")\n",
13
+ "#checkpoint = \"google/siglip-so400m-patch14-384\"\n",
14
+ "#detector = pipeline(task=\"zero-shot-image-classification\", model=\"google/siglip-so400m-patch14-384\")"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "from datasets import load_dataset\n",
24
+ "\n",
25
+ "dataset = load_dataset('pcuenq/oxford-pets')\n",
26
+ "dataset"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "dataset['train'][0]['image']"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "from PIL import Image\n",
45
+ "import io\n",
46
+ "from tqdm import tqdm\n",
47
+ "\n",
48
+ "labels_oxford_pets = ['Siamese', 'Birman', 'shiba inu', 'staffordshire bull terrier', 'basset hound', 'Bombay', 'japanese chin', 'chihuahua', 'german shorthaired', 'pomeranian', 'beagle', 'english cocker spaniel', 'american pit bull terrier', 'Ragdoll', 'Persian', 'Egyptian Mau', 'miniature pinscher', 'Sphynx', 'Maine Coon', 'keeshond', 'yorkshire terrier', 'havanese', 'leonberger', 'wheaten terrier', 'american bulldog', 'english setter', 'boxer', 'newfoundland', 'Bengal', 'samoyed', 'British Shorthair', 'great pyrenees', 'Abyssinian', 'pug', 'saint bernard', 'Russian Blue', 'scottish terrier']\n",
49
+ "\n",
50
+ "# List to store true labels and predicted labels\n",
51
+ "true_labels = []\n",
52
+ "predicted_labels = []"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "\n",
62
+ "for i in tqdm(range(len(dataset['train']))):\n",
63
+ " # Get the image bytes from the dataset\n",
64
+ " image_bytes = dataset['train'][i]['image']['bytes']\n",
65
+ " \n",
66
+ " # Convert the bytes to a PIL image\n",
67
+ " image = Image.open(io.BytesIO(image_bytes))\n",
68
+ " \n",
69
+ " # Run the detector on the image with the provided labels\n",
70
+ " results = detector(image, candidate_labels=labels_oxford_pets)\n",
71
+ " # Sort the results by score in descending order\n",
72
+ " sorted_results = sorted(results, key=lambda x: x['score'], reverse=True)\n",
73
+ " \n",
74
+ " # Get the top predicted label\n",
75
+ " predicted_label = sorted_results[0]['label']\n",
76
+ " \n",
77
+ " # Append the true and predicted labels to the respective lists\n",
78
+ " true_labels.append(dataset['train'][i]['label'])\n",
79
+ " predicted_labels.append(predicted_label)\n"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": null,
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
89
+ "\n",
90
+ "# Calculate accuracy\n",
91
+ "accuracy = accuracy_score(true_labels, predicted_labels)\n",
92
+ "\n",
93
+ "# Calculate precision and recall\n",
94
+ "precision = precision_score(true_labels, predicted_labels, average='weighted', labels=labels_oxford_pets)\n",
95
+ "recall = recall_score(true_labels, predicted_labels, average='weighted', labels=labels_oxford_pets)\n",
96
+ "\n",
97
+ "# Print the results\n",
98
+ "print(f\"Accuracy: {accuracy:.4f}\")\n",
99
+ "print(f\"Precision: {precision:.4f}\")\n",
100
+ "print(f\"Recall: {recall:.4f}\")"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "markdown",
105
+ "metadata": {},
106
+ "source": [
107
+ "## Gradio example"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": null,
113
+ "metadata": {},
114
+ "outputs": [],
115
+ "source": [
116
+ "import gradio as gr\n",
117
+ "from transformers import pipeline\n",
118
+ "\n",
119
+ "# Load models\n",
120
+ "vit_classifier = pipeline(\"image-classification\", model=\"kuhs/vit-base-oxford-iiit-pets\")\n",
121
+ "clip_detector = pipeline(model=\"openai/clip-vit-large-patch14\", task=\"zero-shot-image-classification\")\n",
122
+ "\n",
123
+ "labels_oxford_pets = [\n",
124
+ " 'Siamese', 'Birman', 'shiba inu', 'staffordshire bull terrier', 'basset hound', 'Bombay', 'japanese chin',\n",
125
+ " 'chihuahua', 'german shorthaired', 'pomeranian', 'beagle', 'english cocker spaniel', 'american pit bull terrier',\n",
126
+ " 'Ragdoll', 'Persian', 'Egyptian Mau', 'miniature pinscher', 'Sphynx', 'Maine Coon', 'keeshond', 'yorkshire terrier',\n",
127
+ " 'havanese', 'leonberger', 'wheaten terrier', 'american bulldog', 'english setter', 'boxer', 'newfoundland', 'Bengal',\n",
128
+ " 'samoyed', 'British Shorthair', 'great pyrenees', 'Abyssinian', 'pug', 'saint bernard', 'Russian Blue', 'scottish terrier'\n",
129
+ "]\n",
130
+ "\n",
131
+ "def classify_pet(image):\n",
132
+ " vit_results = vit_classifier(image)\n",
133
+ " vit_output = {result['label']: result['score'] for result in vit_results}\n",
134
+ " \n",
135
+ " clip_results = clip_detector(image, candidate_labels=labels_oxford_pets)\n",
136
+ " clip_output = {result['label']: result['score'] for result in clip_results}\n",
137
+ " \n",
138
+ " return {\"ViT Classification\": vit_output, \"CLIP Zero-Shot Classification\": clip_output}\n",
139
+ "\n",
140
+ "example_images = [\n",
141
+ " [\"example_images/dog1.jpeg\"],\n",
142
+ " [\"example_images/dog2.jpeg\"],\n",
143
+ " [\"example_images/leonberger.jpg\"],\n",
144
+ " [\"example_images/snow_leopard.jpeg\"],\n",
145
+ " [\"example_images/cat.jpg\"]\n",
146
+ "]\n",
147
+ "\n",
148
+ "iface = gr.Interface(\n",
149
+ " fn=classify_pet,\n",
150
+ " inputs=gr.Image(type=\"filepath\"),\n",
151
+ " outputs=gr.JSON(),\n",
152
+ " title=\"Pet Classification Comparison\",\n",
153
+ " description=\"Upload an image of a pet, and compare results from a trained ViT model and a zero-shot CLIP model.\",\n",
154
+ " examples=example_images\n",
155
+ ")\n",
156
+ "\n",
157
+ "iface.launch()"
158
+ ]
159
+ }
160
+ ],
161
+ "metadata": {
162
+ "kernelspec": {
163
+ "display_name": ".venv",
164
+ "language": "python",
165
+ "name": "python3"
166
+ },
167
+ "language_info": {
168
+ "codemirror_mode": {
169
+ "name": "ipython",
170
+ "version": 3
171
+ },
172
+ "file_extension": ".py",
173
+ "mimetype": "text/x-python",
174
+ "name": "python",
175
+ "nbconvert_exporter": "python",
176
+ "pygments_lexer": "ipython3",
177
+ "version": "3.13.1"
178
+ }
179
+ },
180
+ "nbformat": 4,
181
+ "nbformat_minor": 2
182
+ }
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ torch