Upload AI_Models_Demo.ipynb with huggingface_hub
AI_Models_Demo.ipynb (+30 -15)
CHANGED
@@ -119,6 +119,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"import numpy as np\n",
+"import requests\n",
+"from PIL import Image\n",
+"from io import BytesIO\n",
+"\n",
 "# Load a test image\n",
 "url = \"https://images.unsplash.com/photo-1543466835-00a7907e9de1?ixlib=rb-4.0.3&auto=format&fit=crop&w=500&q=80\"\n",
 "response = requests.get(url)\n",
@@ -126,25 +131,35 @@
 "display(image.resize((300, 300)))\n",
 "\n",
 "# Define queries\n",
-"queries = [\"a cute dog\", \"a …
+"queries = [\"a cute dog\", \"a dog looking\", \"a cat\", \"a car\", \"food\"]\n",
 "\n",
-"# 1. Encode Image …
-"…
-"…
+"# ---------- 1. Encode Image ----------\n",
+"image_inputs = clip_processor(images=image, return_tensors=\"np\")\n",
+"image_embed = vision_sess.run(None, dict(image_inputs))[0][0]\n",
 "\n",
-"# …
-"…
-"…
+"# L2 normalize image embedding\n",
+"image_embed = image_embed / np.linalg.norm(image_embed)\n",
+"scores = []\n",
 "\n",
 "for query in queries:\n",
-"    # Tokenize and encode text\n",
 "    text_inputs = clip_processor(text=[query], return_tensors=\"np\", padding=True)\n",
-"…
-"    \n",
-"…
-"…
-"    \n",
-"…
+"    text_embed = text_sess.run(None, dict(text_inputs))[0][0]\n",
+"    text_embed = text_embed / np.linalg.norm(text_embed)\n",
+"\n",
+"    score = 100.0 * np.dot(text_embed, image_embed)\n",
+"    scores.append(score)\n",
+"\n",
+"scores = np.array(scores)\n",
+"\n",
+"# Softmax over queries (THIS is what CLIP expects)\n",
+"probs = np.exp(scores) / np.exp(scores).sum()\n",
+"\n",
+"print(f\"\\n{'Query':<20} | {'Logit':<10} | {'Prob'}\")\n",
+"print(\"-\" * 50)\n",
+"\n",
+"for q, s, p in zip(queries, scores, probs):\n",
+"    print(f\"{q:<20} | {s:8.2f} | {100*p:.3f}%\")\n",
+"\n"
 ]
 },
 {
@@ -195,4 +210,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
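The new cell relies on `clip_processor`, `vision_sess`, and `text_sess` being defined in earlier cells that this diff does not show. A minimal sketch of what that setup could look like, assuming the CLIP vision and text towers were exported to separate ONNX files; the checkpoint name and the `.onnx` paths are illustrative placeholders, not taken from this commit:

```python
import onnxruntime as ort
from transformers import CLIPProcessor

# Hypothetical setup, not part of this commit: the checkpoint and file
# names below are placeholders. The exported graphs are assumed to use
# the processor's output keys as input names ("pixel_values" for the
# vision tower, "input_ids"/"attention_mask" for the text tower), so
# that dict(inputs) can be passed straight to run(), and to return the
# projected embedding as their first output.
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

vision_sess = ort.InferenceSession("clip_vision.onnx",
                                   providers=["CPUExecutionProvider"])
text_sess = ort.InferenceSession("clip_text.onnx",
                                 providers=["CPUExecutionProvider"])
```

The fixed `100.0` multiplier in the cell stands in for CLIP's learned logit scale (for the released OpenAI checkpoints `logit_scale.exp()` sits near 100), which is why the softmax over these scaled cosine similarities approximates the label probabilities the full `CLIPModel` would produce.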