JanadaSroor committed on
Commit
1c955f7
·
verified ·
1 Parent(s): 006517c

Upload AI_Models_Demo.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. AI_Models_Demo.ipynb +30 -15
AI_Models_Demo.ipynb CHANGED
@@ -119,6 +119,11 @@
119
  "metadata": {},
120
  "outputs": [],
121
  "source": [
 
 
 
 
 
122
  "# Load a test image\n",
123
  "url = \"https://images.unsplash.com/photo-1543466835-00a7907e9de1?ixlib=rb-4.0.3&auto=format&fit=crop&w=500&q=80\"\n",
124
  "response = requests.get(url)\n",
@@ -126,25 +131,35 @@
126
  "display(image.resize((300, 300)))\n",
127
  "\n",
128
  "# Define queries\n",
129
- "queries = [\"a cute dog\", \"a running dog\", \"a cat\", \"a car\", \"food\"]\n",
130
  "\n",
131
- "# 1. Encode Image (CLIP Vision)\n",
132
- "inputs = clip_processor(images=image, return_tensors=\"np\")\n",
133
- "image_embeds = vision_sess.run(None, dict(inputs))[0][0]\n",
134
  "\n",
135
- "# 2. Encode Text & Compare\n",
136
- "print(f\"\\n{'Query':<20} | {'Score':<10}\")\n",
137
- "print(\"-\" * 35)\n",
138
  "\n",
139
  "for query in queries:\n",
140
- " # Tokenize and encode text\n",
141
  " text_inputs = clip_processor(text=[query], return_tensors=\"np\", padding=True)\n",
142
- " text_embeds = text_sess.run(None, dict(text_inputs))[0][0]\n",
143
- " \n",
144
- " # Calculate Cosine Similarity\n",
145
- " similarity = np.dot(text_embeds, image_embeds) / (np.linalg.norm(text_embeds) * np.linalg.norm(image_embeds))\n",
146
- " \n",
147
- " print(f\"{query:<20} | {similarity:.4f}\")"
 
 
 
 
 
 
 
 
 
 
 
148
  ]
149
  },
150
  {
@@ -195,4 +210,4 @@
195
  },
196
  "nbformat": 4,
197
  "nbformat_minor": 4
198
- }
 
119
  "metadata": {},
120
  "outputs": [],
121
  "source": [
122
+ "import numpy as np\n",
123
+ "import requests\n",
124
+ "from PIL import Image\n",
125
+ "from io import BytesIO\n",
126
+ "\n",
127
  "# Load a test image\n",
128
  "url = \"https://images.unsplash.com/photo-1543466835-00a7907e9de1?ixlib=rb-4.0.3&auto=format&fit=crop&w=500&q=80\"\n",
129
  "response = requests.get(url)\n",
 
131
  "display(image.resize((300, 300)))\n",
132
  "\n",
133
  "# Define queries\n",
134
+ "queries = [\"a cute dog\", \"a dog looking\", \"a cat\", \"a car\", \"food\"]\n",
135
  "\n",
136
+ "# ---------- 1. Encode Image ----------\n",
137
+ "image_inputs = clip_processor(images=image, return_tensors=\"np\")\n",
138
+ "image_embed = vision_sess.run(None, dict(image_inputs))[0][0]\n",
139
  "\n",
140
+ "# L2 normalize image embedding\n",
141
+ "image_embed = image_embed / np.linalg.norm(image_embed)\n",
142
+ "scores = []\n",
143
  "\n",
144
  "for query in queries:\n",
 
145
  " text_inputs = clip_processor(text=[query], return_tensors=\"np\", padding=True)\n",
146
+ " text_embed = text_sess.run(None, dict(text_inputs))[0][0]\n",
147
+ " text_embed = text_embed / np.linalg.norm(text_embed)\n",
148
+ "\n",
149
+ " score = 100.0 * np.dot(text_embed, image_embed)\n",
150
+ " scores.append(score)\n",
151
+ "\n",
152
+ "scores = np.array(scores)\n",
153
+ "\n",
154
+ "# Softmax over queries (THIS is what CLIP expects)\n",
155
+ "probs = np.exp(scores) / np.exp(scores).sum()\n",
156
+ "\n",
157
+ "print(f\"\\n{'Query':<20} | {'Logit':<10} | {'Prob'}\")\n",
158
+ "print(\"-\" * 50)\n",
159
+ "\n",
160
+ "for q, s, p in zip(queries, scores, probs):\n",
161
+ " print(f\"{q:<20} | {s:8.2f} | {100*p:.3f}%\")\n",
162
+ "\n"
163
  ]
164
  },
165
  {
 
210
  },
211
  "nbformat": 4,
212
  "nbformat_minor": 4
213
+ }