Update app.py

app.py CHANGED
@@ -30,14 +30,29 @@ def detect_using_clip(image,prompts=[],threshould=0.4):
    )
    with torch.no_grad(): # Use 'torch.no_grad()' to disable gradient computation
        outputs = model(**inputs)
-    preds = outputs.logits.unsqueeze(1)
+    #preds = outputs.logits.unsqueeze(1)
+    preds = nn.functional.interpolate(
+        outputs.logits.unsqueeze(1),
+        size=(test_image.shape[0], test_image.shape[1]),
+        mode="bilinear"
+    )
+    threshold = 0.1

-    …
+    flat_preds = torch.sigmoid(preds.squeeze()).reshape((preds.shape[0], -1))
+
+    # Initialize a dummy "unlabeled" mask with the threshold
+    flat_preds_with_treshold = torch.full((preds.shape[0] + 1, flat_preds.shape[-1]), threshold)
+    flat_preds_with_treshold[1:preds.shape[0]+1,:] = flat_preds
+
+    # Get the top mask index for each pixel
+    inds = torch.topk(flat_preds_with_treshold, 1, dim=0).indices.reshape((preds.shape[-2], preds.shape[-1]))
+    predicted_masks = []
+
+    for i in range(1, len(prompts)+1):
+        mask = np.where(inds==i,255,0)
+        predicted_masks.append(mask)
+
+    return predicted_masks

def visualize_images(image,predicted_images,brightness=15,contrast=1.8):
    alpha = 0.7
@@ -50,27 +65,28 @@ def visualize_images(image,predicted_images,brightness=15,contrast=1.8):
    return cv2.convertScaleAbs(resize_image_copy, alpha=contrast, beta=brightness)

def shot(alpha,beta,image,labels_text):
+    print(labels_text)
    if "," in labels_text:
        prompts = labels_text.split(',')
    else:
        prompts = [labels_text]
-    …
+    print(prompts)
    prompts = list(map(lambda x: x.strip(),prompts))

    mask_labels = [f"{prompt}_{i}" for i,prompt in enumerate(prompts)]
    cmap = plt.cm.tab20(np.arange(len(mask_labels)))[..., :-1]

-    resize_image = cv2.resize(image,(352,352))

-    …
+    predicted_masks = detect_using_clip(image,prompts=prompts)
+    bool_masks = [predicted_mask.astype('bool') for predicted_mask in predicted_masks]
+    category_image = overlay_masks(resize_image,np.stack(bool_masks,-1),labels=mask_labels,colors=cmap,alpha=alpha,beta=beta)

    return category_image

iface = gr.Interface(fn=shot,
            inputs = [
                gr.Slider(0.1, 1, value=0.4, step=0.1 , label="alpha", info="Choose between 0.1 to 1"),
-                gr.Slider(0.1, 1, value=
+                gr.Slider(0.1, 1, value=0.7, step=0.1, label="beta", info="Choose between 0.1 to 1"),
                "image",
                "text"
            ],
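For context, the new detect_using_clip body upsamples each prompt's logit map to the input image size, applies a sigmoid, and labels every pixel with the index of its strongest prompt; a dummy row holding the 0.1 threshold wins wherever all prompt probabilities fall below it, leaving those pixels "unlabeled". Below is a minimal self-contained sketch of that assignment on random tensors (the prompt count and the 480x640 image size are illustrative assumptions, not values from the Space):

import torch
import torch.nn as nn
import numpy as np

# Dummy stand-ins: 3 prompt logit maps at CLIPSeg's native 352x352,
# and an assumed 480x640 input image.
num_prompts = 3
logits = torch.randn(num_prompts, 352, 352)
image_h, image_w = 480, 640

# Upsample each prompt's logit map to the image resolution,
# as the commit does with nn.functional.interpolate.
preds = nn.functional.interpolate(
    logits.unsqueeze(1),          # (N, 1, 352, 352)
    size=(image_h, image_w),
    mode="bilinear",
)                                 # (N, 1, H, W)

threshold = 0.1
flat_preds = torch.sigmoid(preds.squeeze(1)).reshape(num_prompts, -1)  # (N, H*W)

# Row 0 is a dummy "unlabeled" class holding the threshold, so a pixel
# whose every prompt probability is below 0.1 gets index 0.
flat_with_threshold = torch.full((num_prompts + 1, flat_preds.shape[-1]), threshold)
flat_with_threshold[1:] = flat_preds

# Top-1 over the class dimension picks, per pixel, the strongest prompt
# (or "unlabeled"), then reshapes back to the image grid.
inds = torch.topk(flat_with_threshold, 1, dim=0).indices.reshape(image_h, image_w)

# One 0/255 mask per prompt, as in the commit.
predicted_masks = [np.where(inds.numpy() == i, 255, 0) for i in range(1, num_prompts + 1)]
print([m.shape for m in predicted_masks], inds.unique())

Stacking the threshold in as row 0 turns "argmax with a confidence floor" into a single topk over the class dimension, which is the trick the hunk above relies on.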
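The second hunk also completes the previously truncated slider row, so the interface now exposes both alpha and beta overlay weights. A runnable sketch of that two-slider wiring under stated assumptions (the echo-only shot stand-in, the "text" output, and the launch() call are mine; the real Space returns the overlaid category image):

import gradio as gr

# Hypothetical stand-in for the Space's shot(); it only echoes its inputs
# so the wiring of the two sliders, the image, and the text box is visible.
def shot(alpha, beta, image, labels_text):
    return f"alpha={alpha}, beta={beta}, prompts={labels_text}, image shape={getattr(image, 'shape', None)}"

iface = gr.Interface(
    fn=shot,
    inputs=[
        gr.Slider(0.1, 1, value=0.4, step=0.1, label="alpha", info="Choose between 0.1 to 1"),
        gr.Slider(0.1, 1, value=0.7, step=0.1, label="beta", info="Choose between 0.1 to 1"),
        "image",
        "text",
    ],
    outputs="text",  # assumption; the Space outputs an image
)

if __name__ == "__main__":
    iface.launch()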