Spaces:

huzey
/

ncut-pytorch

Running on Zero

App Files Community

huzey committed on Aug 26, 2024

Commit

c320745

1 Parent(s): 1075d8a

update gpu

Browse files

Files changed (1) hide show

app.py +38 -19

app.py CHANGED Viewed

@@ -378,13 +378,14 @@ class CLIP(torch.nn.Module):
                 causal_attention_mask=causal_attention_mask,
                 output_attentions=output_attentions,
             )
-            self.attn_output = hidden_states.clone()
             hidden_states = residual + hidden_states
             residual = hidden_states
             hidden_states = self.layer_norm2(hidden_states)
             hidden_states = self.mlp(hidden_states)
-            self.mlp_output = hidden_states.clone()
             hidden_states = residual + hidden_states
@@ -393,7 +394,7 @@ class CLIP(torch.nn.Module):
             if output_attentions:
                 outputs += (attn_weights,)
-            self.block_output = hidden_states.clone()
             return outputs
         setattr(self.model.vision_model.encoder.layers[0].__class__, "forward", new_forward)
@@ -538,10 +539,13 @@ def compute_ncut(
     affinity_focal_gamma=0.3,
     knn_ncut=10,
     knn_tsne=10,
     num_sample_tsne=1000,
     perplexity=500,
 ):
-    from ncut_pytorch import NCUT, rgb_from_tsne_3d
     start = time.time()
     eigvecs, eigvals = NCUT(
@@ -554,16 +558,23 @@ def compute_ncut(
     print(f"NCUT time (cpu): {time.time() - start:.2f}s")
     start = time.time()
-    X_3d, rgb = rgb_from_tsne_3d(
-        eigvecs,
-        num_sample=num_sample_tsne,
-        perplexity=perplexity,
-        knn=knn_tsne,
-    )
-    print(f"t-SNE time (cpu): {time.time() - start:.2f}s")
-    # print("input shape:", features.shape)
-    # print("output shape:", rgb.shape)
     rgb = rgb.reshape(features.shape[:3] + (3,))
     return rgb
@@ -585,7 +596,7 @@ def to_pil_images(images):
         for image in images
     ]
-@spaces.GPU(duration=60)
 def main_fn(
     images,
     model_name="SAM(sam_vit_b)",
@@ -595,9 +606,12 @@ def main_fn(
     affinity_focal_gamma=0.3,
     num_sample_ncut=10000,
     knn_ncut=10,
     num_sample_tsne=1000,
     knn_tsne=10,
     perplexity=500,
 ):
     if perplexity >= num_sample_tsne:
         # raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
@@ -620,7 +634,10 @@ def main_fn(
         knn_ncut=knn_ncut,
         knn_tsne=knn_tsne,
         num_sample_tsne=num_sample_tsne,
         perplexity=perplexity,
     )
     rgb = dont_use_too_much_green(rgb)
     return to_pil_images(rgb)
@@ -642,10 +659,12 @@ demo = gr.Interface(
         gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for more aggressive cleaning on the affinity matrix"),
         gr.Slider(100, 10000, step=100, label="num_sample (NCUT)", value=5000, elem_id="num_sample_ncut", info="for Nyström approximation"),
         gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="for Nyström approximation"),
-        gr.Slider(100, 1000, step=100, label="num_sample (t-SNE)", value=500, elem_id="num_sample_tsne", info="for Nyström approximation. Adding will slow down t-SNE quite a lot"),
-        gr.Slider(1, 100, step=1, label="KNN (t-SNE)", value=10, elem_id="knn_tsne", info="for Nyström approximation"),
-        gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=250, elem_id="perplexity", info="for t-SNE"),
     ]
 )

                 causal_attention_mask=causal_attention_mask,
                 output_attentions=output_attentions,
             )
+            hw = np.sqrt(hidden_states.shape[1]-1).astype(int)
+            self.attn_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
             hidden_states = residual + hidden_states
             residual = hidden_states
             hidden_states = self.layer_norm2(hidden_states)
             hidden_states = self.mlp(hidden_states)
+            self.mlp_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
             hidden_states = residual + hidden_states
             if output_attentions:
                 outputs += (attn_weights,)
+            self.block_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
             return outputs
         setattr(self.model.vision_model.encoder.layers[0].__class__, "forward", new_forward)
     affinity_focal_gamma=0.3,
     knn_ncut=10,
     knn_tsne=10,
+    embedding_method="UMAP",
     num_sample_tsne=1000,
     perplexity=500,
+    n_neighbors=500,
+    min_dist=0.1,
 ):
+    from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d
     start = time.time()
     eigvecs, eigvals = NCUT(
     print(f"NCUT time (cpu): {time.time() - start:.2f}s")
     start = time.time()
+    if embedding_method == "UMAP":
+        rgb = rgb_from_umap_3d(
+            eigvecs,
+            n_neighbors=n_neighbors,
+            min_dist=min_dist,
+        )
+        print(f"UMAP time (cpu): {time.time() - start:.2f}s")
+    elif embedding_method == "t-SNE":
+        X_3d, rgb = rgb_from_tsne_3d(
+            eigvecs,
+            num_sample=num_sample_tsne,
+            perplexity=perplexity,
+            knn=knn_tsne,
+        )
+        print(f"t-SNE time (cpu): {time.time() - start:.2f}s")
+    else:
+        raise ValueError(f"Embedding method {embedding_method} not supported.")
     rgb = rgb.reshape(features.shape[:3] + (3,))
     return rgb
         for image in images
     ]
+@spaces.GPU(duration=30)
 def main_fn(
     images,
     model_name="SAM(sam_vit_b)",
     affinity_focal_gamma=0.3,
     num_sample_ncut=10000,
     knn_ncut=10,
+    embedding_method="UMAP",
     num_sample_tsne=1000,
     knn_tsne=10,
     perplexity=500,
+    n_neighbors=500,
+    min_dist=0.1,
 ):
     if perplexity >= num_sample_tsne:
         # raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
         knn_ncut=knn_ncut,
         knn_tsne=knn_tsne,
         num_sample_tsne=num_sample_tsne,
+        embedding_method=embedding_method,
         perplexity=perplexity,
+        n_neighbors=n_neighbors,
+        min_dist=min_dist,
     )
     rgb = dont_use_too_much_green(rgb)
     return to_pil_images(rgb)
         gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for more aggressive cleaning on the affinity matrix"),
         gr.Slider(100, 10000, step=100, label="num_sample (NCUT)", value=5000, elem_id="num_sample_ncut", info="for Nyström approximation"),
         gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="for Nyström approximation"),
+        gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="UMAP", elem_id="embedding_method"),
+        gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="for Nyström approximation. Adding will slow down quite a lot"),
+        gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="for Nyström approximation"),
+        gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity", info="for t-SNE"),
+        gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors", info="for UMAP"),
+        gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist", info="for UMAP"),
     ]
 )