Spaces: Running on Zero
update gpu
Browse files
app.py
CHANGED
|
@@ -378,13 +378,14 @@ class CLIP(torch.nn.Module):
|
|
| 378 |
causal_attention_mask=causal_attention_mask,
|
| 379 |
output_attentions=output_attentions,
|
| 380 |
)
|
| 381 |
-
|
|
|
|
| 382 |
hidden_states = residual + hidden_states
|
| 383 |
|
| 384 |
residual = hidden_states
|
| 385 |
hidden_states = self.layer_norm2(hidden_states)
|
| 386 |
hidden_states = self.mlp(hidden_states)
|
| 387 |
-
self.mlp_output = hidden_states.clone()
|
| 388 |
|
| 389 |
hidden_states = residual + hidden_states
|
| 390 |
|
|
@@ -393,7 +394,7 @@ class CLIP(torch.nn.Module):
|
|
| 393 |
if output_attentions:
|
| 394 |
outputs += (attn_weights,)
|
| 395 |
|
| 396 |
-
self.block_output = hidden_states.clone()
|
| 397 |
return outputs
|
| 398 |
|
| 399 |
setattr(self.model.vision_model.encoder.layers[0].__class__, "forward", new_forward)
|
|
@@ -538,10 +539,13 @@ def compute_ncut(
|
|
| 538 |
affinity_focal_gamma=0.3,
|
| 539 |
knn_ncut=10,
|
| 540 |
knn_tsne=10,
|
|
|
|
| 541 |
num_sample_tsne=1000,
|
| 542 |
perplexity=500,
|
|
|
|
|
|
|
| 543 |
):
|
| 544 |
-
from ncut_pytorch import NCUT, rgb_from_tsne_3d
|
| 545 |
|
| 546 |
start = time.time()
|
| 547 |
eigvecs, eigvals = NCUT(
|
|
@@ -554,16 +558,23 @@ def compute_ncut(
|
|
| 554 |
print(f"NCUT time (cpu): {time.time() - start:.2f}s")
|
| 555 |
|
| 556 |
start = time.time()
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 567 |
|
| 568 |
rgb = rgb.reshape(features.shape[:3] + (3,))
|
| 569 |
return rgb
|
|
@@ -585,7 +596,7 @@ def to_pil_images(images):
|
|
| 585 |
for image in images
|
| 586 |
]
|
| 587 |
|
| 588 |
-
@spaces.GPU(duration=
|
| 589 |
def main_fn(
|
| 590 |
images,
|
| 591 |
model_name="SAM(sam_vit_b)",
|
|
@@ -595,9 +606,12 @@ def main_fn(
|
|
| 595 |
affinity_focal_gamma=0.3,
|
| 596 |
num_sample_ncut=10000,
|
| 597 |
knn_ncut=10,
|
|
|
|
| 598 |
num_sample_tsne=1000,
|
| 599 |
knn_tsne=10,
|
| 600 |
perplexity=500,
|
|
|
|
|
|
|
| 601 |
):
|
| 602 |
if perplexity >= num_sample_tsne:
|
| 603 |
# raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
|
|
@@ -620,7 +634,10 @@ def main_fn(
|
|
| 620 |
knn_ncut=knn_ncut,
|
| 621 |
knn_tsne=knn_tsne,
|
| 622 |
num_sample_tsne=num_sample_tsne,
|
|
|
|
| 623 |
perplexity=perplexity,
|
|
|
|
|
|
|
| 624 |
)
|
| 625 |
rgb = dont_use_too_much_green(rgb)
|
| 626 |
return to_pil_images(rgb)
|
|
@@ -642,10 +659,12 @@ demo = gr.Interface(
|
|
| 642 |
gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for more aggressive cleaning on the affinity matrix"),
|
| 643 |
gr.Slider(100, 10000, step=100, label="num_sample (NCUT)", value=5000, elem_id="num_sample_ncut", info="for Nyström approximation"),
|
| 644 |
gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="for Nyström approximation"),
|
| 645 |
-
gr.
|
| 646 |
-
gr.Slider(
|
| 647 |
-
gr.Slider(
|
| 648 |
-
|
|
|
|
|
|
|
| 649 |
]
|
| 650 |
)
|
| 651 |
|
|
|
|
| 378 |
causal_attention_mask=causal_attention_mask,
|
| 379 |
output_attentions=output_attentions,
|
| 380 |
)
|
| 381 |
+
hw = np.sqrt(hidden_states.shape[1]-1).astype(int)
|
| 382 |
+
self.attn_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
|
| 383 |
hidden_states = residual + hidden_states
|
| 384 |
|
| 385 |
residual = hidden_states
|
| 386 |
hidden_states = self.layer_norm2(hidden_states)
|
| 387 |
hidden_states = self.mlp(hidden_states)
|
| 388 |
+
self.mlp_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
|
| 389 |
|
| 390 |
hidden_states = residual + hidden_states
|
| 391 |
|
|
|
|
| 394 |
if output_attentions:
|
| 395 |
outputs += (attn_weights,)
|
| 396 |
|
| 397 |
+
self.block_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
|
| 398 |
return outputs
|
| 399 |
|
| 400 |
setattr(self.model.vision_model.encoder.layers[0].__class__, "forward", new_forward)
|
|
|
|
| 539 |
affinity_focal_gamma=0.3,
|
| 540 |
knn_ncut=10,
|
| 541 |
knn_tsne=10,
|
| 542 |
+
embedding_method="UMAP",
|
| 543 |
num_sample_tsne=1000,
|
| 544 |
perplexity=500,
|
| 545 |
+
n_neighbors=500,
|
| 546 |
+
min_dist=0.1,
|
| 547 |
):
|
| 548 |
+
from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d
|
| 549 |
|
| 550 |
start = time.time()
|
| 551 |
eigvecs, eigvals = NCUT(
|
|
|
|
| 558 |
print(f"NCUT time (cpu): {time.time() - start:.2f}s")
|
| 559 |
|
| 560 |
start = time.time()
|
| 561 |
+
if embedding_method == "UMAP":
|
| 562 |
+
rgb = rgb_from_umap_3d(
|
| 563 |
+
eigvecs,
|
| 564 |
+
n_neighbors=n_neighbors,
|
| 565 |
+
min_dist=min_dist,
|
| 566 |
+
)
|
| 567 |
+
print(f"UMAP time (cpu): {time.time() - start:.2f}s")
|
| 568 |
+
elif embedding_method == "t-SNE":
|
| 569 |
+
X_3d, rgb = rgb_from_tsne_3d(
|
| 570 |
+
eigvecs,
|
| 571 |
+
num_sample=num_sample_tsne,
|
| 572 |
+
perplexity=perplexity,
|
| 573 |
+
knn=knn_tsne,
|
| 574 |
+
)
|
| 575 |
+
print(f"t-SNE time (cpu): {time.time() - start:.2f}s")
|
| 576 |
+
else:
|
| 577 |
+
raise ValueError(f"Embedding method {embedding_method} not supported.")
|
| 578 |
|
| 579 |
rgb = rgb.reshape(features.shape[:3] + (3,))
|
| 580 |
return rgb
|
|
|
|
| 596 |
for image in images
|
| 597 |
]
|
| 598 |
|
| 599 |
+
@spaces.GPU(duration=30)
|
| 600 |
def main_fn(
|
| 601 |
images,
|
| 602 |
model_name="SAM(sam_vit_b)",
|
|
|
|
| 606 |
affinity_focal_gamma=0.3,
|
| 607 |
num_sample_ncut=10000,
|
| 608 |
knn_ncut=10,
|
| 609 |
+
embedding_method="UMAP",
|
| 610 |
num_sample_tsne=1000,
|
| 611 |
knn_tsne=10,
|
| 612 |
perplexity=500,
|
| 613 |
+
n_neighbors=500,
|
| 614 |
+
min_dist=0.1,
|
| 615 |
):
|
| 616 |
if perplexity >= num_sample_tsne:
|
| 617 |
# raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
|
|
|
|
| 634 |
knn_ncut=knn_ncut,
|
| 635 |
knn_tsne=knn_tsne,
|
| 636 |
num_sample_tsne=num_sample_tsne,
|
| 637 |
+
embedding_method=embedding_method,
|
| 638 |
perplexity=perplexity,
|
| 639 |
+
n_neighbors=n_neighbors,
|
| 640 |
+
min_dist=min_dist,
|
| 641 |
)
|
| 642 |
rgb = dont_use_too_much_green(rgb)
|
| 643 |
return to_pil_images(rgb)
|
|
|
|
| 659 |
gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for more aggressive cleaning on the affinity matrix"),
|
| 660 |
gr.Slider(100, 10000, step=100, label="num_sample (NCUT)", value=5000, elem_id="num_sample_ncut", info="for Nyström approximation"),
|
| 661 |
gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="for Nyström approximation"),
|
| 662 |
+
gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="UMAP", elem_id="embedding_method"),
|
| 663 |
+
gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="for Nyström approximation. Adding will slow down quite a lot"),
|
| 664 |
+
gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="for Nyström approximation"),
|
| 665 |
+
gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity", info="for t-SNE"),
|
| 666 |
+
gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors", info="for UMAP"),
|
| 667 |
+
gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist", info="for UMAP"),
|
| 668 |
]
|
| 669 |
)
|
| 670 |
|