Spaces:
Running
on
Zero
Running
on
Zero
update compare
Browse files
app.py
CHANGED
|
@@ -98,7 +98,7 @@ def pil_images_to_video(images, output_path, fps=5):
|
|
| 98 |
# from pil images to numpy
|
| 99 |
images = [np.array(image) for image in images]
|
| 100 |
|
| 101 |
-
print("Saving video to", output_path)
|
| 102 |
import cv2
|
| 103 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 104 |
height, width, _ = images[0].shape
|
|
@@ -338,7 +338,7 @@ def run_fn(
|
|
| 338 |
recursion_l3_n_eigs=20,
|
| 339 |
recursion_metric="euclidean",
|
| 340 |
):
|
| 341 |
-
print("Running...")
|
| 342 |
if images is None:
|
| 343 |
gr.Warning("No images selected.")
|
| 344 |
return [], "No images selected."
|
|
@@ -373,10 +373,12 @@ def run_fn(
|
|
| 373 |
"recursion_metric": recursion_metric,
|
| 374 |
"video_output": video_output,
|
| 375 |
}
|
| 376 |
-
print(kwargs)
|
| 377 |
num_images = len(images)
|
| 378 |
if num_images > 100:
|
| 379 |
return super_duper_long_run(images, **kwargs)
|
|
|
|
|
|
|
| 380 |
if num_images > 50:
|
| 381 |
return longer_run(images, **kwargs)
|
| 382 |
if old_school_ncut:
|
|
@@ -406,7 +408,9 @@ def make_input_images_section():
|
|
| 406 |
def make_input_video_section():
|
| 407 |
gr.Markdown('### Input Video')
|
| 408 |
input_gallery = gr.Video(value=None, label="Select video", elem_id="video-input", height="auto", show_share_button=False)
|
| 409 |
-
|
|
|
|
|
|
|
| 410 |
submit_button = gr.Button("🔴RUN", elem_id="submit_button")
|
| 411 |
clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
|
| 412 |
return input_gallery, submit_button, clear_images_button, max_frames_number
|
|
@@ -428,7 +432,7 @@ def make_example_video_section():
|
|
| 428 |
return load_video_button
|
| 429 |
|
| 430 |
def make_dataset_images_section():
|
| 431 |
-
with gr.Accordion("➡️ Load from dataset", open=
|
| 432 |
dataset_names = [
|
| 433 |
'UCSC-VLAA/Recap-COCO-30K',
|
| 434 |
'nateraw/pascal-voc-2012',
|
|
@@ -440,7 +444,8 @@ def make_dataset_images_section():
|
|
| 440 |
]
|
| 441 |
dataset_dropdown = gr.Dropdown(dataset_names, label="Dataset name", value="UCSC-VLAA/Recap-COCO-30K", elem_id="dataset")
|
| 442 |
num_images_slider = gr.Slider(1, 200, step=1, label="Number of images", value=9, elem_id="num_images")
|
| 443 |
-
random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
|
|
|
|
| 444 |
load_dataset_button = gr.Button("Load Dataset", elem_id="load-dataset-button")
|
| 445 |
def load_dataset_images(dataset_name, num_images=10, random_seed=42):
|
| 446 |
from datasets import load_dataset
|
|
@@ -524,8 +529,8 @@ with gr.Blocks() as demo:
|
|
| 524 |
outputs=[output_gallery, logging_text]
|
| 525 |
)
|
| 526 |
|
| 527 |
-
with gr.Tab('NCut
|
| 528 |
-
gr.Markdown('####
|
| 529 |
gr.Markdown('Each image is solved independently, <em>color is <b>not</b> aligned across images</em>')
|
| 530 |
|
| 531 |
gr.Markdown('---')
|
|
@@ -595,6 +600,10 @@ with gr.Blocks() as demo:
|
|
| 595 |
with gr.Row():
|
| 596 |
with gr.Column(scale=5, min_width=200):
|
| 597 |
input_gallery, submit_button, clear_images_button = make_input_images_section()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
|
| 599 |
num_images_slider.value = 100
|
| 600 |
dataset_dropdown.value = 'nielsr/CelebA-faces'
|
|
@@ -657,14 +666,13 @@ with gr.Blocks() as demo:
|
|
| 657 |
)
|
| 658 |
|
| 659 |
|
| 660 |
-
with gr.Tab('
|
| 661 |
with gr.Row():
|
| 662 |
with gr.Column(scale=5, min_width=200):
|
| 663 |
-
|
| 664 |
# load_video_button = make_example_video_section()
|
| 665 |
with gr.Column(scale=5, min_width=200):
|
| 666 |
-
|
| 667 |
-
gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
|
| 668 |
[
|
| 669 |
model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
|
| 670 |
affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
|
|
@@ -679,27 +687,96 @@ with gr.Blocks() as demo:
|
|
| 679 |
# logging text box
|
| 680 |
logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
|
| 681 |
load_images_button.click(lambda x: (default_images, default_outputs), outputs=[input_gallery, output_gallery])
|
| 682 |
-
|
| 683 |
-
clear_images_button.click(lambda x: (None, []), outputs=[input_gallery, output_gallery])
|
| 684 |
place_holder_false = gr.Checkbox(label="Place holder", value=False, elem_id="place_holder_false")
|
| 685 |
place_holder_false.visible = False
|
| 686 |
submit_button.click(
|
| 687 |
run_fn,
|
| 688 |
inputs=[
|
| 689 |
-
|
| 690 |
affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
|
| 691 |
embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
|
| 692 |
perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
|
| 693 |
place_holder_false, max_frame_number
|
| 694 |
],
|
| 695 |
-
outputs=[
|
| 696 |
)
|
| 697 |
|
| 698 |
-
with gr.Tab('
|
| 699 |
gr.Markdown('=== under construction ===')
|
| 700 |
gr.Markdown('Please see the [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/gallery_llama3/) for example of NCUT on text input.')
|
| 701 |
gr.Markdown('---')
|
| 702 |
gr.Markdown('')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
|
| 704 |
demo.launch(share=True)
|
| 705 |
|
|
|
|
| 98 |
# from pil images to numpy
|
| 99 |
images = [np.array(image) for image in images]
|
| 100 |
|
| 101 |
+
# print("Saving video to", output_path)
|
| 102 |
import cv2
|
| 103 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 104 |
height, width, _ = images[0].shape
|
|
|
|
| 338 |
recursion_l3_n_eigs=20,
|
| 339 |
recursion_metric="euclidean",
|
| 340 |
):
|
| 341 |
+
# print("Running...")
|
| 342 |
if images is None:
|
| 343 |
gr.Warning("No images selected.")
|
| 344 |
return [], "No images selected."
|
|
|
|
| 373 |
"recursion_metric": recursion_metric,
|
| 374 |
"video_output": video_output,
|
| 375 |
}
|
| 376 |
+
# print(kwargs)
|
| 377 |
num_images = len(images)
|
| 378 |
if num_images > 100:
|
| 379 |
return super_duper_long_run(images, **kwargs)
|
| 380 |
+
if recursion:
|
| 381 |
+
return longer_run(images, **kwargs)
|
| 382 |
if num_images > 50:
|
| 383 |
return longer_run(images, **kwargs)
|
| 384 |
if old_school_ncut:
|
|
|
|
| 408 |
def make_input_video_section():
|
| 409 |
gr.Markdown('### Input Video')
|
| 410 |
input_gallery = gr.Video(value=None, label="Select video", elem_id="video-input", height="auto", show_share_button=False)
|
| 411 |
+
gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
|
| 412 |
+
# max_frames_number = gr.Number(100, label="Max frames", elem_id="max_frames")
|
| 413 |
+
max_frames_number = gr.Slider(1, 200, step=1, label="Max frames", value=100, elem_id="max_frames")
|
| 414 |
submit_button = gr.Button("🔴RUN", elem_id="submit_button")
|
| 415 |
clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
|
| 416 |
return input_gallery, submit_button, clear_images_button, max_frames_number
|
|
|
|
| 432 |
return load_video_button
|
| 433 |
|
| 434 |
def make_dataset_images_section():
|
| 435 |
+
with gr.Accordion("➡️ Click to expand: Load from dataset", open=False):
|
| 436 |
dataset_names = [
|
| 437 |
'UCSC-VLAA/Recap-COCO-30K',
|
| 438 |
'nateraw/pascal-voc-2012',
|
|
|
|
| 444 |
]
|
| 445 |
dataset_dropdown = gr.Dropdown(dataset_names, label="Dataset name", value="UCSC-VLAA/Recap-COCO-30K", elem_id="dataset")
|
| 446 |
num_images_slider = gr.Slider(1, 200, step=1, label="Number of images", value=9, elem_id="num_images")
|
| 447 |
+
# random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
|
| 448 |
+
random_seed_slider = gr.Slider(0, 1000, step=1, label="Random seed", value=1, elem_id="random_seed")
|
| 449 |
load_dataset_button = gr.Button("Load Dataset", elem_id="load-dataset-button")
|
| 450 |
def load_dataset_images(dataset_name, num_images=10, random_seed=42):
|
| 451 |
from datasets import load_dataset
|
|
|
|
| 529 |
outputs=[output_gallery, logging_text]
|
| 530 |
)
|
| 531 |
|
| 532 |
+
with gr.Tab('NCut'):
|
| 533 |
+
gr.Markdown('#### NCut (Legacy), not aligned, no Nyström approximation')
|
| 534 |
gr.Markdown('Each image is solved independently, <em>color is <b>not</b> aligned across images</em>')
|
| 535 |
|
| 536 |
gr.Markdown('---')
|
|
|
|
| 600 |
with gr.Row():
|
| 601 |
with gr.Column(scale=5, min_width=200):
|
| 602 |
input_gallery, submit_button, clear_images_button = make_input_images_section()
|
| 603 |
+
load_images_button, example_gallery, hide_button = make_example_images_section()
|
| 604 |
+
load_images_button.click(lambda x: default_images, outputs=[input_gallery])
|
| 605 |
+
example_gallery.visible = False
|
| 606 |
+
hide_button.visible = False
|
| 607 |
dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
|
| 608 |
num_images_slider.value = 100
|
| 609 |
dataset_dropdown.value = 'nielsr/CelebA-faces'
|
|
|
|
| 666 |
)
|
| 667 |
|
| 668 |
|
| 669 |
+
with gr.Tab('Video'):
|
| 670 |
with gr.Row():
|
| 671 |
with gr.Column(scale=5, min_width=200):
|
| 672 |
+
video_input_gallery, submit_button, clear_images_button, max_frame_number = make_input_video_section()
|
| 673 |
# load_video_button = make_example_video_section()
|
| 674 |
with gr.Column(scale=5, min_width=200):
|
| 675 |
+
video_output_gallery = gr.Video(value=None, label="NCUT Embedding", elem_id="ncut", height="auto", show_share_button=False)
|
|
|
|
| 676 |
[
|
| 677 |
model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
|
| 678 |
affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
|
|
|
|
| 687 |
# logging text box
|
| 688 |
logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
|
| 689 |
load_images_button.click(lambda x: (default_images, default_outputs), outputs=[input_gallery, output_gallery])
|
| 690 |
+
clear_images_button.click(lambda x: (None, []), outputs=[video_input_gallery, video_output_gallery])
|
|
|
|
| 691 |
place_holder_false = gr.Checkbox(label="Place holder", value=False, elem_id="place_holder_false")
|
| 692 |
place_holder_false.visible = False
|
| 693 |
submit_button.click(
|
| 694 |
run_fn,
|
| 695 |
inputs=[
|
| 696 |
+
video_input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
|
| 697 |
affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
|
| 698 |
embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
|
| 699 |
perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
|
| 700 |
place_holder_false, max_frame_number
|
| 701 |
],
|
| 702 |
+
outputs=[video_output_gallery, logging_text]
|
| 703 |
)
|
| 704 |
|
| 705 |
+
with gr.Tab('Text'):
|
| 706 |
gr.Markdown('=== under construction ===')
|
| 707 |
gr.Markdown('Please see the [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/gallery_llama3/) for example of NCUT on text input.')
|
| 708 |
gr.Markdown('---')
|
| 709 |
gr.Markdown('')
|
| 710 |
+
|
| 711 |
+
with gr.Tab('Compare'):
|
| 712 |
+
|
| 713 |
+
with gr.Row():
|
| 714 |
+
with gr.Column(scale=5, min_width=200):
|
| 715 |
+
input_gallery, submit_button, clear_images_button = make_input_images_section()
|
| 716 |
+
submit_button.visible = False
|
| 717 |
+
load_images_button, example_gallery, hide_button = make_example_images_section()
|
| 718 |
+
example_gallery.visible = False
|
| 719 |
+
hide_button.visible = False
|
| 720 |
+
dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
|
| 721 |
+
load_images_button.click(lambda x: default_images, outputs=input_gallery)
|
| 722 |
+
|
| 723 |
+
with gr.Column(scale=5, min_width=200):
|
| 724 |
+
gr.Markdown('### Output Model1')
|
| 725 |
+
output_gallery1 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut1", columns=[3], rows=[1], object_fit="contain", height="auto")
|
| 726 |
+
submit_button1 = gr.Button("🔴RUN", elem_id="submit_button1")
|
| 727 |
+
[
|
| 728 |
+
model_dropdown1, layer_slider1, node_type_dropdown1, num_eig_slider1,
|
| 729 |
+
affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
|
| 730 |
+
embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
|
| 731 |
+
perplexity_slider1, n_neighbors_slider1, min_dist_slider1,
|
| 732 |
+
sampling_method_dropdown1
|
| 733 |
+
] = make_parameters_section()
|
| 734 |
+
model_dropdown1.value = 'DiNO(dinov2_vitb14_reg)'
|
| 735 |
+
layer_slider1.value = 11
|
| 736 |
+
node_type_dropdown1.value = 'block: sum of residual'
|
| 737 |
+
# logging text box
|
| 738 |
+
logging_text1 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
|
| 739 |
+
|
| 740 |
+
with gr.Column(scale=5, min_width=200):
|
| 741 |
+
gr.Markdown('### Output Model2')
|
| 742 |
+
output_gallery2 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut2", columns=[3], rows=[1], object_fit="contain", height="auto")
|
| 743 |
+
submit_button2 = gr.Button("🔴RUN", elem_id="submit_button2")
|
| 744 |
+
[
|
| 745 |
+
model_dropdown2, layer_slider2, node_type_dropdown2, num_eig_slider2,
|
| 746 |
+
affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
|
| 747 |
+
embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
|
| 748 |
+
perplexity_slider2, n_neighbors_slider2, min_dist_slider2,
|
| 749 |
+
sampling_method_dropdown2
|
| 750 |
+
] = make_parameters_section()
|
| 751 |
+
model_dropdown2.value = 'DiNO(dinov2_vitb14_reg)'
|
| 752 |
+
layer_slider2.value = 9
|
| 753 |
+
node_type_dropdown2.value = 'attn: attention output'
|
| 754 |
+
# logging text box
|
| 755 |
+
logging_text2 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
|
| 756 |
+
|
| 757 |
+
clear_images_button.click(lambda x: ([], [], []), outputs=[input_gallery, output_gallery1, output_gallery2])
|
| 758 |
+
submit_button1.click(
|
| 759 |
+
run_fn,
|
| 760 |
+
inputs=[
|
| 761 |
+
input_gallery, model_dropdown1, layer_slider1, num_eig_slider1, node_type_dropdown1,
|
| 762 |
+
affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
|
| 763 |
+
embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
|
| 764 |
+
perplexity_slider1, n_neighbors_slider1, min_dist_slider1, sampling_method_dropdown1
|
| 765 |
+
],
|
| 766 |
+
outputs=[output_gallery1, logging_text1]
|
| 767 |
+
)
|
| 768 |
+
|
| 769 |
+
submit_button2.click(
|
| 770 |
+
run_fn,
|
| 771 |
+
inputs=[
|
| 772 |
+
input_gallery, model_dropdown2, layer_slider2, num_eig_slider2, node_type_dropdown2,
|
| 773 |
+
affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
|
| 774 |
+
embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
|
| 775 |
+
perplexity_slider2, n_neighbors_slider2, min_dist_slider2, sampling_method_dropdown2
|
| 776 |
+
],
|
| 777 |
+
outputs=[output_gallery2, logging_text2]
|
| 778 |
+
)
|
| 779 |
+
|
| 780 |
|
| 781 |
demo.launch(share=True)
|
| 782 |
|