Spaces:

huzey
/

ncut-pytorch

Running on Zero

App Files Community

huzey committed on Aug 27, 2024

Commit

e85b6ae

1 Parent(s): a29b195

update compare

Browse files

Files changed (1) hide show

app.py +94 -17

app.py CHANGED Viewed

@@ -98,7 +98,7 @@ def pil_images_to_video(images, output_path, fps=5):
     # from pil images to numpy
     images = [np.array(image) for image in images]
-    print("Saving video to", output_path)
     import cv2
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     height, width, _ = images[0].shape
@@ -338,7 +338,7 @@ def run_fn(
     recursion_l3_n_eigs=20,
     recursion_metric="euclidean",
 ):
-    print("Running...")
     if images is None:
         gr.Warning("No images selected.")
         return [], "No images selected."
@@ -373,10 +373,12 @@ def run_fn(
         "recursion_metric": recursion_metric,
         "video_output": video_output,
     }
-    print(kwargs)
     num_images = len(images)
     if num_images > 100:
         return super_duper_long_run(images, **kwargs)
     if num_images > 50:
         return longer_run(images, **kwargs)
     if old_school_ncut:
@@ -406,7 +408,9 @@ def make_input_images_section():
 def make_input_video_section():
     gr.Markdown('### Input Video')
     input_gallery = gr.Video(value=None, label="Select video", elem_id="video-input", height="auto", show_share_button=False)
-    max_frames_number = gr.Number(100, label="Max frames", elem_id="max_frames")
     submit_button = gr.Button("🔴RUN", elem_id="submit_button")
     clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
     return input_gallery, submit_button, clear_images_button, max_frames_number
@@ -428,7 +432,7 @@ def make_example_video_section():
     return load_video_button
 def make_dataset_images_section():
-    with gr.Accordion("➡️ Load from dataset", open=True):
         dataset_names = [
             'UCSC-VLAA/Recap-COCO-30K',
             'nateraw/pascal-voc-2012',
@@ -440,7 +444,8 @@ def make_dataset_images_section():
         ]
         dataset_dropdown = gr.Dropdown(dataset_names, label="Dataset name", value="UCSC-VLAA/Recap-COCO-30K", elem_id="dataset")
         num_images_slider = gr.Slider(1, 200, step=1, label="Number of images", value=9, elem_id="num_images")
-        random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
         load_dataset_button = gr.Button("Load Dataset", elem_id="load-dataset-button")
     def load_dataset_images(dataset_name, num_images=10, random_seed=42):
         from datasets import load_dataset
@@ -524,8 +529,8 @@ with gr.Blocks() as demo:
             outputs=[output_gallery, logging_text]
         )
-    with gr.Tab('NCut (Legacy)'):
-        gr.Markdown('#### Ncut, not aligned, no Nyström approximation')
         gr.Markdown('Each image is solved independently, <em>color is <b>not</b> aligned across images</em>')
         gr.Markdown('---')
@@ -595,6 +600,10 @@ with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
                 input_gallery, submit_button, clear_images_button = make_input_images_section()
                 dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
                 num_images_slider.value = 100
                 dataset_dropdown.value = 'nielsr/CelebA-faces'
@@ -657,14 +666,13 @@ with gr.Blocks() as demo:
         )
-    with gr.Tab('AlignedCut (Video)'):
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
-                input_gallery, submit_button, clear_images_button, max_frame_number = make_input_video_section()
                 # load_video_button = make_example_video_section()
             with gr.Column(scale=5, min_width=200):
-                output_gallery = gr.Video(value=None, label="NCUT Embedding", elem_id="ncut", height="auto", show_share_button=False)
-                gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
                 [
                     model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
                     affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
@@ -679,27 +687,96 @@ with gr.Blocks() as demo:
                 # logging text box
                 logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
                 load_images_button.click(lambda x: (default_images, default_outputs), outputs=[input_gallery, output_gallery])
-        # load_video_button.click(lambda x: './images/ego4d_dog.mp4', outputs=[input_gallery])
-        clear_images_button.click(lambda x: (None, []), outputs=[input_gallery, output_gallery])
         place_holder_false = gr.Checkbox(label="Place holder", value=False, elem_id="place_holder_false")
         place_holder_false.visible = False
         submit_button.click(
             run_fn,
             inputs=[
-                input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
                 affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
                 embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
                 perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
                 place_holder_false, max_frame_number
             ],
-            outputs=[output_gallery, logging_text]
         )
-    with gr.Tab('AlignedCut (Text)'):
         gr.Markdown('=== under construction ===')
         gr.Markdown('Please see the [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/gallery_llama3/) for example of NCUT on text input.')
         gr.Markdown('---')
         gr.Markdown('![ncut](https://ncut-pytorch.readthedocs.io/en/latest/images/gallery/llama3/llama3_layer_31.jpg)')
 demo.launch(share=True)

     # from pil images to numpy
     images = [np.array(image) for image in images]
+    # print("Saving video to", output_path)
     import cv2
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     height, width, _ = images[0].shape
     recursion_l3_n_eigs=20,
     recursion_metric="euclidean",
 ):
+    # print("Running...")
     if images is None:
         gr.Warning("No images selected.")
         return [], "No images selected."
         "recursion_metric": recursion_metric,
         "video_output": video_output,
     }
+    # print(kwargs)
     num_images = len(images)
     if num_images > 100:
         return super_duper_long_run(images, **kwargs)
+    if recursion:
+        return longer_run(images, **kwargs)
     if num_images > 50:
         return longer_run(images, **kwargs)
     if old_school_ncut:
 def make_input_video_section():
     gr.Markdown('### Input Video')
     input_gallery = gr.Video(value=None, label="Select video", elem_id="video-input", height="auto", show_share_button=False)
+    gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
+    # max_frames_number = gr.Number(100, label="Max frames", elem_id="max_frames")
+    max_frames_number = gr.Slider(1, 200, step=1, label="Max frames", value=100, elem_id="max_frames")
     submit_button = gr.Button("🔴RUN", elem_id="submit_button")
     clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
     return input_gallery, submit_button, clear_images_button, max_frames_number
     return load_video_button
 def make_dataset_images_section():
+    with gr.Accordion("➡️ Click to expand: Load from dataset", open=False):
         dataset_names = [
             'UCSC-VLAA/Recap-COCO-30K',
             'nateraw/pascal-voc-2012',
         ]
         dataset_dropdown = gr.Dropdown(dataset_names, label="Dataset name", value="UCSC-VLAA/Recap-COCO-30K", elem_id="dataset")
         num_images_slider = gr.Slider(1, 200, step=1, label="Number of images", value=9, elem_id="num_images")
+        # random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
+        random_seed_slider = gr.Slider(0, 1000, step=1, label="Random seed", value=1, elem_id="random_seed")
         load_dataset_button = gr.Button("Load Dataset", elem_id="load-dataset-button")
     def load_dataset_images(dataset_name, num_images=10, random_seed=42):
         from datasets import load_dataset
             outputs=[output_gallery, logging_text]
         )
+    with gr.Tab('NCut'):
+        gr.Markdown('#### NCut (Legacy), not aligned, no Nyström approximation')
         gr.Markdown('Each image is solved independently, <em>color is <b>not</b> aligned across images</em>')
         gr.Markdown('---')
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
                 input_gallery, submit_button, clear_images_button = make_input_images_section()
+                load_images_button, example_gallery, hide_button = make_example_images_section()
+                load_images_button.click(lambda x: default_images, outputs=[input_gallery])
+                example_gallery.visible = False
+                hide_button.visible = False
                 dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
                 num_images_slider.value = 100
                 dataset_dropdown.value = 'nielsr/CelebA-faces'
         )
+    with gr.Tab('Video'):
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
+                video_input_gallery, submit_button, clear_images_button, max_frame_number = make_input_video_section()
                 # load_video_button = make_example_video_section()
             with gr.Column(scale=5, min_width=200):
+                video_output_gallery = gr.Video(value=None, label="NCUT Embedding", elem_id="ncut", height="auto", show_share_button=False)
                 [
                     model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
                     affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
                 # logging text box
                 logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
                 load_images_button.click(lambda x: (default_images, default_outputs), outputs=[input_gallery, output_gallery])
+        clear_images_button.click(lambda x: (None, []), outputs=[video_input_gallery, video_output_gallery])
         place_holder_false = gr.Checkbox(label="Place holder", value=False, elem_id="place_holder_false")
         place_holder_false.visible = False
         submit_button.click(
             run_fn,
             inputs=[
+                video_input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
                 affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
                 embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
                 perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
                 place_holder_false, max_frame_number
             ],
+            outputs=[video_output_gallery, logging_text]
         )
+    with gr.Tab('Text'):
         gr.Markdown('=== under construction ===')
         gr.Markdown('Please see the [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/gallery_llama3/) for example of NCUT on text input.')
         gr.Markdown('---')
         gr.Markdown('![ncut](https://ncut-pytorch.readthedocs.io/en/latest/images/gallery/llama3/llama3_layer_31.jpg)')
+    with gr.Tab('Compare'):
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button = make_input_images_section()
+                submit_button.visible = False
+                load_images_button, example_gallery, hide_button = make_example_images_section()
+                example_gallery.visible = False
+                hide_button.visible = False
+                dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
+                load_images_button.click(lambda x: default_images, outputs=input_gallery)
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output Model1')
+                output_gallery1 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut1", columns=[3], rows=[1], object_fit="contain", height="auto")
+                submit_button1 = gr.Button("🔴RUN", elem_id="submit_button1")
+                [
+                    model_dropdown1, layer_slider1, node_type_dropdown1, num_eig_slider1,
+                    affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
+                    embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
+                    perplexity_slider1, n_neighbors_slider1, min_dist_slider1,
+                    sampling_method_dropdown1
+                ] = make_parameters_section()
+                model_dropdown1.value = 'DiNO(dinov2_vitb14_reg)'
+                layer_slider1.value = 11
+                node_type_dropdown1.value = 'block: sum of residual'
+                # logging text box
+                logging_text1 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output Model2')
+                output_gallery2 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut2", columns=[3], rows=[1], object_fit="contain", height="auto")
+                submit_button2 = gr.Button("🔴RUN", elem_id="submit_button2")
+                [
+                    model_dropdown2, layer_slider2, node_type_dropdown2, num_eig_slider2,
+                    affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
+                    embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
+                    perplexity_slider2, n_neighbors_slider2, min_dist_slider2,
+                    sampling_method_dropdown2
+                ] = make_parameters_section()
+                model_dropdown2.value = 'DiNO(dinov2_vitb14_reg)'
+                layer_slider2.value = 9
+                node_type_dropdown2.value = 'attn: attention output'
+                # logging text box
+                logging_text2 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+        clear_images_button.click(lambda x: ([], [], []), outputs=[input_gallery, output_gallery1, output_gallery2])
+        submit_button1.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown1, layer_slider1, num_eig_slider1, node_type_dropdown1,
+                affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
+                embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
+                perplexity_slider1, n_neighbors_slider1, min_dist_slider1, sampling_method_dropdown1
+            ],
+            outputs=[output_gallery1, logging_text1]
+        )
+        submit_button2.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown2, layer_slider2, num_eig_slider2, node_type_dropdown2,
+                affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
+                embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
+                perplexity_slider2, n_neighbors_slider2, min_dist_slider2, sampling_method_dropdown2
+            ],
+            outputs=[output_gallery2, logging_text2]
+        )
 demo.launch(share=True)