commit
Browse files
- app.py +65 -54
- assets/apple.jpg +3 -0
- assets/board.jpg +3 -0
- assets/car.jpg +3 -0
- assets/cartoon_cat.png +3 -0
- assets/cartoon_girl.jpeg +3 -0
- assets/cat.jpg +3 -0
- assets/lion.jpg +3 -0
- assets/room.jpg +3 -0
- assets/room2.jpg +3 -0
- assets/sheep.jpg +3 -0
- assets/woman.jpg +3 -0
- assets/woman2.jpg +3 -0
- assets/woman3.jpg +3 -0
app.py CHANGED
@@ -10,6 +10,7 @@ from PIL import Image
 
 import cv2
 import numpy as np
+import ast
 
 class Examples(gr.helpers.Examples):
     def __init__(self, *args, cached_folder=None, **kwargs):
@@ -41,7 +42,7 @@ def postprocess(output, prompt):
 
 # user click the image to get points, and show the points on the image
 def get_point(img, sel_pix, evt: gr.SelectData):
-    print(sel_pix)
+    # print(img, sel_pix)
     if len(sel_pix) < 5:
         sel_pix.append((evt.index, 1)) # default foreground_point
     img = cv2.imread(img)
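For context on the handler this hunk touches: Gradio delivers image clicks through a gr.SelectData payload whose .index field holds the clicked [x, y] pixel position, which get_point appends to sel_pix with label 1 (foreground). A minimal, self-contained sketch of that wiring; the toy handler and component names are mine, not the app's:

    import gradio as gr

    def on_select(evt: gr.SelectData):
        # For Image components, evt.index is the clicked pixel as [x, y].
        return f"clicked at {evt.index}"

    with gr.Blocks() as demo:
        img = gr.Image(type="filepath")
        where = gr.Textbox(label="Last click")
        img.select(on_select, inputs=None, outputs=where)

    demo.launch()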
@@ -55,6 +56,18 @@ def get_point(img, sel_pix, evt: gr.SelectData):
     print(sel_pix)
     return img, sel_pix
 
+def set_point(img, checkbox_group, sel_pix, semantic_input):
+    ori_img = img
+    # print(img, checkbox_group, sel_pix, semantic_input)
+    sel_pix = ast.literal_eval(sel_pix)
+    img = cv2.imread(img)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    if len(sel_pix) <= 5 and len(sel_pix) > 0:
+        for point, label in sel_pix:
+            cv2.drawMarker(img, point, colors[label], markerType=markers[label], markerSize=20, thickness=5)
+
+    return ori_img, img, sel_pix
+
 
 # undo the selected point
 def undo_points(orig_img, sel_pix):
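set_point is the counterpart of get_point for pre-recorded example points: the examples table (added further down in this commit) stores points as a Python-literal string, ast.literal_eval turns that string back into a list of ([x, y], label) pairs, and each pair is drawn with cv2.drawMarker. A standalone sketch of that round trip; the colors and markers tables here are stand-ins for the app's globals of the same names:

    import ast
    import cv2
    import numpy as np

    colors = {1: (255, 0, 0)}        # label 1 = foreground point (stand-in value)
    markers = {1: cv2.MARKER_CROSS}  # marker style per label (stand-in value)

    points_str = "[([120, 80], 1), ([300, 200], 1)]"  # as stored in the examples table
    sel_pix = ast.literal_eval(points_str)            # -> list of ([x, y], label) pairs

    img = np.zeros((400, 400, 3), dtype=np.uint8)     # dummy canvas instead of cv2.imread
    for point, label in sel_pix:
        cv2.drawMarker(img, tuple(point), colors[label],
                       markerType=markers[label], markerSize=20, thickness=5)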
@@ -92,22 +105,6 @@ map_prompt = {
     'semantic segmentation': '[[image2semantic]]',
 }
 
-def download_additional_params(model_name, filename="add_params.bin"):
-    # Download the file and return its path
-    file_path = hf_hub_download(repo_id=model_name, filename=filename, use_auth_token=HF_TOKEN)
-    return file_path
-
-# Load the additional_params.bin file
-def load_additional_params(model_name):
-    # Download additional_params.bin
-    params_path = download_additional_params(model_name)
-
-    # Load the file contents with torch.load()
-    additional_params = torch.load(params_path, map_location='cpu')
-
-    # Return the loaded parameters
-    return additional_params
-
 def process_image_check(path_input, prompt, sel_points, semantic):
     if path_input is None:
         raise gr.Error(
@@ -119,30 +116,9 @@ def process_image_check(path_input, prompt, sel_points, semantic):
     )
 
 
-
-def process_image_4(image_path, prompt):
-
-    inputs = []
-    for p in prompt:
-        cur_p = map_prompt[p]
-
-        coor_point = []
-        point_labels = []
-
-
-        cur_input = {
-            # 'original_size': [[w,h]],
-            # 'target_size': [[768, 768]],
-            'prompt': [cur_p],
-            'coor_point': coor_point,
-            'point_labels': point_labels,
-        }
-        inputs.append(cur_input)
-
-    return inputs
-
-
 def inf(image_path, prompt, sel_points, semantic):
+    if isinstance(sel_points, str):
+        sel_points = ast.literal_eval(selected_points)
     print('=========== PROCESS IMAGE CHECK ===========')
     print(f"Image Path: {image_path}")
     print(f"Prompt: {prompt}")
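One caveat in this hunk: the added guard calls ast.literal_eval(selected_points), but the function's parameter is named sel_points, so the string branch would raise a NameError whenever it runs. Assuming the intent is simply to de-serialize string-typed points, the fixed guard would read:

    import ast

    def parse_points(sel_points):  # hypothetical helper mirroring the guard in inf()
        if isinstance(sel_points, str):
            # note: sel_points, not selected_points
            sel_points = ast.literal_eval(sel_points)
        return sel_points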
@@ -191,6 +167,9 @@ def inf(image_path, prompt, sel_points, semantic):
 def clear_cache():
     return None, None
 
+def dummy():
+    pass
+
 def run_demo_server():
     options = ['depth', 'normal', 'entity segmentation', 'human pose', 'point segmentation', 'semantic segmentation']
     gradio_theme = gr.themes.Default()
@@ -227,6 +206,9 @@
         .md_feedback li {
             margin-bottom: 0px !important;
         }
+        .hideme {
+            display: none;
+        }
     """,
     head="""
     <script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
@@ -258,13 +240,22 @@
 
             """
         )
+        selected_points_tmp = gr.Textbox(label="Points", elem_classes="hideme")
 
         with gr.Row():
-            checkbox_group = gr.CheckboxGroup(choices=options, label="
+            checkbox_group = gr.CheckboxGroup(choices=options, label="Task")
         with gr.Row():
-            semantic_input = gr.Textbox(label="Category Name
+            semantic_input = gr.Textbox(label="Category Name", placeholder="e.g. person/cat/dog/elephant...... (for semantic segmentation only, in COCO)")
         with gr.Row():
             gr.Markdown('For non-human image inputs, the pose results may have issues. Same when perform semantic segmentation with categories that are not in COCO.')
+        with gr.Row():
+            gr.Markdown('The results of semantic segmentation may be unstable because:')
+        with gr.Row():
+            gr.Markdown('1. We only trained on COCO, whose quality and quantity are insufficient to meet the requirements.')
+        with gr.Row():
+            gr.Markdown('2. Semantic segmentation is more complex than other tasks, as it requires accurately learning the relationship between semantics and objects.')
+        with gr.Row():
+            gr.Markdown('However, we are still able to produce some high-quality semantic segmentation results, strongly demonstrating the potential of our approach.')
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(
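The .hideme CSS rule from the previous hunk and the selected_points_tmp textbox added here work as a pair: the textbox stays in the component tree so the examples table can feed it a serialized point list, but display: none keeps it out of the visible UI. A minimal sketch of the same hidden-carrier pattern (toy demo; the names are mine):

    import gradio as gr

    with gr.Blocks(css=".hideme { display: none; }") as demo:
        hidden_state = gr.Textbox(elem_classes="hideme")  # present but invisible
        shown = gr.Textbox(label="Visible output")
        btn = gr.Button("Reveal")
        # The hidden textbox participates in events like any other component.
        btn.click(lambda s: s, inputs=hidden_state, outputs=shown)

    demo.launch()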
@@ -314,7 +305,7 @@
         ).success(
             # fn=process_pipe_matting,
             fn=inf,
-            inputs=[
+            inputs=[original_image, checkbox_group, selected_points, semantic_input],
             outputs=[matting_image_output],
             concurrency_limit=1,
         )
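This .success(...) is the tail of an event chain: an earlier step (presumably process_image_check, defined above in the file) validates the inputs, and inf only runs if that step finished without raising gr.Error. The pattern in isolation, with hypothetical functions:

    import gradio as gr

    def check(x):
        if not x:
            raise gr.Error("Provide an input first.")  # raising aborts the chain

    def run(x):
        return x[::-1]

    with gr.Blocks() as demo:
        inp = gr.Textbox()
        out = gr.Textbox()
        btn = gr.Button("Run")
        # run() fires only if check() completed without raising.
        btn.click(check, inputs=inp, outputs=None).success(run, inputs=inp, outputs=out)

    demo.launch()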
@@ -346,7 +337,7 @@
 
         input_image.select(
             get_point,
-            [
+            [original_image, selected_points],
            [input_image, selected_points],
         )
 
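The final hunk below replaces a run of blank lines with a clickable examples table. With run_on_click=True and cache_examples=False, selecting a row first copies the row's values into inputs (including the serialized points routed through the hidden selected_points_tmp), then runs fn=set_point, whose three return values populate original_image, input_image, and selected_points; the commented-out examples.dataset.click(...) chain kept beneath it is a manual two-step version of the same wiring. The mechanism in miniature (toy function and components, not the app's):

    import gradio as gr

    def shout(text):
        return text.upper()  # stand-in for set_point

    with gr.Blocks() as demo:
        inp = gr.Textbox(label="Input")
        out = gr.Textbox(label="Output")
        gr.Examples(
            examples=[["hello"], ["world"]],
            inputs=[inp],
            outputs=[out],
            fn=shout,
            run_on_click=True,     # populate inputs, then run fn automatically
            cache_examples=False,  # compute live rather than pre-caching
        )

    demo.launch()

One detail worth checking in the hunk: the new table references assets/person.jpg, which is not among the assets added by this commit, so that row presumably relies on a file already present in the repo.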
@@ -356,16 +347,36 @@
             [input_image, selected_points]
         )
 
-
-
-
-
-
-
-
-
-
-
+        examples = gr.Examples(
+            fn=set_point,
+            run_on_click=True,
+            examples=[
+                ["assets/woman.jpg", ['point segmentation', 'depth', 'normal', 'entity segmentation', 'human pose', 'semantic segmentation'], '[([2744, 975], 1), ([3440, 1954], 1), ([2123, 2405], 1), ([838, 1678], 1), ([4688, 1922], 1)]', 'person'],
+                ["assets/woman2.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'human pose'], '[([687, 1416], 1), ([1021, 707], 1), ([1138, 1138], 1), ([1182, 1583], 1), ([1188, 2172], 1)]', 'person'],
+                ["assets/board.jpg", ['point segmentation', 'depth', 'entity segmentation', 'normal'], '[([1003, 2163], 1)]', ''],
+                ["assets/lion.jpg", ['point segmentation', 'depth', 'semantic segmentation'], '[([1287, 671], 1)]', 'lion'],
+                ["assets/apple.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1287, 671], 1)]', 'apple'],
+                ["assets/room.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1308, 2215], 1)]', 'chair'],
+                ["assets/car.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1276, 1369], 1)]', 'car'],
+                ["assets/person.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation', 'human pose'], '[([3253, 1459], 1)]', 'tie'],
+                ["assets/woman3.jpg", ['point segmentation', 'depth', 'entity segmentation'], '[([420, 692], 1)]', ''],
+                ["assets/cat.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation'], '[([756, 661], 1)]', 'cat'],
+                ["assets/room2.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'normal'], '[([3946, 224], 1)]', 'laptop'],
+                ["assets/cartoon_cat.png", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'normal'], '[([1478, 3048], 1)]', 'cat'],
+                ["assets/sheep.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation'], '[([1789, 1791], 1), ([1869, 1333], 1)]', 'sheep'],
+                ["assets/cartoon_girl.jpeg", ['point segmentation', 'depth', 'entity segmentation', 'normal', 'human pose', 'semantic segmentation'], '[([1208, 2089], 1), ([635, 2731], 1), ([1070, 2888], 1), ([1493, 2350], 1)]', 'person'],
+            ],
+            inputs=[input_image, checkbox_group, selected_points_tmp, semantic_input],
+            outputs=[original_image, input_image, selected_points],
+            cache_examples=False,
+        )
+
+        # examples.dataset.click(
+        #     fn=dummy
+        # ).success(
+        #     fn=set_point, # Now run the actual function after inputs are populated
+        #     inputs=[input_image, checkbox_group, selected_points_tmp, semantic_input],
+        #     outputs=[input_image, selected_points]
         # )
 
     demo.queue(
assets/apple.jpg         ADDED (Git LFS)
assets/board.jpg         ADDED (Git LFS)
assets/car.jpg           ADDED (Git LFS)
assets/cartoon_cat.png   ADDED (Git LFS)
assets/cartoon_girl.jpeg ADDED (Git LFS)
assets/cat.jpg           ADDED (Git LFS)
assets/lion.jpg          ADDED (Git LFS)
assets/room.jpg          ADDED (Git LFS)
assets/room2.jpg         ADDED (Git LFS)
assets/sheep.jpg         ADDED (Git LFS)
assets/woman.jpg         ADDED (Git LFS)
assets/woman2.jpg        ADDED (Git LFS)
assets/woman3.jpg        ADDED (Git LFS)
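Each asset shows as +3 -0 because Git LFS commits only a three-line pointer file to the repository; the image bytes themselves go to LFS storage. A pointer file has this shape (oid and size shown as placeholders):

    version https://git-lfs.github.com/spec/v1
    oid sha256:<object hash>
    size <bytes>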