commit
Browse files
- app.py +65 -54
- assets/apple.jpg +3 -0
- assets/board.jpg +3 -0
- assets/car.jpg +3 -0
- assets/cartoon_cat.png +3 -0
- assets/cartoon_girl.jpeg +3 -0
- assets/cat.jpg +3 -0
- assets/lion.jpg +3 -0
- assets/room.jpg +3 -0
- assets/room2.jpg +3 -0
- assets/sheep.jpg +3 -0
- assets/woman.jpg +3 -0
- assets/woman2.jpg +3 -0
- assets/woman3.jpg +3 -0
app.py CHANGED
@@ -10,6 +10,7 @@ from PIL import Image
 
 import cv2
 import numpy as np
+import ast
 
 class Examples(gr.helpers.Examples):
     def __init__(self, *args, cached_folder=None, **kwargs):
@@ -41,7 +42,7 @@ def postprocess(output, prompt):
 
 # user click the image to get points, and show the points on the image
 def get_point(img, sel_pix, evt: gr.SelectData):
-    print(sel_pix)
+    # print(img, sel_pix)
     if len(sel_pix) < 5:
         sel_pix.append((evt.index, 1)) # default foreground_point
     img = cv2.imread(img)
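For context on the handler this hunk touches: Gradio delivers image clicks through a gr.SelectData payload whose .index field holds the clicked [x, y] pixel position, which get_point appends to sel_pix with label 1 (foreground). A minimal, self-contained sketch of that wiring; the toy handler and component names are mine, not the app's:

    import gradio as gr

    def on_select(evt: gr.SelectData):
        # For Image components, evt.index is the clicked pixel as [x, y].
        return f"clicked at {evt.index}"

    with gr.Blocks() as demo:
        img = gr.Image(type="filepath")
        where = gr.Textbox(label="Last click")
        img.select(on_select, inputs=None, outputs=where)

    demo.launch()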
@@ -55,6 +56,18 @@ def get_point(img, sel_pix, evt: gr.SelectData):
     print(sel_pix)
     return img, sel_pix
 
+def set_point(img, checkbox_group, sel_pix, semantic_input):
+    ori_img = img
+    # print(img, checkbox_group, sel_pix, semantic_input)
+    sel_pix = ast.literal_eval(sel_pix)
+    img = cv2.imread(img)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    if len(sel_pix) <= 5 and len(sel_pix) > 0:
+        for point, label in sel_pix:
+            cv2.drawMarker(img, point, colors[label], markerType=markers[label], markerSize=20, thickness=5)
+
+    return ori_img, img, sel_pix
+
 
 # undo the selected point
 def undo_points(orig_img, sel_pix):
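set_point is the counterpart of get_point for pre-recorded example points: the examples table (added further down in this commit) stores points as a Python-literal string, ast.literal_eval turns that string back into a list of ([x, y], label) pairs, and each pair is drawn with cv2.drawMarker. A standalone sketch of that round trip; the colors and markers tables here are stand-ins for the app's globals of the same names:

    import ast
    import cv2
    import numpy as np

    colors = {1: (255, 0, 0)}        # label 1 = foreground point (stand-in value)
    markers = {1: cv2.MARKER_CROSS}  # marker style per label (stand-in value)

    points_str = "[([120, 80], 1), ([300, 200], 1)]"  # as stored in the examples table
    sel_pix = ast.literal_eval(points_str)            # -> list of ([x, y], label) pairs

    img = np.zeros((400, 400, 3), dtype=np.uint8)     # dummy canvas instead of cv2.imread
    for point, label in sel_pix:
        cv2.drawMarker(img, tuple(point), colors[label],
                       markerType=markers[label], markerSize=20, thickness=5)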
@@ -92,22 +105,6 @@ map_prompt = {
     'semantic segmentation': '[[image2semantic]]',
 }
 
-def download_additional_params(model_name, filename="add_params.bin"):
-    # Download the file and return its path
-    file_path = hf_hub_download(repo_id=model_name, filename=filename, use_auth_token=HF_TOKEN)
-    return file_path
-
-# Load the additional_params.bin file
-def load_additional_params(model_name):
-    # Download additional_params.bin
-    params_path = download_additional_params(model_name)
-
-    # Load the file contents with torch.load()
-    additional_params = torch.load(params_path, map_location='cpu')
-
-    # Return the loaded parameters
-    return additional_params
-
 def process_image_check(path_input, prompt, sel_points, semantic):
     if path_input is None:
         raise gr.Error(
@@ -119,30 +116,9 @@ def process_image_check(path_input, prompt, sel_points, semantic):
     )
 
 
-
-def process_image_4(image_path, prompt):
-
-    inputs = []
-    for p in prompt:
-        cur_p = map_prompt[p]
-
-        coor_point = []
-        point_labels = []
-
-
-        cur_input = {
-            # 'original_size': [[w,h]],
-            # 'target_size': [[768, 768]],
-            'prompt': [cur_p],
-            'coor_point': coor_point,
-            'point_labels': point_labels,
-        }
-        inputs.append(cur_input)
-
-    return inputs
-
-
 def inf(image_path, prompt, sel_points, semantic):
+    if isinstance(sel_points, str):
+        sel_points = ast.literal_eval(selected_points)
     print('=========== PROCESS IMAGE CHECK ===========')
     print(f"Image Path: {image_path}")
     print(f"Prompt: {prompt}")
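One caveat in this hunk: the added guard calls ast.literal_eval(selected_points), but the function's parameter is named sel_points, so the string branch would raise a NameError whenever it runs. Assuming the intent is simply to de-serialize string-typed points, the fixed guard would read:

    import ast

    def parse_points(sel_points):  # hypothetical helper mirroring the guard in inf()
        if isinstance(sel_points, str):
            # note: sel_points, not selected_points
            sel_points = ast.literal_eval(sel_points)
        return sel_points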
@@ -191,6 +167,9 @@ def inf(image_path, prompt, sel_points, semantic):
 def clear_cache():
     return None, None
 
+def dummy():
+    pass
+
 def run_demo_server():
     options = ['depth', 'normal', 'entity segmentation', 'human pose', 'point segmentation', 'semantic segmentation']
     gradio_theme = gr.themes.Default()
@@ -227,6 +206,9 @@
         .md_feedback li {
             margin-bottom: 0px !important;
         }
+        .hideme {
+            display: none;
+        }
     """,
     head="""
     <script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
@@ -258,13 +240,22 @@
 
             """
         )
+        selected_points_tmp = gr.Textbox(label="Points", elem_classes="hideme")
 
         with gr.Row():
-            checkbox_group = gr.CheckboxGroup(choices=options, label="
+            checkbox_group = gr.CheckboxGroup(choices=options, label="Task")
         with gr.Row():
-            semantic_input = gr.Textbox(label="Category Name
+            semantic_input = gr.Textbox(label="Category Name", placeholder="e.g. person/cat/dog/elephant...... (for semantic segmentation only, in COCO)")
         with gr.Row():
             gr.Markdown('For non-human image inputs, the pose results may have issues. Same when perform semantic segmentation with categories that are not in COCO.')
+        with gr.Row():
+            gr.Markdown('The results of semantic segmentation may be unstable because:')
+        with gr.Row():
+            gr.Markdown('1. We only trained on COCO, whose quality and quantity are insufficient to meet the requirements.')
+        with gr.Row():
+            gr.Markdown('2. Semantic segmentation is more complex than other tasks, as it requires accurately learning the relationship between semantics and objects.')
+        with gr.Row():
+            gr.Markdown('However, we are still able to produce some high-quality semantic segmentation results, strongly demonstrating the potential of our approach.')
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(
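The .hideme CSS rule from the previous hunk and the selected_points_tmp textbox added here work as a pair: the textbox stays in the component tree so the examples table can feed it a serialized point list, but display: none keeps it out of the visible UI. A minimal sketch of the same hidden-carrier pattern (toy demo; the names are mine):

    import gradio as gr

    with gr.Blocks(css=".hideme { display: none; }") as demo:
        hidden_state = gr.Textbox(elem_classes="hideme")  # present but invisible
        shown = gr.Textbox(label="Visible output")
        btn = gr.Button("Reveal")
        # The hidden textbox participates in events like any other component.
        btn.click(lambda s: s, inputs=hidden_state, outputs=shown)

    demo.launch()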
@@ -314,7 +305,7 @@
         ).success(
             # fn=process_pipe_matting,
             fn=inf,
-            inputs=[
+            inputs=[original_image, checkbox_group, selected_points, semantic_input],
             outputs=[matting_image_output],
             concurrency_limit=1,
         )
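This .success(...) is the tail of an event chain: an earlier step (presumably process_image_check, defined above in the file) validates the inputs, and inf only runs if that step finished without raising gr.Error. The pattern in isolation, with hypothetical functions:

    import gradio as gr

    def check(x):
        if not x:
            raise gr.Error("Provide an input first.")  # raising aborts the chain

    def run(x):
        return x[::-1]

    with gr.Blocks() as demo:
        inp = gr.Textbox()
        out = gr.Textbox()
        btn = gr.Button("Run")
        # run() fires only if check() completed without raising.
        btn.click(check, inputs=inp, outputs=None).success(run, inputs=inp, outputs=out)

    demo.launch()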
@@ -346,7 +337,7 @@
 
         input_image.select(
             get_point,
-            [
+            [original_image, selected_points],
            [input_image, selected_points],
         )
 
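The final hunk below replaces a run of blank lines with a clickable examples table. With run_on_click=True and cache_examples=False, selecting a row first copies the row's values into inputs (including the serialized points routed through the hidden selected_points_tmp), then runs fn=set_point, whose three return values populate original_image, input_image, and selected_points; the commented-out examples.dataset.click(...) chain kept beneath it is a manual two-step version of the same wiring. The mechanism in miniature (toy function and components, not the app's):

    import gradio as gr

    def shout(text):
        return text.upper()  # stand-in for set_point

    with gr.Blocks() as demo:
        inp = gr.Textbox(label="Input")
        out = gr.Textbox(label="Output")
        gr.Examples(
            examples=[["hello"], ["world"]],
            inputs=[inp],
            outputs=[out],
            fn=shout,
            run_on_click=True,     # populate inputs, then run fn automatically
            cache_examples=False,  # compute live rather than pre-caching
        )

    demo.launch()

One detail worth checking in the hunk: the new table references assets/person.jpg, which is not among the assets added by this commit, so that row presumably relies on a file already present in the repo.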
@@ -356,16 +347,36 @@
             [input_image, selected_points]
         )
 
-
-
-
-
-
-
-
-
-
-
+        examples = gr.Examples(
+            fn=set_point,
+            run_on_click=True,
+            examples=[
+                ["assets/woman.jpg", ['point segmentation', 'depth', 'normal', 'entity segmentation', 'human pose', 'semantic segmentation'], '[([2744, 975], 1), ([3440, 1954], 1), ([2123, 2405], 1), ([838, 1678], 1), ([4688, 1922], 1)]', 'person'],
+                ["assets/woman2.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'human pose'], '[([687, 1416], 1), ([1021, 707], 1), ([1138, 1138], 1), ([1182, 1583], 1), ([1188, 2172], 1)]', 'person'],
+                ["assets/board.jpg", ['point segmentation', 'depth', 'entity segmentation', 'normal'], '[([1003, 2163], 1)]', ''],
+                ["assets/lion.jpg", ['point segmentation', 'depth', 'semantic segmentation'], '[([1287, 671], 1)]', 'lion'],
+                ["assets/apple.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1287, 671], 1)]', 'apple'],
+                ["assets/room.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1308, 2215], 1)]', 'chair'],
+                ["assets/car.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation'], '[([1276, 1369], 1)]', 'car'],
+                ["assets/person.jpg", ['point segmentation', 'depth', 'semantic segmentation', 'normal', 'entity segmentation', 'human pose'], '[([3253, 1459], 1)]', 'tie'],
+                ["assets/woman3.jpg", ['point segmentation', 'depth', 'entity segmentation'], '[([420, 692], 1)]', ''],
+                ["assets/cat.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation'], '[([756, 661], 1)]', 'cat'],
+                ["assets/room2.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'normal'], '[([3946, 224], 1)]', 'laptop'],
+                ["assets/cartoon_cat.png", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation', 'normal'], '[([1478, 3048], 1)]', 'cat'],
+                ["assets/sheep.jpg", ['point segmentation', 'depth', 'entity segmentation', 'semantic segmentation'], '[([1789, 1791], 1), ([1869, 1333], 1)]', 'sheep'],
+                ["assets/cartoon_girl.jpeg", ['point segmentation', 'depth', 'entity segmentation', 'normal', 'human pose', 'semantic segmentation'], '[([1208, 2089], 1), ([635, 2731], 1), ([1070, 2888], 1), ([1493, 2350], 1)]', 'person'],
+            ],
+            inputs=[input_image, checkbox_group, selected_points_tmp, semantic_input],
+            outputs=[original_image, input_image, selected_points],
+            cache_examples=False,
+        )
+
+        # examples.dataset.click(
+        #     fn=dummy
+        # ).success(
+        #     fn=set_point, # Now run the actual function after inputs are populated
+        #     inputs=[input_image, checkbox_group, selected_points_tmp, semantic_input],
+        #     outputs=[input_image, selected_points]
         # )
 
     demo.queue(
assets/apple.jpg         ADDED (Git LFS)
assets/board.jpg         ADDED (Git LFS)
assets/car.jpg           ADDED (Git LFS)
assets/cartoon_cat.png   ADDED (Git LFS)
assets/cartoon_girl.jpeg ADDED (Git LFS)
assets/cat.jpg           ADDED (Git LFS)
assets/lion.jpg          ADDED (Git LFS)
assets/room.jpg          ADDED (Git LFS)
assets/room2.jpg         ADDED (Git LFS)
assets/sheep.jpg         ADDED (Git LFS)
assets/woman.jpg         ADDED (Git LFS)
assets/woman2.jpg        ADDED (Git LFS)
assets/woman3.jpg        ADDED (Git LFS)
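Each asset shows as +3 -0 because Git LFS commits only a three-line pointer file to the repository; the image bytes themselves go to LFS storage. A pointer file has this shape (oid and size shown as placeholders):

    version https://git-lfs.github.com/spec/v1
    oid sha256:<object hash>
    size <bytes>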