Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -114,8 +114,14 @@ def create_video(frames, fps):
|
|
| 114 |
return 'movie.mp4'
|
| 115 |
|
| 116 |
|
| 117 |
-
def run_grounded_sam(input_image, text_prompt, task_type, background_prompt
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
#global groundingdino_model, sam_predictor, generator
|
| 120 |
|
| 121 |
# make dir
|
|
@@ -276,9 +282,9 @@ def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, bac
|
|
| 276 |
green_img = alpha_pred[..., None] * image_ori + (1 - alpha_pred[..., None]) * np.array([PALETTE_back], dtype='uint8')
|
| 277 |
green_img = np.uint8(green_img)
|
| 278 |
#return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
|
| 279 |
-
return alpha_rgb
|
| 280 |
|
| 281 |
-
def infer(video_in, trim_value, prompt, background_prompt
|
| 282 |
print(prompt)
|
| 283 |
break_vid = get_frames(video_in)
|
| 284 |
|
|
@@ -299,11 +305,10 @@ def infer(video_in, trim_value, prompt, background_prompt, background_type, box_
|
|
| 299 |
# Convert the image to a NumPy array
|
| 300 |
image_array = np.array(to_numpy_i)
|
| 301 |
|
| 302 |
-
|
| 303 |
matte_img = run_grounded_sam(image_array, prompt, "text", background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode)
|
| 304 |
-
|
| 305 |
-
matte_img
|
| 306 |
-
|
| 307 |
|
| 308 |
# exporting the image
|
| 309 |
matte_img.save(f"result_img-{i}.jpg")
|
|
@@ -360,26 +365,26 @@ if __name__ == "__main__":
|
|
| 360 |
#task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
|
| 361 |
#task_type = "text"
|
| 362 |
text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
|
| 363 |
-
background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
|
| 364 |
background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
|
| 365 |
|
| 366 |
run_button = gr.Button(label="Run")
|
| 367 |
-
with gr.Accordion("Advanced options", open=False):
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
|
| 384 |
with gr.Column():
|
| 385 |
#gallery = gr.Gallery(
|
|
@@ -388,7 +393,7 @@ if __name__ == "__main__":
|
|
| 388 |
video_out = gr.Video()
|
| 389 |
|
| 390 |
run_button.click(fn=infer, inputs=[
|
| 391 |
-
video_in, trim_in, text_prompt, background_prompt
|
| 392 |
|
| 393 |
block.launch(debug=args.debug, share=args.share, show_error=True)
|
| 394 |
#block.queue(concurrency_count=100)
|
|
|
|
| 114 |
return 'movie.mp4'
|
| 115 |
|
| 116 |
|
| 117 |
+
def run_grounded_sam(input_image, text_prompt, task_type, background_prompt):
|
| 118 |
+
background_type = "generated_by_text"
|
| 119 |
+
box_threshold = 0.25
|
| 120 |
+
text_threshold = 0.25
|
| 121 |
+
iou_threshold = 0.5
|
| 122 |
+
scribble_mode = "split"
|
| 123 |
+
guidance_mode = "alpha"
|
| 124 |
+
|
| 125 |
#global groundingdino_model, sam_predictor, generator
|
| 126 |
|
| 127 |
# make dir
|
|
|
|
| 282 |
green_img = alpha_pred[..., None] * image_ori + (1 - alpha_pred[..., None]) * np.array([PALETTE_back], dtype='uint8')
|
| 283 |
green_img = np.uint8(green_img)
|
| 284 |
#return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
|
| 285 |
+
return com_img, alpha_rgb
|
| 286 |
|
| 287 |
+
def infer(video_in, trim_value, prompt, background_prompt):
|
| 288 |
print(prompt)
|
| 289 |
break_vid = get_frames(video_in)
|
| 290 |
|
|
|
|
| 305 |
# Convert the image to a NumPy array
|
| 306 |
image_array = np.array(to_numpy_i)
|
| 307 |
|
|
|
|
| 308 |
matte_img = run_grounded_sam(image_array, prompt, "text", background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode)
|
| 309 |
+
|
| 310 |
+
matte_img= Image.fromarray(matte_img[1])
|
| 311 |
+
|
| 312 |
|
| 313 |
# exporting the image
|
| 314 |
matte_img.save(f"result_img-{i}.jpg")
|
|
|
|
| 365 |
#task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
|
| 366 |
#task_type = "text"
|
| 367 |
text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
|
| 368 |
+
#background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
|
| 369 |
background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
|
| 370 |
|
| 371 |
run_button = gr.Button(label="Run")
|
| 372 |
+
#with gr.Accordion("Advanced options", open=False):
|
| 373 |
+
# box_threshold = gr.Slider(
|
| 374 |
+
# label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
|
| 375 |
+
# )
|
| 376 |
+
# text_threshold = gr.Slider(
|
| 377 |
+
# label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
|
| 378 |
+
# )
|
| 379 |
+
# iou_threshold = gr.Slider(
|
| 380 |
+
# label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05
|
| 381 |
+
# )
|
| 382 |
+
# scribble_mode = gr.Dropdown(
|
| 383 |
+
# ["merge", "split"], value="split", label="scribble_mode"
|
| 384 |
+
# )
|
| 385 |
+
# guidance_mode = gr.Dropdown(
|
| 386 |
+
# ["mask", "alpha"], value="alpha", label="guidance_mode", info="mask guidance is for complex scenes with multiple instances, alpha guidance is for simple scene with single instance"
|
| 387 |
+
# )
|
| 388 |
|
| 389 |
with gr.Column():
|
| 390 |
#gallery = gr.Gallery(
|
|
|
|
| 393 |
video_out = gr.Video()
|
| 394 |
|
| 395 |
run_button.click(fn=infer, inputs=[
|
| 396 |
+
video_in, trim_in, text_prompt, background_prompt], outputs=video_out)
|
| 397 |
|
| 398 |
block.launch(debug=args.debug, share=args.share, show_error=True)
|
| 399 |
#block.queue(concurrency_count=100)
|