Video-Matting-Anything

Build error

App Files Files Community

fffiloni committed on Jun 11, 2023

Commit

991c663

1 Parent(s): fe09d17

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -26

app.py CHANGED Viewed

@@ -296,7 +296,10 @@ def infer(video_in, trim_value, prompt, background_prompt):
         print("video is shorter than the cut value")
         n_frame = len(frames_list)
-    result_frames = []
     print("set stop frames to: " + str(n_frame))
     for i in frames_list[0:int(n_frame)]:
@@ -305,20 +308,28 @@ def infer(video_in, trim_value, prompt, background_prompt):
         # Convert the image to a NumPy array
         image_array = np.array(to_numpy_i)
-        matte_img = run_grounded_sam(image_array, prompt, "text", background_prompt)
-        matte_img= Image.fromarray(matte_img[1])
-        # exporting the image
-        matte_img.save(f"result_img-{i}.jpg")
-        result_frames.append(f"result_img-{i}.jpg")
         print("frame " + i + "/" + str(n_frame) + ": done;")
-    final_vid = create_video(result_frames, fps)
     print("finished !")
-    return final_vid
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("MAM demo", add_help=True)
@@ -337,24 +348,17 @@ if __name__ == "__main__":
     with block:
         gr.Markdown(
         """
-        # Matting Anything Demo
-        Welcome to the Matting Anything demo and upload your image to get started <br/> You may select different prompt types to get the alpha matte of target instance, and select different backgrounds for image composition.
         ## Usage
-        You may check the <a href='https://www.youtube.com/watch?v=XY2Q0HATGOk'>video</a> to see how to play with the demo, or check the details below.
         <details>
-        You may upload an image to start, we support 3 prompt types to get the alpha matte of the target instance：
-        **scribble_point**: Click an point on the target instance.
-        **scribble_box**: Click on two points, the top-left point and the bottom-right point to represent a bounding box of the target instance.
         **text**: Send text prompt to identify the target instance in the `Text prompt` box.
-        We also support 2 background types to support image composition with the alpha matte output:
-        **real_world_sample**: Randomly select a real-world image from `assets/backgrounds` for composition.
         **generated_by_text**: Send background text prompt to create a background image with stable diffusion model in the `Background prompt` box.
         </details>
         """)
@@ -364,7 +368,7 @@ if __name__ == "__main__":
                 trim_in = gr.Slider(label="Cut video at (s)", minimun=1, maximum=5, step=1, value=1)
                 #task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
                 #task_type = "text"
-                text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
                 #background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
                 background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
@@ -390,11 +394,13 @@ if __name__ == "__main__":
                 #gallery = gr.Gallery(
                 #    label="Generated images", show_label=True, elem_id="gallery"
                 #).style(preview=True, grid=3, object_fit="scale-down")
-                video_out = gr.Video()
         run_button.click(fn=infer, inputs=[
-                        video_in, trim_in, text_prompt, background_prompt], outputs=video_out)
-    block.launch(debug=args.debug, share=args.share, show_error=True)
     #block.queue(concurrency_count=100)
     #block.launch(server_name='0.0.0.0', server_port=args.port, debug=args.debug, share=args.share)

         print("video is shorter than the cut value")
         n_frame = len(frames_list)
+    with_bg_result_frames = []
+    with_green_result_frames = []
+    with_matte_result_frames = []
     print("set stop frames to: " + str(n_frame))
     for i in frames_list[0:int(n_frame)]:
         # Convert the image to a NumPy array
         image_array = np.array(to_numpy_i)
+        results = run_grounded_sam(image_array, prompt, "text", background_prompt)
+        bg_img = Image.fromarray(results[0])
+        greem_img = Image.fromarray(results[1])
+        matte_img = Image.fromarray(results[2])
+        # exporting the images
+        bg_img.save(f"bg_result_img-{i}.jpg")
+        with_bg_result_frames.append(f"bg_result_img-{i}.jpg")
+        green_img.save(f"green_result_img-{i}.jpg")
+        with_green_result_frames.append(f"green_result_img-{i}.jpg")
+        matte_img.save(f"matte_result_img-{i}.jpg")
+        with_matte_result_frames.append(f"matte_result_img-{i}.jpg")
         print("frame " + i + "/" + str(n_frame) + ": done;")
+    vid_bg = create_video(with_bg_result_frames, fps)
+    vid_green = create_video(with_green_result_frames, fps)
+    vid_matte = create_video(with_matte_result_frames, fps)
     print("finished !")
+    return vid_bg, vid_green, vid_matte
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("MAM demo", add_help=True)
     with block:
         gr.Markdown(
         """
+        # Matting Anything in Video Demo
+        Welcome to the Matting Anything in Video demo and upload your video to get started <br/>
+        You may open usage details below to understand how to use this demo.
         ## Usage
         <details>
+        You may upload a video to start, for the moment we only support 1 prompt type to get the alpha matte of the target:
         **text**: Send text prompt to identify the target instance in the `Text prompt` box.
+        We also only support 1 background type to support image composition with the alpha matte output:
         **generated_by_text**: Send background text prompt to create a background image with stable diffusion model in the `Background prompt` box.
         </details>
         """)
                 trim_in = gr.Slider(label="Cut video at (s)", minimun=1, maximum=5, step=1, value=1)
                 #task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
                 #task_type = "text"
+                text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle", info="Describe the subject visible in your video that you want to matte")
                 #background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
                 background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
                 #gallery = gr.Gallery(
                 #    label="Generated images", show_label=True, elem_id="gallery"
                 #).style(preview=True, grid=3, object_fit="scale-down")
+                vid_bg_out = gr.Video(label="Video with background")
+                vid_green_out = gr.Video(label="Video green screen")
+                vid_matte_out = gr.Video(label="Video matte")
         run_button.click(fn=infer, inputs=[
+                        video_in, trim_in, text_prompt, background_prompt], outputs=[vid_bg_out, vid_green_out, vid_matte_out])
+    block.queue(size=12).launch(debug=args.debug, share=args.share, show_error=True)
     #block.queue(concurrency_count=100)
     #block.launch(server_name='0.0.0.0', server_port=args.port, debug=args.debug, share=args.share)