Spaces:

Leema-Krishna
/

ObjectInsertion

Runtime error

File size: 14,437 Bytes

f3d0a26

import gradio as gr
import numpy as np
from PIL import Image
from visualizer import draw_box_on_frame, create_comparison_strip
from preview import preview_trajectory
from pipeline_adapter import (
    extract_first_frame,
    load_all_frames,
    run_pipeline_motion_edit,
    run_pipeline_insertion        # ← need to add this
)


def build_interface():
    """Build the Gradio Blocks UI for the TRACE object-insertion prototype.

    Tries to load the Qwen-Image-Edit pipeline once (failure is reported but
    does not abort), lays out the "Object Insertion" tab and wires its
    callbacks. The "Motion Path Edit" tab and the full insertion run are
    currently disabled (kept below as commented-out code).

    Returns:
        The assembled ``gr.Blocks`` app; the caller is expected to launch it.
    """

    # Load Qwen-Image-Edit once at startup (not per-click — model is ~20GB)
    _qwen_edit_pipe = None
    try:
        from frame_editor import load_qwen_image_edit
        _qwen_edit_pipe = load_qwen_image_edit(use_lightning=True, device="cuda")
        print("Qwen-Image-Edit ready.")
    except Exception as e:
        # Best effort: the UI still comes up, and the preview callback reports
        # the missing model to the user when it is clicked.
        print(f"Qwen-Image-Edit not available: {e}")

    with gr.Blocks(title="TRACE Prototype", theme=gr.themes.Soft()) as demo:

        gr.Markdown("# TRACE Prototype — Object Motion Editing")

        with gr.Tabs():

            # ── Tab 1: Motion Edit (existing) ─────────────────────────
            # with gr.Tab("Motion Path Edit"):
            #     gr.Markdown(
            #         "Move an **existing object** in the video "
            #         "to a new trajectory."
            #     )

            #     with gr.Row():
            #         with gr.Column():
            #             video_input_edit = gr.Video(label="Input Video")
            #             video_info_edit  = gr.Markdown("")

            #         with gr.Column():
            #             first_frame_edit = gr.Image(
            #                 label="First Frame + Trajectory Preview",
            #                 interactive=False
            #             )

            #     gr.Markdown("**Start Box** — draw around the object")
            #     with gr.Row():
            #         sx1 = gr.Number(label="x1", value=100, precision=0)
            #         sy1 = gr.Number(label="y1", value=100, precision=0)
            #         sx2 = gr.Number(label="x2", value=200, precision=0)
            #         sy2 = gr.Number(label="y2", value=200, precision=0)

            #     gr.Markdown("**End Box** — where you want it to go")
            #     with gr.Row():
            #         ex1 = gr.Number(label="x1", value=500, precision=0)
            #         ey1 = gr.Number(label="y1", value=200, precision=0)
            #         ex2 = gr.Number(label="x2", value=600, precision=0)
            #         ey2 = gr.Number(label="y2", value=300, precision=0)

            #     prompt_edit = gr.Textbox(
            #         label="Scene Description",
            #         placeholder="a dog running in a park..."
            #     )

            #     with gr.Row():
            #         stage1_method = gr.Radio(
            #             choices=["linear", "cotracker"],
            #             value="linear",
            #             label="Stage 1 Method"
            #         )
            #         use_vace_edit = gr.Checkbox(
            #             label="Use VACE",
            #             value=False
            #         )

            #     run_edit_btn = gr.Button("Run Motion Edit", variant="primary")

            #     with gr.Row():
            #         output_video_edit = gr.Video(label="Output Video")
            #         metrics_edit      = gr.Markdown("")

            #     comparison_edit = gr.Image(label="Frame Comparison", interactive=False)

            # ── Tab 2: Object Insertion (NEW — uses Qwen) ─────────────
            with gr.Tab("Object Insertion"):
                gr.Markdown(
                    "Insert a **new object** into the video using "
                    "Qwen to edit the first frame, then propagate."
                )

                with gr.Row():
                    with gr.Column():
                        video_input_ins = gr.Video(label="Input Video")
                        video_info_ins  = gr.Markdown("")

                    with gr.Column():
                        first_frame_ins = gr.Image(
                            label="First Frame Preview",
                            interactive=False
                        )

                gr.Markdown("**Insertion Box** — where to place the new object")
                with gr.Row():
                    ix1 = gr.Number(label="x1", value=40, precision=0)
                    iy1 = gr.Number(label="y1", value=40, precision=0)
                    ix2 = gr.Number(label="x2", value=300, precision=0)
                    iy2 = gr.Number(label="y2", value=300, precision=0)

                gr.Markdown("**End Box** — where the object should arrive")
                with gr.Row():
                    iex1 = gr.Number(label="x1", value=500, precision=0)
                    iey1 = gr.Number(label="y1", value=200, precision=0)
                    iex2 = gr.Number(label="x2", value=600, precision=0)
                    iey2 = gr.Number(label="y2", value=300, precision=0)

                # ── The Qwen-specific inputs ───────────────────────────
                gr.Markdown("**Object Description** — what Qwen will insert")
                with gr.Row():
                    with gr.Column():
                        object_description = gr.Textbox(
                            label="Object to Insert (Qwen prompt)",
                            placeholder="a red helium balloon with a white string",
                            info="Qwen uses this to paint the object into frame 1"
                        )
                        scene_prompt = gr.Textbox(
                            label="Full Scene Prompt (for video synthesis)",
                            placeholder="a peaceful park scene with a red balloon"
                        )

                    with gr.Column():
                        gr.Markdown("Using **Qwen-Image-Edit-2511** for object insertion")

                        # use_vace_ins = gr.Checkbox(
                        #     label="Use VACE",
                        #     value=False
                        # )

                # ── Qwen output preview before running video ───────────
                gr.Markdown("**Step 1 Preview** — see Qwen's edit before running video")
                preview_qwen_btn = gr.Button(
                    "Preview First Frame Edit",
                    variant="secondary"
                )
                edited_frame_preview = gr.Image(
                    label="Qwen-Edited First Frame",
                    interactive=False
                )
                qwen_status = gr.Markdown("")

                # gr.Markdown("---")
                # run_ins_btn = gr.Button(
                #     "Run Full Insertion Pipeline",
                #     variant="primary"
                # )

                # with gr.Row():
                #     output_video_ins = gr.Video(label="Output Video")
                #     metrics_ins      = gr.Markdown("")

                # comparison_ins = gr.Image(
                #     label="Frame Comparison",
                #     interactive=False
                # )

        # ── Wire Up Tab 1 ─────────────────────────────────────────────
        #_state = {"frames": None, "first_frame": None}


        # def on_video_upload_edit(video_path):
        #     if video_path is None:
        #         return None, "Upload a video."
        #     first_frame = extract_first_frame(video_path)
        #     _state["first_frame"] = first_frame
        #     return Image.fromarray(first_frame), "Video loaded."

        # def on_boxes_changed_edit(sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2):
        #     if _state["first_frame"] is None:
        #         return None
        #     from preview import preview_trajectory
        #     preview = preview_trajectory(
        #         _state["first_frame"],
        #         [sx1, sy1, sx2, sy2],
        #         [ex1, ey1, ex2, ey2]
        #     )
        #     return Image.fromarray(preview)

        # video_input_edit.change(
        #     fn=on_video_upload_edit,
        #     inputs=[video_input_edit],
        #     outputs=[first_frame_edit, video_info_edit]
        # )

        # for inp in [sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2]:
        #     inp.change(
        #         fn=on_boxes_changed_edit,
        #         inputs=[sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2],
        #         outputs=[first_frame_edit]
        #     )

        # def on_run_edit(video_path, sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2,
        #                 prompt, stage1_method, use_vace, progress=gr.Progress()):
        #     if video_path is None:
        #         raise gr.Error("Please upload a video first.")
        #     if sx2 <= sx1 or sy2 <= sy1:
        #         raise gr.Error("Start box is invalid: x2 must be > x1, y2 must be > y1")
        #     if ex2 <= ex1 or ey2 <= ey1:
        #         raise gr.Error("End box is invalid: x2 must be > x1, y2 must be > y1")

        #     def prog(frac, msg):
        #         progress(frac, desc=msg)

        #     output_path, result_frames, pred_boxes, metrics = \
        #         run_pipeline_motion_edit(
        #             video_path=video_path,
        #             start_box=[sx1, sy1, sx2, sy2],
        #             end_box=[ex1, ey1, ex2, ey2],
        #             prompt=prompt,
        #             stage1_method=stage1_method,
        #             use_vace=use_vace,
        #             progress_callback=prog
        #         )

        #     if _state["frames"] is None:
        #         _state["frames"] = load_all_frames(video_path)

        #     comparison = create_comparison_strip(
        #         _state["frames"],
        #         result_frames,
        #         pred_boxes,
        #         sample_ts=[0, 20, 40, 60, 80]
        #     )

        #     return output_path, Image.fromarray(comparison), metrics


        # run_edit_btn.click(
        #     fn=on_run_edit,
        #     inputs=[
        #         video_input_edit,
        #         sx1, sy1, sx2, sy2,
        #         ex1, ey1, ex2, ey2,
        #         prompt_edit, stage1_method, use_vace_edit
        #     ],
        #     outputs=[output_video_edit, comparison_edit, metrics_edit]
        # )

        # ── Wire Up Tab 2 (Qwen insertion) ────────────────────────────
        # NOTE: this dict is created once per build_interface() call and is
        # captured by the callbacks below, so it is shared by every session
        # using this Blocks instance (it is not per-user state).
        _ins_state = {"first_frame": None, "edited_frame": None}

        def _require_valid_box(box, name):
            """Raise ``gr.Error`` unless *box* is a complete [x1, y1, x2, y2] with positive area."""
            # A cleared gr.Number field arrives as None.
            if any(coord is None for coord in box):
                raise gr.Error(
                    f"{name} box is incomplete: fill in x1, y1, x2 and y2."
                )
            x1, y1, x2, y2 = box
            if x2 <= x1 or y2 <= y1:
                raise gr.Error(
                    f"{name} box is invalid: x2 must be > x1, y2 must be > y1"
                )

        def on_video_upload_ins(video_path):
            """Cache the first frame of the uploaded video and show it.

            Returns a ``(preview image or None, status text)`` pair for
            ``first_frame_ins`` and ``video_info_ins``.
            """
            # Any change of the video (new upload or clear) invalidates what
            # was cached for the previous one; otherwise a later preview/run
            # would silently use frames from the wrong clip.
            _ins_state["first_frame"] = None
            _ins_state["edited_frame"] = None

            if video_path is None:
                return None, "Upload a video."
            first_frame = extract_first_frame(video_path)
            _ins_state["first_frame"] = first_frame
            return Image.fromarray(first_frame), "Video loaded."

        def on_preview_qwen(
            video_path,
            ix1, iy1, ix2, iy2,
            object_description,
            progress=gr.Progress()
        ):
            """Edit the cached first frame with Qwen-Image-Edit and preview it.

            ``video_path`` is received from the UI but not read here; the
            frame comes from ``_ins_state``. The edited frame is stored in
            ``_ins_state["edited_frame"]`` for the insertion run.

            Returns a ``(preview image, status text)`` pair.

            Raises:
                gr.Error: no video loaded, empty description, invalid box,
                    or the Qwen pipeline failed to load at startup.
            """
            if _ins_state["first_frame"] is None:
                raise gr.Error("Upload a video first.")
            # Tolerate None as well as empty/whitespace-only text.
            if not (object_description or "").strip():
                raise gr.Error("Enter an object description.")
            if _qwen_edit_pipe is None:
                raise gr.Error("Qwen-Image-Edit failed to load at startup. Check logs.")

            insertion_box = [ix1, iy1, ix2, iy2]
            _require_valid_box(insertion_box, "Insertion")

            progress(0.3, "Editing first frame with Qwen-Image-Edit...")
            from frame_editor import insert_object_qwen_edit
            edited = insert_object_qwen_edit(
                first_frame=_ins_state["first_frame"],
                box=insertion_box,
                object_description=object_description,
                pipe=_qwen_edit_pipe,
            )

            # Keep the un-annotated edit; the box overlay is for display only.
            _ins_state["edited_frame"] = edited

            preview = draw_box_on_frame(
                edited,
                insertion_box,
                color=(255, 220, 0),
                label="inserted here"
            )

            progress(1.0, "Done!")
            return (
                Image.fromarray(preview),
                "First frame edited."
            )


        def on_run_insertion(
            video_path,
            ix1, iy1, ix2, iy2,
            iex1, iey1, iex2, iey2,
            scene_prompt,
            use_vace_ins,
            progress=gr.Progress()
        ):
            """Run the full insertion pipeline using the previewed edited frame.

            Currently not wired to any button (see the commented-out
            ``run_ins_btn`` below).

            Returns ``(output_path, comparison image, metrics)``.

            Raises:
                gr.Error: the first-frame preview has not been run yet, or a
                    box is incomplete/invalid.
            """
            if _ins_state["edited_frame"] is None:
                raise gr.Error(
                    "Run 'Preview First Frame Edit' first — "
                    "the edited frame is needed as appearance reference."
                )

            start_box = [ix1, iy1, ix2, iy2]
            end_box = [iex1, iey1, iex2, iey2]
            _require_valid_box(start_box, "Insertion")
            _require_valid_box(end_box, "End")

            output_path, result_frames, pred_boxes, metrics = \
                run_pipeline_insertion(
                    video_path=video_path,
                    edited_first_frame=_ins_state["edited_frame"],
                    start_box=start_box,
                    end_box=end_box,
                    prompt=scene_prompt,
                    use_vace=use_vace_ins,
                    progress_callback=lambda f, m: progress(f, desc=m)
                )

            frames = load_all_frames(video_path)
            comparison = create_comparison_strip(
                frames, result_frames, pred_boxes
            )

            return (
                output_path,
                Image.fromarray(comparison),
                metrics
            )

        video_input_ins.change(
            fn=on_video_upload_ins,
            inputs=[video_input_ins],
            outputs=[first_frame_ins, video_info_ins]
        )

        preview_qwen_btn.click(
            fn=on_preview_qwen,
            inputs=[
                video_input_ins,
                ix1, iy1, ix2, iy2,
                object_description,
            ],
            outputs=[edited_frame_preview, qwen_status]
        )

        # run_ins_btn.click(
        #     fn=on_run_insertion,
        #     inputs=[
        #         video_input_ins,
        #         ix1, iy1, ix2, iy2,
        #         iex1, iey1, iex2, iey2,
        #         scene_prompt,
        #         use_vace_ins
        #     ],
        #     outputs=[output_video_ins, comparison_ins, metrics_ins]
        # )

    return demo


# Script entry point: build the UI and serve it only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    demo = build_interface()
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)