"""Gradio UI for the TRACE prototype.

Only the "Object Insertion" tab is currently active. The "Motion Path Edit"
tab, the VACE checkbox and the full-pipeline run button are kept below as
commented-out code.
"""

import gradio as gr
import numpy as np
from PIL import Image

from visualizer import draw_box_on_frame, create_comparison_strip
from preview import preview_trajectory
from pipeline_adapter import (
    extract_first_frame,
    load_all_frames,
    run_pipeline_motion_edit,
    run_pipeline_insertion,  # ← need to add this
)


def _validate_box(box, label):
    """Raise ``gr.Error`` unless *box* is a complete, non-degenerate box.

    Args:
        box: Sequence ``[x1, y1, x2, y2]`` taken from the ``gr.Number``
            inputs. Entries may be ``None`` when a field was left empty.
        label: Name of the box, used as the prefix of the error message.

    Raises:
        gr.Error: If a coordinate is missing, or if ``x2 <= x1`` or
            ``y2 <= y1``.
    """
    if any(value is None for value in box):
        raise gr.Error(f"{label} is incomplete: fill in x1, y1, x2 and y2.")
    x1, y1, x2, y2 = box
    if x2 <= x1 or y2 <= y1:
        raise gr.Error(f"{label} is invalid: x2 must be > x1, y2 must be > y1")


def build_interface():
    """Build the TRACE prototype UI and return it without launching it.

    The Qwen-Image-Edit pipeline is loaded once here. If loading fails the
    UI is still built, and the preview handler reports the failure when the
    user clicks the preview button.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    # Load Qwen-Image-Edit once at startup (not per-click — model is ~20GB)
    _qwen_edit_pipe = None
    try:
        from frame_editor import load_qwen_image_edit
        _qwen_edit_pipe = load_qwen_image_edit(use_lightning=True, device="cuda")
        print("Qwen-Image-Edit ready.")
    except Exception as e:
        # Deliberate best-effort: the UI must still come up without the model.
        print(f"Qwen-Image-Edit not available: {e}")

    with gr.Blocks(title="TRACE Prototype", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# TRACE Prototype — Object Motion Editing")

        with gr.Tabs():

            # ── Tab 1: Motion Edit (existing) ─────────────────────────
            # with gr.Tab("Motion Path Edit"):
            #     gr.Markdown(
            #         "Move an **existing object** in the video "
            #         "to a new trajectory."
            #     )
            #     with gr.Row():
            #         with gr.Column():
            #             video_input_edit = gr.Video(label="Input Video")
            #             video_info_edit = gr.Markdown("")
            #         with gr.Column():
            #             first_frame_edit = gr.Image(
            #                 label="First Frame + Trajectory Preview",
            #                 interactive=False
            #             )
            #     gr.Markdown("**Start Box** — draw around the object")
            #     with gr.Row():
            #         sx1 = gr.Number(label="x1", value=100, precision=0)
            #         sy1 = gr.Number(label="y1", value=100, precision=0)
            #         sx2 = gr.Number(label="x2", value=200, precision=0)
            #         sy2 = gr.Number(label="y2", value=200, precision=0)
            #     gr.Markdown("**End Box** — where you want it to go")
            #     with gr.Row():
            #         ex1 = gr.Number(label="x1", value=500, precision=0)
            #         ey1 = gr.Number(label="y1", value=200, precision=0)
            #         ex2 = gr.Number(label="x2", value=600, precision=0)
            #         ey2 = gr.Number(label="y2", value=300, precision=0)
            #     prompt_edit = gr.Textbox(
            #         label="Scene Description",
            #         placeholder="a dog running in a park..."
            #     )
            #     with gr.Row():
            #         stage1_method = gr.Radio(
            #             choices=["linear", "cotracker"],
            #             value="linear",
            #             label="Stage 1 Method"
            #         )
            #         use_vace_edit = gr.Checkbox(
            #             label="Use VACE",
            #             value=False
            #         )
            #     run_edit_btn = gr.Button("Run Motion Edit", variant="primary")
            #     with gr.Row():
            #         output_video_edit = gr.Video(label="Output Video")
            #         metrics_edit = gr.Markdown("")
            #     comparison_edit = gr.Image(label="Frame Comparison", interactive=False)

            # ── Tab 2: Object Insertion (NEW — uses Qwen) ─────────────
            with gr.Tab("Object Insertion"):
                gr.Markdown(
                    "Insert a **new object** into the video using "
                    "Qwen to edit the first frame, then propagate."
                )
                with gr.Row():
                    with gr.Column():
                        video_input_ins = gr.Video(label="Input Video")
                        video_info_ins = gr.Markdown("")
                    with gr.Column():
                        first_frame_ins = gr.Image(
                            label="First Frame Preview",
                            interactive=False
                        )

                gr.Markdown("**Insertion Box** — where to place the new object")
                with gr.Row():
                    ix1 = gr.Number(label="x1", value=40, precision=0)
                    iy1 = gr.Number(label="y1", value=40, precision=0)
                    ix2 = gr.Number(label="x2", value=300, precision=0)
                    iy2 = gr.Number(label="y2", value=300, precision=0)

                # The end-box inputs are only consumed by on_run_insertion,
                # whose button is currently commented out below.
                gr.Markdown("**End Box** — where the object should arrive")
                with gr.Row():
                    iex1 = gr.Number(label="x1", value=500, precision=0)
                    iey1 = gr.Number(label="y1", value=200, precision=0)
                    iex2 = gr.Number(label="x2", value=600, precision=0)
                    iey2 = gr.Number(label="y2", value=300, precision=0)

                # ── The Qwen-specific inputs ───────────────────────────
                gr.Markdown("**Object Description** — what Qwen will insert")
                with gr.Row():
                    with gr.Column():
                        object_description = gr.Textbox(
                            label="Object to Insert (Qwen prompt)",
                            placeholder="a red helium balloon with a white string",
                            info="Qwen uses this to paint the object into frame 1"
                        )
                        scene_prompt = gr.Textbox(
                            label="Full Scene Prompt (for video synthesis)",
                            placeholder="a peaceful park scene with a red balloon"
                        )
                    with gr.Column():
                        gr.Markdown("Using **Qwen-Image-Edit-2511** for object insertion")
                        # use_vace_ins = gr.Checkbox(
                        #     label="Use VACE",
                        #     value=False
                        # )

                # ── Qwen output preview before running video ───────────
                gr.Markdown("**Step 1 Preview** — see Qwen's edit before running video")
                preview_qwen_btn = gr.Button(
                    "Preview First Frame Edit",
                    variant="secondary"
                )
                edited_frame_preview = gr.Image(
                    label="Qwen-Edited First Frame",
                    interactive=False
                )
                qwen_status = gr.Markdown("")

                # gr.Markdown("---")
                # run_ins_btn = gr.Button(
                #     "Run Full Insertion Pipeline",
                #     variant="primary"
                # )
                # with gr.Row():
                #     output_video_ins = gr.Video(label="Output Video")
                #     metrics_ins = gr.Markdown("")
                # comparison_ins = gr.Image(
                #     label="Frame Comparison",
                #     interactive=False
                # )

        # ── Wire Up Tab 1 ─────────────────────────────────────────────
        # _state = {"frames": None, "first_frame": None}

        # def on_video_upload_edit(video_path):
        #     if video_path is None:
        #         return None, "Upload a video."
        #     first_frame = extract_first_frame(video_path)
        #     _state["first_frame"] = first_frame
        #     return Image.fromarray(first_frame), "Video loaded."

        # def on_boxes_changed_edit(sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2):
        #     if _state["first_frame"] is None:
        #         return None
        #     from preview import preview_trajectory
        #     preview = preview_trajectory(
        #         _state["first_frame"],
        #         [sx1, sy1, sx2, sy2],
        #         [ex1, ey1, ex2, ey2]
        #     )
        #     return Image.fromarray(preview)

        # video_input_edit.change(
        #     fn=on_video_upload_edit,
        #     inputs=[video_input_edit],
        #     outputs=[first_frame_edit, video_info_edit]
        # )

        # for inp in [sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2]:
        #     inp.change(
        #         fn=on_boxes_changed_edit,
        #         inputs=[sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2],
        #         outputs=[first_frame_edit]
        #     )

        # def on_run_edit(video_path, sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2,
        #                 prompt, stage1_method, use_vace, progress=gr.Progress()):
        #     if video_path is None:
        #         raise gr.Error("Please upload a video first.")
        #     if sx2 <= sx1 or sy2 <= sy1:
        #         raise gr.Error("Start box is invalid: x2 must be > x1, y2 must be > y1")
        #     if ex2 <= ex1 or ey2 <= ey1:
        #         raise gr.Error("End box is invalid: x2 must be > x1, y2 must be > y1")
        #     def prog(frac, msg):
        #         progress(frac, desc=msg)
        #     output_path, result_frames, pred_boxes, metrics = \
        #         run_pipeline_motion_edit(
        #             video_path=video_path,
        #             start_box=[sx1, sy1, sx2, sy2],
        #             end_box=[ex1, ey1, ex2, ey2],
        #             prompt=prompt,
        #             stage1_method=stage1_method,
        #             use_vace=use_vace,
        #             progress_callback=prog
        #         )
        #     if _state["frames"] is None:
        #         _state["frames"] = load_all_frames(video_path)
        #     comparison = create_comparison_strip(
        #         _state["frames"],
        #         result_frames,
        #         pred_boxes,
        #         sample_ts=[0, 20, 40, 60, 80]
        #     )
        #     return output_path, Image.fromarray(comparison), metrics

        # run_edit_btn.click(
        #     fn=on_run_edit,
        #     inputs=[
        #         video_input_edit,
        #         sx1, sy1, sx2, sy2,
        #         ex1, ey1, ex2, ey2,
        #         prompt_edit, stage1_method, use_vace_edit
        #     ],
        #     outputs=[output_video_edit, comparison_edit, metrics_edit]
        # )

        # ── Wire Up Tab 2 (Qwen insertion) ────────────────────────────
        # NOTE(review): this dict is created once per build_interface() call
        # and mutated by every handler invocation, so concurrent browser
        # sessions overwrite each other's frames. Consider gr.State if the
        # demo is used by more than one person at a time.
        _ins_state = {"first_frame": None, "edited_frame": None}

        def on_video_upload_ins(video_path):
            """Cache the first frame of the uploaded video and show it.

            Returns a ``(PIL image or None, status text)`` pair for the
            first-frame preview and the info label.
            """
            # A new or cleared video invalidates everything derived from the
            # previous one; otherwise a later preview/run would silently use
            # frames that belong to a different video.
            _ins_state["first_frame"] = None
            _ins_state["edited_frame"] = None

            if video_path is None:
                return None, "Upload a video."

            first_frame = extract_first_frame(video_path)
            _ins_state["first_frame"] = first_frame
            return Image.fromarray(first_frame), "Video loaded."

        def on_preview_qwen(
            video_path,
            ix1, iy1, ix2, iy2,
            object_description,
            progress=gr.Progress()
        ):
            """Edit the cached first frame with Qwen and return a preview.

            The edited frame is stored in ``_ins_state["edited_frame"]`` so
            that ``on_run_insertion`` can reuse it. ``video_path`` is
            accepted because it is wired as an input but is not read here;
            the frame comes from ``_ins_state``.

            Raises:
                gr.Error: If no video was loaded, the description is empty,
                    the insertion box is incomplete or degenerate, or the
                    Qwen pipeline failed to load at startup.
            """
            if _ins_state["first_frame"] is None:
                raise gr.Error("Upload a video first.")
            # Guard against None as well as whitespace-only text.
            if not (object_description or "").strip():
                raise gr.Error("Enter an object description.")

            insertion_box = [ix1, iy1, ix2, iy2]
            # Reject bad boxes before spending time in the model.
            _validate_box(insertion_box, "Insertion box")

            if _qwen_edit_pipe is None:
                raise gr.Error("Qwen-Image-Edit failed to load at startup. Check logs.")

            progress(0.3, desc="Editing first frame with Qwen-Image-Edit...")

            from frame_editor import insert_object_qwen_edit
            edited = insert_object_qwen_edit(
                first_frame=_ins_state["first_frame"],
                box=insertion_box,
                object_description=object_description,
                pipe=_qwen_edit_pipe,
            )
            _ins_state["edited_frame"] = edited

            preview = draw_box_on_frame(
                edited,
                insertion_box,
                color=(255, 220, 0),
                label="inserted here"
            )
            progress(1.0, desc="Done!")
            return (
                Image.fromarray(preview),
                "First frame edited."
            )

        def on_run_insertion(
            video_path,
            ix1, iy1, ix2, iy2,
            iex1, iey1, iex2, iey2,
            scene_prompt,
            use_vace_ins,
            progress=gr.Progress()
        ):
            """Run the full insertion pipeline using the previewed frame.

            Not wired to any button at the moment (see the commented-out
            ``run_ins_btn`` below).

            Returns:
                ``(output_path, comparison image, metrics)`` in the order
                expected by the commented-out ``run_ins_btn`` outputs.

            Raises:
                gr.Error: If no video is loaded, the preview step has not
                    been run, or either box is incomplete or degenerate.
            """
            if video_path is None:
                raise gr.Error("Upload a video first.")
            if _ins_state["edited_frame"] is None:
                raise gr.Error(
                    "Run 'Preview First Frame Edit' first — "
                    "the edited frame is needed as appearance reference."
                )

            start_box = [ix1, iy1, ix2, iy2]
            end_box = [iex1, iey1, iex2, iey2]
            _validate_box(start_box, "Insertion box")
            _validate_box(end_box, "End box")

            output_path, result_frames, pred_boxes, metrics = \
                run_pipeline_insertion(
                    video_path=video_path,
                    edited_first_frame=_ins_state["edited_frame"],
                    start_box=start_box,
                    end_box=end_box,
                    prompt=scene_prompt,
                    use_vace=use_vace_ins,
                    progress_callback=lambda f, m: progress(f, desc=m)
                )

            frames = load_all_frames(video_path)
            comparison = create_comparison_strip(
                frames, result_frames, pred_boxes
            )
            return (
                output_path,
                Image.fromarray(comparison),
                metrics
            )

        video_input_ins.change(
            fn=on_video_upload_ins,
            inputs=[video_input_ins],
            outputs=[first_frame_ins, video_info_ins]
        )

        preview_qwen_btn.click(
            fn=on_preview_qwen,
            inputs=[
                video_input_ins,
                ix1, iy1, ix2, iy2,
                object_description,
            ],
            outputs=[edited_frame_preview, qwen_status]
        )

        # run_ins_btn.click(
        #     fn=on_run_insertion,
        #     inputs=[
        #         video_input_ins,
        #         ix1, iy1, ix2, iy2,
        #         iex1, iey1, iex2, iey2,
        #         scene_prompt,
        #         use_vace_ins
        #     ],
        #     outputs=[output_video_ins, comparison_ins, metrics_ins]
        # )

    return demo


if __name__ == "__main__":
    demo = build_interface()
    demo.launch(share=True)