File size: 14,437 Bytes
f3d0a26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import gradio as gr
import numpy as np
from PIL import Image
from visualizer import draw_box_on_frame, create_comparison_strip
from preview import preview_trajectory
from pipeline_adapter import (
    extract_first_frame,
    load_all_frames,
    run_pipeline_motion_edit,
    run_pipeline_insertion        # TODO: confirm pipeline_adapter exports this; the import fails at startup if it is missing
)


def build_interface():

    # Load Qwen-Image-Edit once at startup (not per click — the model is ~20GB)
    _qwen_edit_pipe = None
    try:
        from frame_editor import load_qwen_image_edit
        _qwen_edit_pipe = load_qwen_image_edit(use_lightning=True, device="cuda")
        print("Qwen-Image-Edit ready.")
    except Exception as e:
        print(f"Qwen-Image-Edit not available: {e}")

    with gr.Blocks(title="TRACE Prototype", theme=gr.themes.Soft()) as demo:

        gr.Markdown("# TRACE Prototype β€” Object Motion Editing")

        with gr.Tabs():

            # ── Tab 1: Motion Edit (existing) ─────────────────────────
            # with gr.Tab("Motion Path Edit"):
            #     gr.Markdown(
            #         "Move an **existing object** in the video "
            #         "to a new trajectory."
            #     )

            #     with gr.Row():
            #         with gr.Column():
            #             video_input_edit = gr.Video(label="Input Video")
            #             video_info_edit  = gr.Markdown("")

            #         with gr.Column():
            #             first_frame_edit = gr.Image(
            #                 label="First Frame + Trajectory Preview",
            #                 interactive=False
            #             )

            #     gr.Markdown("**Start Box** β€” draw around the object")
            #     with gr.Row():
            #         sx1 = gr.Number(label="x1", value=100, precision=0)
            #         sy1 = gr.Number(label="y1", value=100, precision=0)
            #         sx2 = gr.Number(label="x2", value=200, precision=0)
            #         sy2 = gr.Number(label="y2", value=200, precision=0)

            #     gr.Markdown("**End Box** β€” where you want it to go")
            #     with gr.Row():
            #         ex1 = gr.Number(label="x1", value=500, precision=0)
            #         ey1 = gr.Number(label="y1", value=200, precision=0)
            #         ex2 = gr.Number(label="x2", value=600, precision=0)
            #         ey2 = gr.Number(label="y2", value=300, precision=0)

            #     prompt_edit = gr.Textbox(
            #         label="Scene Description",
            #         placeholder="a dog running in a park..."
            #     )

            #     with gr.Row():
            #         stage1_method = gr.Radio(
            #             choices=["linear", "cotracker"],
            #             value="linear",
            #             label="Stage 1 Method"
            #         )
            #         use_vace_edit = gr.Checkbox(
            #             label="Use VACE",
            #             value=False
            #         )

            #     run_edit_btn = gr.Button("Run Motion Edit", variant="primary")

            #     with gr.Row():
            #         output_video_edit = gr.Video(label="Output Video")
            #         metrics_edit      = gr.Markdown("")

            #     comparison_edit = gr.Image(label="Frame Comparison", interactive=False)

            # ── Tab 2: Object Insertion (NEW β€” uses Qwen) ─────────────
            with gr.Tab("Object Insertion"):
                gr.Markdown(
                    "Insert a **new object** into the video using "
                    "Qwen to edit the first frame, then propagate."
                )

                with gr.Row():
                    with gr.Column():
                        video_input_ins = gr.Video(label="Input Video")
                        video_info_ins  = gr.Markdown("")

                    with gr.Column():
                        first_frame_ins = gr.Image(
                            label="First Frame Preview",
                            interactive=False
                        )

                gr.Markdown("**Insertion Box** β€” where to place the new object")
                with gr.Row():
                    ix1 = gr.Number(label="x1", value=40, precision=0)
                    iy1 = gr.Number(label="y1", value=40, precision=0)
                    ix2 = gr.Number(label="x2", value=300, precision=0)
                    iy2 = gr.Number(label="y2", value=300, precision=0)

                gr.Markdown("**End Box** β€” where the object should arrive")
                with gr.Row():
                    iex1 = gr.Number(label="x1", value=500, precision=0)
                    iey1 = gr.Number(label="y1", value=200, precision=0)
                    iex2 = gr.Number(label="x2", value=600, precision=0)
                    iey2 = gr.Number(label="y2", value=300, precision=0)

                # ── The Qwen-specific inputs ───────────────────────────
                gr.Markdown("**Object Description** β€” what Qwen will insert")
                with gr.Row():
                    with gr.Column():
                        object_description = gr.Textbox(
                            label="Object to Insert (Qwen prompt)",
                            placeholder="a red helium balloon with a white string",
                            info="Qwen uses this to paint the object into frame 1"
                        )
                        scene_prompt = gr.Textbox(
                            label="Full Scene Prompt (for video synthesis)",
                            placeholder="a peaceful park scene with a red balloon"
                        )

                    with gr.Column():
                        gr.Markdown("Using **Qwen-Image-Edit-2511** for object insertion")

                        # use_vace_ins = gr.Checkbox(
                        #     label="Use VACE",
                        #     value=False
                        # )

                # ── Qwen output preview before running video ───────────
                gr.Markdown("**Step 1 Preview** β€” see Qwen's edit before running video")
                preview_qwen_btn = gr.Button(
                    "Preview First Frame Edit",
                    variant="secondary"
                )
                edited_frame_preview = gr.Image(
                    label="Qwen-Edited First Frame",
                    interactive=False
                )
                qwen_status = gr.Markdown("")

                # gr.Markdown("---")
                # run_ins_btn = gr.Button(
                #     "Run Full Insertion Pipeline",
                #     variant="primary"
                # )

                # with gr.Row():
                #     output_video_ins = gr.Video(label="Output Video")
                #     metrics_ins      = gr.Markdown("")

                # comparison_ins = gr.Image(
                #     label="Frame Comparison",
                #     interactive=False
                # )

        # ── Wire Up Tab 1 ─────────────────────────────────────────────
        #_state = {"frames": None, "first_frame": None}
        

        # def on_video_upload_edit(video_path):
        #     if video_path is None:
        #         return None, "Upload a video."
        #     first_frame = extract_first_frame(video_path)
        #     _state["first_frame"] = first_frame
        #     return Image.fromarray(first_frame), "Video loaded."

        # def on_boxes_changed_edit(sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2):
        #     if _state["first_frame"] is None:
        #         return None
        #     from preview import preview_trajectory
        #     preview = preview_trajectory(
        #         _state["first_frame"],
        #         [sx1, sy1, sx2, sy2],
        #         [ex1, ey1, ex2, ey2]
        #     )
        #     return Image.fromarray(preview)

        # video_input_edit.change(
        #     fn=on_video_upload_edit,
        #     inputs=[video_input_edit],
        #     outputs=[first_frame_edit, video_info_edit]
        # )

        # for inp in [sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2]:
        #     inp.change(
        #         fn=on_boxes_changed_edit,
        #         inputs=[sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2],
        #         outputs=[first_frame_edit]
        #     )
        
        # def on_run_edit(video_path, sx1, sy1, sx2, sy2, ex1, ey1, ex2, ey2,
        #                 prompt, stage1_method, use_vace, progress=gr.Progress()):
        #     if video_path is None:
        #         raise gr.Error("Please upload a video first.")
        #     if sx2 <= sx1 or sy2 <= sy1:
        #         raise gr.Error("Start box is invalid: x2 must be > x1, y2 must be > y1")
        #     if ex2 <= ex1 or ey2 <= ey1:
        #         raise gr.Error("End box is invalid: x2 must be > x1, y2 must be > y1")

        #     def prog(frac, msg):
        #         progress(frac, desc=msg)

        #     output_path, result_frames, pred_boxes, metrics = \
        #         run_pipeline_motion_edit(
        #             video_path=video_path,
        #             start_box=[sx1, sy1, sx2, sy2],
        #             end_box=[ex1, ey1, ex2, ey2],
        #             prompt=prompt,
        #             stage1_method=stage1_method,
        #             use_vace=use_vace,
        #             progress_callback=prog
        #         )

        #     if _state["frames"] is None:
        #         _state["frames"] = load_all_frames(video_path)

        #     comparison = create_comparison_strip(
        #         _state["frames"],
        #         result_frames,
        #         pred_boxes,
        #         sample_ts=[0, 20, 40, 60, 80]
        #     )

        #     return output_path, Image.fromarray(comparison), metrics


        # run_edit_btn.click(
        #     fn=on_run_edit,
        #     inputs=[
        #         video_input_edit,
        #         sx1, sy1, sx2, sy2,
        #         ex1, ey1, ex2, ey2,
        #         prompt_edit, stage1_method, use_vace_edit
        #     ],
        #     outputs=[output_video_edit, comparison_edit, metrics_edit]
        # )

        # ── Wire Up Tab 2 (Qwen insertion) ────────────────────────────
        _ins_state = {"first_frame": None, "edited_frame": None}
        

        def on_video_upload_ins(video_path):
            """Cache and preview the first frame of the selected video.

            Args:
                video_path: Value of the video input; ``None`` when no video
                    is selected.

            Returns:
                A ``(preview, status)`` pair for the first-frame image and the
                status Markdown: ``(None, "Upload a video.")`` when there is
                no video, otherwise the first frame as a PIL image and
                ``"Video loaded."``.
            """
            # Frames cached for a previous video must not outlive it: the
            # preview and insertion handlers read this state, so a stale
            # entry would make them operate on a video that is no longer
            # the selected one.
            _ins_state["first_frame"] = None
            _ins_state["edited_frame"] = None

            if video_path is None:
                return None, "Upload a video."

            first_frame = extract_first_frame(video_path)
            _ins_state["first_frame"] = first_frame
            return Image.fromarray(first_frame), "Video loaded."

        def on_preview_qwen(
            video_path,
            ix1, iy1, ix2, iy2,
            object_description,
            progress=gr.Progress()
        ):
            """Edit the cached first frame with Qwen-Image-Edit and preview it.

            Args:
                video_path: Value of the video input. Not used here; the
                    frame comes from the cached first frame instead.
                ix1, iy1, ix2, iy2: Corners of the insertion box.
                object_description: Text describing the object to insert.
                progress: Gradio progress tracker.

            Returns:
                A ``(preview, status)`` pair: the edited frame with the
                insertion box drawn on it, as a PIL image, and a status
                message.

            Raises:
                gr.Error: If no first frame is cached, the description is
                    empty, the Qwen pipeline did not load, or the insertion
                    box is incomplete or has non-positive width/height.
            """
            if _ins_state["first_frame"] is None:
                raise gr.Error("Upload a video first.")
            # Tolerate a None value so an empty description produces the
            # friendly error below instead of an AttributeError.
            if not (object_description or "").strip():
                raise gr.Error("Enter an object description.")
            if _qwen_edit_pipe is None:
                raise gr.Error("Qwen-Image-Edit failed to load at startup. Check logs.")

            insertion_box = [ix1, iy1, ix2, iy2]
            # An emptied number field is expected to arrive as None; reject
            # it before the comparisons below would fail on it.
            if any(coord is None for coord in insertion_box):
                raise gr.Error(
                    "Insertion box is incomplete: fill in x1, y1, x2 and y2."
                )
            if ix2 <= ix1 or iy2 <= iy1:
                raise gr.Error(
                    "Insertion box is invalid: x2 must be > x1, y2 must be > y1"
                )

            progress(0.3, desc="Editing first frame with Qwen-Image-Edit...")
            # Imported lazily, as in the original handler, so the module is
            # only required once an edit is actually requested.
            from frame_editor import insert_object_qwen_edit
            edited = insert_object_qwen_edit(
                first_frame=_ins_state["first_frame"],
                box=insertion_box,
                object_description=object_description,
                pipe=_qwen_edit_pipe,
            )

            # Keep the edited frame for the full insertion pipeline.
            _ins_state["edited_frame"] = edited

            # NOTE(review): assumes draw_box_on_frame returns a new image and
            # leaves `edited` untouched; otherwise the cached frame would
            # carry the overlay — confirm against visualizer.
            preview = draw_box_on_frame(
                edited,
                insertion_box,
                color=(255, 220, 0),
                label="inserted here"
            )

            progress(1.0, desc="Done!")
            return (
                Image.fromarray(preview),
                "First frame edited."
            )


        def on_run_insertion(
            video_path,
            ix1, iy1, ix2, iy2,
            iex1, iey1, iex2, iey2,
            scene_prompt,
            use_vace_ins,
            progress=gr.Progress()
        ):
            """Run the full insertion pipeline from the Qwen-edited first frame.

            Args:
                video_path: Value of the video input.
                ix1, iy1, ix2, iy2: Corners of the insertion (start) box.
                iex1, iey1, iex2, iey2: Corners of the end box.
                scene_prompt: Prompt forwarded to the pipeline.
                use_vace_ins: Forwarded to the pipeline as ``use_vace``.
                progress: Gradio progress tracker.

            Returns:
                ``(output_path, comparison, metrics)``: the pipeline's output
                path, a comparison strip as a PIL image, and the pipeline's
                metrics value.

            Raises:
                gr.Error: If no video is selected, no edited first frame is
                    cached, or either box is incomplete or has non-positive
                    width/height.
            """
            if video_path is None:
                raise gr.Error("Upload a video first.")
            if _ins_state["edited_frame"] is None:
                raise gr.Error(
                    "Run 'Preview First Frame Edit' first — "
                    "the edited frame is needed as appearance reference."
                )

            start_box = [ix1, iy1, ix2, iy2]
            end_box = [iex1, iey1, iex2, iey2]
            # Reject unusable boxes up front so the user gets a clear message
            # instead of a failure somewhere inside the pipeline.
            for box_name, (x1, y1, x2, y2) in (
                ("Insertion", start_box),
                ("End", end_box),
            ):
                if any(coord is None for coord in (x1, y1, x2, y2)):
                    raise gr.Error(
                        f"{box_name} box is incomplete: fill in x1, y1, x2 and y2."
                    )
                if x2 <= x1 or y2 <= y1:
                    raise gr.Error(
                        f"{box_name} box is invalid: "
                        "x2 must be > x1, y2 must be > y1"
                    )

            def report_progress(fraction, message):
                # Adapt the pipeline's (fraction, message) callback to Gradio.
                progress(fraction, desc=message)

            output_path, result_frames, pred_boxes, metrics = \
                run_pipeline_insertion(
                    video_path=video_path,
                    edited_first_frame=_ins_state["edited_frame"],
                    start_box=start_box,
                    end_box=end_box,
                    prompt=scene_prompt,
                    use_vace=use_vace_ins,
                    progress_callback=report_progress
                )

            # Reload the source frames to build the comparison strip.
            frames = load_all_frames(video_path)
            comparison = create_comparison_strip(
                frames, result_frames, pred_boxes
            )

            return (
                output_path,
                Image.fromarray(comparison),
                metrics
            )

        video_input_ins.change(
            fn=on_video_upload_ins,
            inputs=[video_input_ins],
            outputs=[first_frame_ins, video_info_ins]
        )

        preview_qwen_btn.click(
            fn=on_preview_qwen,
            inputs=[
                video_input_ins,
                ix1, iy1, ix2, iy2,
                object_description,
            ],
            outputs=[edited_frame_preview, qwen_status]
        )

        # run_ins_btn.click(
        #     fn=on_run_insertion,
        #     inputs=[
        #         video_input_ins,
        #         ix1, iy1, ix2, iy2,
        #         iex1, iey1, iex2, iey2,
        #         scene_prompt,
        #         use_vace_ins
        #     ],
        #     outputs=[output_video_ins, comparison_ins, metrics_ins]
        # )

    return demo


if __name__ == "__main__":
    # Script entry point: build the Blocks app and start the Gradio server.
    demo = build_interface()
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)