abreza committed on
Commit
9b7c995
·
1 Parent(s): 0d0c2b2
Files changed (1) hide show
  1. app.py +45 -16
app.py CHANGED
@@ -397,18 +397,22 @@ def process_video(video_path, camera_movement, generate_ttm=True, progress=gr.Pr
397
  # --- GRADIO INTERFACE ---
398
  with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as demo:
399
  gr.Markdown("# 🎬 Video to Point Cloud & TTM Wan Generator")
 
400
 
401
- # Shared state for TTM files
402
- first_frame_file = gr.State()
403
- motion_signal_file = gr.State()
404
- mask_file = gr.State()
405
 
406
  with gr.Row():
407
  with gr.Column(scale=1):
408
  gr.Markdown("### 1. Tracking & Viewpoint")
409
  video_input = gr.Video(label="Upload Video")
410
  camera_movement = gr.Dropdown(
411
- choices=CAMERA_MOVEMENTS, value="static", label="Camera Movement")
 
 
 
412
  generate_btn = gr.Button(
413
  "🚀 1. Run Spatial Tracker", variant="primary")
414
 
@@ -418,7 +422,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as
418
  with gr.Column(scale=1):
419
  gr.Markdown("### 2. Time-to-Move (Wan 2.2)")
420
  ttm_prompt = gr.Textbox(
421
- label="Prompt", placeholder="Describe the scene (e.g., 'A monkey walking in the forest, high quality')")
 
 
422
 
423
  with gr.Row():
424
  tweak_idx = gr.Number(
@@ -431,30 +437,53 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as
431
  wan_output_video = gr.Video(label="Final High-Quality TTM Video")
432
  wan_status = gr.Markdown("Awaiting 3D inputs...")
433
 
 
434
  with gr.Accordion("Debug: TTM Intermediate Inputs", open=False):
435
  with gr.Row():
 
 
436
  motion_signal_output = gr.Video(label="motion_signal.mp4")
437
  mask_output = gr.Video(label="mask.mp4")
438
- first_frame_output = gr.Image(label="first_frame.png")
439
 
440
- # Event Handlers
 
 
441
  generate_btn.click(
442
  fn=process_video,
443
  inputs=[video_input, camera_movement],
444
- outputs=[output_video, motion_signal_output,
445
- mask_output, first_frame_output, status_text]
 
 
 
 
 
446
  ).then(
447
- # Link output files to state for the next step
448
- fn=lambda a, b, c, d: (b, c, d),
449
- inputs=[output_video, motion_signal_output,
450
- mask_output, first_frame_output],
 
 
 
 
 
 
451
  outputs=[motion_signal_file, mask_file, first_frame_file]
452
  )
453
 
 
454
  wan_generate_btn.click(
455
  fn=run_wan_ttm_generation,
456
- inputs=[ttm_prompt, tweak_idx, tstrong_idx,
457
- first_frame_file, motion_signal_file, mask_file],
 
 
 
 
 
 
458
  outputs=[wan_output_video, wan_status]
459
  )
460
 
 
397
  # --- GRADIO INTERFACE ---
398
  with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as demo:
399
  gr.Markdown("# 🎬 Video to Point Cloud & TTM Wan Generator")
400
+ gr.Markdown("Transform standard videos into 3D-aware motion signals for Time-to-Move (TTM) generation.")
401
 
402
+ # Shared state for TTM files - initialized as empty strings
403
+ first_frame_file = gr.State("")
404
+ motion_signal_file = gr.State("")
405
+ mask_file = gr.State("")
406
 
407
  with gr.Row():
408
  with gr.Column(scale=1):
409
  gr.Markdown("### 1. Tracking & Viewpoint")
410
  video_input = gr.Video(label="Upload Video")
411
  camera_movement = gr.Dropdown(
412
+ choices=CAMERA_MOVEMENTS,
413
+ value="static",
414
+ label="Camera Movement"
415
+ )
416
  generate_btn = gr.Button(
417
  "🚀 1. Run Spatial Tracker", variant="primary")
418
 
 
422
  with gr.Column(scale=1):
423
  gr.Markdown("### 2. Time-to-Move (Wan 2.2)")
424
  ttm_prompt = gr.Textbox(
425
+ label="Prompt",
426
+ placeholder="Describe the scene (e.g., 'A monkey walking in the forest, high quality')"
427
+ )
428
 
429
  with gr.Row():
430
  tweak_idx = gr.Number(
 
437
  wan_output_video = gr.Video(label="Final High-Quality TTM Video")
438
  wan_status = gr.Markdown("Awaiting 3D inputs...")
439
 
440
+ # The Accordion provides a visual check of what TTM is using
441
  with gr.Accordion("Debug: TTM Intermediate Inputs", open=False):
442
  with gr.Row():
443
+ # IMPORTANT: type="filepath" prevents the ValueError by passing
444
+ # the path string instead of the raw pixel array.
445
  motion_signal_output = gr.Video(label="motion_signal.mp4")
446
  mask_output = gr.Video(label="mask.mp4")
447
+ first_frame_output = gr.Image(label="first_frame.png", type="filepath")
448
 
449
+ # --- Event Handlers ---
450
+
451
+ # 1. Process 3D Tracking and save results to temporary local files
452
  generate_btn.click(
453
  fn=process_video,
454
  inputs=[video_input, camera_movement],
455
+ outputs=[
456
+ output_video,
457
+ motion_signal_output,
458
+ mask_output,
459
+ first_frame_output,
460
+ status_text
461
+ ]
462
  ).then(
463
+ # 2. Update the State variables with the file paths from the previous step.
464
+ # We ignore the 'output_video' (index 0) and 'status_text' (index 4).
465
+ fn=lambda a, b, c, d, e: (b, c, d),
466
+ inputs=[
467
+ output_video,
468
+ motion_signal_output,
469
+ mask_output,
470
+ first_frame_output,
471
+ status_text
472
+ ],
473
  outputs=[motion_signal_file, mask_file, first_frame_file]
474
  )
475
 
476
+ # 3. Use the stored paths to run the Wan 2.2 TTM Dual-Clock Denoising loop
477
  wan_generate_btn.click(
478
  fn=run_wan_ttm_generation,
479
+ inputs=[
480
+ ttm_prompt,
481
+ tweak_idx,
482
+ tstrong_idx,
483
+ first_frame_file,
484
+ motion_signal_file,
485
+ mask_file
486
+ ],
487
  outputs=[wan_output_video, wan_status]
488
  )
489