abreza committed on
Commit
9b7c995
·
1 Parent(s): 0d0c2b2
Files changed (1) hide show
  1. app.py +45 -16
app.py CHANGED
@@ -397,18 +397,22 @@ def process_video(video_path, camera_movement, generate_ttm=True, progress=gr.Pr
397
  # --- GRADIO INTERFACE ---
398
  with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as demo:
399
  gr.Markdown("# 🎬 Video to Point Cloud & TTM Wan Generator")
 
400
 
401
- # Shared state for TTM files
402
- first_frame_file = gr.State()
403
- motion_signal_file = gr.State()
404
- mask_file = gr.State()
405
 
406
  with gr.Row():
407
  with gr.Column(scale=1):
408
  gr.Markdown("### 1. Tracking & Viewpoint")
409
  video_input = gr.Video(label="Upload Video")
410
  camera_movement = gr.Dropdown(
411
- choices=CAMERA_MOVEMENTS, value="static", label="Camera Movement")
 
 
 
412
  generate_btn = gr.Button(
413
  "🚀 1. Run Spatial Tracker", variant="primary")
414
 
@@ -418,7 +422,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as
418
  with gr.Column(scale=1):
419
  gr.Markdown("### 2. Time-to-Move (Wan 2.2)")
420
  ttm_prompt = gr.Textbox(
421
- label="Prompt", placeholder="Describe the scene (e.g., 'A monkey walking in the forest, high quality')")
 
 
422
 
423
  with gr.Row():
424
  tweak_idx = gr.Number(
@@ -431,30 +437,53 @@ with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as
431
  wan_output_video = gr.Video(label="Final High-Quality TTM Video")
432
  wan_status = gr.Markdown("Awaiting 3D inputs...")
433
 
 
434
  with gr.Accordion("Debug: TTM Intermediate Inputs", open=False):
435
  with gr.Row():
 
 
436
  motion_signal_output = gr.Video(label="motion_signal.mp4")
437
  mask_output = gr.Video(label="mask.mp4")
438
- first_frame_output = gr.Image(label="first_frame.png")
439
 
440
- # Event Handlers
 
 
441
  generate_btn.click(
442
  fn=process_video,
443
  inputs=[video_input, camera_movement],
444
- outputs=[output_video, motion_signal_output,
445
- mask_output, first_frame_output, status_text]
 
 
 
 
 
446
  ).then(
447
- # Link output files to state for the next step
448
- fn=lambda a, b, c, d: (b, c, d),
449
- inputs=[output_video, motion_signal_output,
450
- mask_output, first_frame_output],
 
 
 
 
 
 
451
  outputs=[motion_signal_file, mask_file, first_frame_file]
452
  )
453
 
 
454
  wan_generate_btn.click(
455
  fn=run_wan_ttm_generation,
456
- inputs=[ttm_prompt, tweak_idx, tstrong_idx,
457
- first_frame_file, motion_signal_file, mask_file],
 
 
 
 
 
 
458
  outputs=[wan_output_video, wan_status]
459
  )
460
 
 
397
  # --- GRADIO INTERFACE ---
398
  with gr.Blocks(theme=gr.themes.Soft(), title="🎬 TTM Wan Video Generator") as demo:
399
  gr.Markdown("# 🎬 Video to Point Cloud & TTM Wan Generator")
400
+ gr.Markdown("Transform standard videos into 3D-aware motion signals for Time-to-Move (TTM) generation.")
401
 
402
+ # Shared state for TTM files - initialized as empty strings
403
+ first_frame_file = gr.State("")
404
+ motion_signal_file = gr.State("")
405
+ mask_file = gr.State("")
406
 
407
  with gr.Row():
408
  with gr.Column(scale=1):
409
  gr.Markdown("### 1. Tracking & Viewpoint")
410
  video_input = gr.Video(label="Upload Video")
411
  camera_movement = gr.Dropdown(
412
+ choices=CAMERA_MOVEMENTS,
413
+ value="static",
414
+ label="Camera Movement"
415
+ )
416
  generate_btn = gr.Button(
417
  "🚀 1. Run Spatial Tracker", variant="primary")
418
 
 
422
  with gr.Column(scale=1):
423
  gr.Markdown("### 2. Time-to-Move (Wan 2.2)")
424
  ttm_prompt = gr.Textbox(
425
+ label="Prompt",
426
+ placeholder="Describe the scene (e.g., 'A monkey walking in the forest, high quality')"
427
+ )
428
 
429
  with gr.Row():
430
  tweak_idx = gr.Number(
 
437
  wan_output_video = gr.Video(label="Final High-Quality TTM Video")
438
  wan_status = gr.Markdown("Awaiting 3D inputs...")
439
 
440
+ # The Accordion provides a visual check of what TTM is using
441
  with gr.Accordion("Debug: TTM Intermediate Inputs", open=False):
442
  with gr.Row():
443
+ # IMPORTANT: type="filepath" prevents the ValueError by passing
444
+ # the path string instead of the raw pixel array.
445
  motion_signal_output = gr.Video(label="motion_signal.mp4")
446
  mask_output = gr.Video(label="mask.mp4")
447
+ first_frame_output = gr.Image(label="first_frame.png", type="filepath")
448
 
449
+ # --- Event Handlers ---
450
+
451
+ # 1. Process 3D Tracking and save results to temporary local files
452
  generate_btn.click(
453
  fn=process_video,
454
  inputs=[video_input, camera_movement],
455
+ outputs=[
456
+ output_video,
457
+ motion_signal_output,
458
+ mask_output,
459
+ first_frame_output,
460
+ status_text
461
+ ]
462
  ).then(
463
+ # 2. Update the State variables with the file paths from the previous step.
464
+ # We ignore the 'output_video' (index 0) and 'status_text' (index 4).
465
+ fn=lambda a, b, c, d, e: (b, c, d),
466
+ inputs=[
467
+ output_video,
468
+ motion_signal_output,
469
+ mask_output,
470
+ first_frame_output,
471
+ status_text
472
+ ],
473
  outputs=[motion_signal_file, mask_file, first_frame_file]
474
  )
475
 
476
+ # 3. Use the stored paths to run the Wan 2.2 TTM Dual-Clock Denoising loop
477
  wan_generate_btn.click(
478
  fn=run_wan_ttm_generation,
479
+ inputs=[
480
+ ttm_prompt,
481
+ tweak_idx,
482
+ tstrong_idx,
483
+ first_frame_file,
484
+ motion_signal_file,
485
+ mask_file
486
+ ],
487
  outputs=[wan_output_video, wan_status]
488
  )
489