abreza committed on
Commit
2cbde32
·
1 Parent(s): 7e8e6f1
Files changed (1) hide show
  1. app.py +19 -10
app.py CHANGED
@@ -97,15 +97,23 @@ if not hasattr(vggt4track_model, 'infer'):
97
  tracker_model = Predictor.from_pretrained("Yuxihenry/SpatialTrackerV2-Offline")
98
  tracker_model.eval()
99
 
100
- wan_pipeline = WanImageToVideoTTMPipeline.from_pretrained(
101
- WAN_MODEL_ID,
102
- torch_dtype=torch.bfloat16
103
- )
104
- wan_pipeline.vae.enable_tiling()
105
- wan_pipeline.vae.enable_slicing()
106
- wan_pipeline.to("cuda")
107
 
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  print("✅ Tracking models loaded successfully!")
111
 
@@ -282,6 +290,7 @@ def run_wan_ttm_generation(prompt, tweak_index, tstrong_index, first_frame_path,
282
  return None, "❌ TTM Inputs missing. Please run 3D tracking first."
283
 
284
  progress(0, desc="Loading Wan TTM Pipeline...")
 
285
 
286
  progress(0.2, desc="Preparing inputs...")
287
  image = load_image(first_frame_path)
@@ -295,8 +304,8 @@ def run_wan_ttm_generation(prompt, tweak_index, tstrong_index, first_frame_path,
295
 
296
  # Match resolution logic from run_wan.py
297
  max_area = 480 * 832
298
- mod_value = wan_pipeline.vae_scale_factor_spatial * \
299
- wan_pipeline.transformer.config.patch_size[1]
300
  height, width = compute_hw_from_area(
301
  image.height, image.width, max_area, mod_value)
302
  image = image.resize((width, height))
@@ -305,7 +314,7 @@ def run_wan_ttm_generation(prompt, tweak_index, tstrong_index, first_frame_path,
305
  generator = torch.Generator(device="cuda").manual_seed(0)
306
 
307
  with torch.inference_mode():
308
- result = wan_pipeline(
309
  image=image,
310
  prompt=prompt,
311
  negative_prompt=negative_prompt,
 
97
  tracker_model = Predictor.from_pretrained("Yuxihenry/SpatialTrackerV2-Offline")
98
  tracker_model.eval()
99
 
100
+ # Lazy loading for Wan to save memory until needed
101
+ wan_pipeline = None
 
 
 
 
 
102
 
103
 
104
+ def get_wan_pipeline():
105
+ global wan_pipeline
106
+ if wan_pipeline is None:
107
+ print("🚀 Loading Wan TTM Pipeline (14B)...")
108
+ wan_pipeline = WanImageToVideoTTMPipeline.from_pretrained(
109
+ WAN_MODEL_ID,
110
+ torch_dtype=torch.bfloat16
111
+ )
112
+ wan_pipeline.vae.enable_tiling()
113
+ wan_pipeline.vae.enable_slicing()
114
+ wan_pipeline.to("cuda")
115
+ return wan_pipeline
116
+
117
 
118
  print("✅ Tracking models loaded successfully!")
119
 
 
290
  return None, "❌ TTM Inputs missing. Please run 3D tracking first."
291
 
292
  progress(0, desc="Loading Wan TTM Pipeline...")
293
+ pipe = get_wan_pipeline()
294
 
295
  progress(0.2, desc="Preparing inputs...")
296
  image = load_image(first_frame_path)
 
304
 
305
  # Match resolution logic from run_wan.py
306
  max_area = 480 * 832
307
+ mod_value = pipe.vae_scale_factor_spatial * \
308
+ pipe.transformer.config.patch_size[1]
309
  height, width = compute_hw_from_area(
310
  image.height, image.width, max_area, mod_value)
311
  image = image.resize((width, height))
 
314
  generator = torch.Generator(device="cuda").manual_seed(0)
315
 
316
  with torch.inference_mode():
317
+ result = pipe(
318
  image=image,
319
  prompt=prompt,
320
  negative_prompt=negative_prompt,