LTX-2-3-sync

Paused

App Files Files Community

linoyts HF Staff committed on Mar 10

Commit

35a2452

verified ·

1 Parent(s): 9981aff

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -10

app.py CHANGED Viewed

@@ -126,9 +126,9 @@ _depth_processor = None
 def _get_pose_processor():
     global _pose_processor
     if _pose_processor is None:
-        from controlnet_aux import DWposeDetector
-        _pose_processor = DWposeDetector.from_pretrained_default()
-        print("[Preprocess] DWPose processor loaded")
     return _pose_processor
@@ -170,14 +170,14 @@ def extract_first_frame(video_path: str) -> str:
 def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
-    """Extract DWPose skeletons from each frame. Returns float [0,1] frames."""
     processor = _get_pose_processor()
     result = []
     for frame in frames:
         pil = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
-        pose_img = processor(pil, include_body=True, include_hand=True, include_face=True)
         if not isinstance(pose_img, Image.Image):
-            pose_img = Image.fromarray(pose_img.astype(np.uint8))
         pose_img = pose_img.convert("RGB").resize((width, height), Image.BILINEAR)
         result.append(np.array(pose_img).astype(np.float32) / 255.0)
     return result
@@ -241,7 +241,7 @@ def preprocess_conditioning_video(
     Image.fromarray(frames[0]).save(first_png)
     # Process based on mode
-    if mode == "Pose (DWPose)":
         processed = preprocess_video_pose(frames, width, height)
     elif mode == "Canny Edge":
         processed = preprocess_video_canny(frames, width, height)
@@ -713,7 +713,7 @@ pipeline = LTX23UnifiedPipeline(
     distilled_checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
     gemma_root=gemma_root,
-    # ic_loras=ic_loras,
     quantization=QuantizationPolicy.fp8_cast(),
 )
@@ -1013,12 +1013,12 @@ with gr.Blocks(title="LTX-2.3 Unified: V2V + I2V + A2V") as demo:
                     video_preprocess = gr.Dropdown(
                         label="Video Preprocessing",
                         choices=[
-                            "Pose (DWPose)",
                             "Canny Edge",
                             "Depth (MiDaS)",
                             "Raw (no preprocessing)",
                         ],
-                        value="Pose (DWPose)",
                         info="Strips appearance from video → style comes from image/prompt instead",
                     )
             input_audio = gr.Audio(

 def _get_pose_processor():
     global _pose_processor
     if _pose_processor is None:
+        from controlnet_aux import OpenposeDetector
+        _pose_processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
+        print("[Preprocess] OpenPose processor loaded")
     return _pose_processor
 def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
+    """Extract OpenPose skeletons from each frame. Returns float [0,1] frames."""
     processor = _get_pose_processor()
     result = []
     for frame in frames:
         pil = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
+        pose_img = processor(pil, hand_and_face=True)
         if not isinstance(pose_img, Image.Image):
+            pose_img = Image.fromarray(np.array(pose_img).astype(np.uint8))
         pose_img = pose_img.convert("RGB").resize((width, height), Image.BILINEAR)
         result.append(np.array(pose_img).astype(np.float32) / 255.0)
     return result
     Image.fromarray(frames[0]).save(first_png)
     # Process based on mode
+    if mode == "Pose (OpenPose)":
         processed = preprocess_video_pose(frames, width, height)
     elif mode == "Canny Edge":
         processed = preprocess_video_canny(frames, width, height)
     distilled_checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
     gemma_root=gemma_root,
+    ic_loras=ic_loras,
     quantization=QuantizationPolicy.fp8_cast(),
 )
                     video_preprocess = gr.Dropdown(
                         label="Video Preprocessing",
                         choices=[
+                            "Pose (OpenPose)",
                             "Canny Edge",
                             "Depth (MiDaS)",
                             "Raw (no preprocessing)",
                         ],
+                        value="Pose (OpenPose)",
                         info="Strips appearance from video → style comes from image/prompt instead",
                     )
             input_audio = gr.Audio(