Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -126,9 +126,9 @@ _depth_processor = None
|
|
| 126 |
def _get_pose_processor():
|
| 127 |
global _pose_processor
|
| 128 |
if _pose_processor is None:
|
| 129 |
-
from controlnet_aux import
|
| 130 |
-
_pose_processor =
|
| 131 |
-
print("[Preprocess]
|
| 132 |
return _pose_processor
|
| 133 |
|
| 134 |
|
|
@@ -170,14 +170,14 @@ def extract_first_frame(video_path: str) -> str:
|
|
| 170 |
|
| 171 |
|
| 172 |
def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
|
| 173 |
-
"""Extract
|
| 174 |
processor = _get_pose_processor()
|
| 175 |
result = []
|
| 176 |
for frame in frames:
|
| 177 |
pil = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
|
| 178 |
-
pose_img = processor(pil,
|
| 179 |
if not isinstance(pose_img, Image.Image):
|
| 180 |
-
pose_img = Image.fromarray(pose_img.astype(np.uint8))
|
| 181 |
pose_img = pose_img.convert("RGB").resize((width, height), Image.BILINEAR)
|
| 182 |
result.append(np.array(pose_img).astype(np.float32) / 255.0)
|
| 183 |
return result
|
|
@@ -241,7 +241,7 @@ def preprocess_conditioning_video(
|
|
| 241 |
Image.fromarray(frames[0]).save(first_png)
|
| 242 |
|
| 243 |
# Process based on mode
|
| 244 |
-
if mode == "Pose (
|
| 245 |
processed = preprocess_video_pose(frames, width, height)
|
| 246 |
elif mode == "Canny Edge":
|
| 247 |
processed = preprocess_video_canny(frames, width, height)
|
|
@@ -713,7 +713,7 @@ pipeline = LTX23UnifiedPipeline(
|
|
| 713 |
distilled_checkpoint_path=checkpoint_path,
|
| 714 |
spatial_upsampler_path=spatial_upsampler_path,
|
| 715 |
gemma_root=gemma_root,
|
| 716 |
-
|
| 717 |
quantization=QuantizationPolicy.fp8_cast(),
|
| 718 |
)
|
| 719 |
|
|
@@ -1013,12 +1013,12 @@ with gr.Blocks(title="LTX-2.3 Unified: V2V + I2V + A2V") as demo:
|
|
| 1013 |
video_preprocess = gr.Dropdown(
|
| 1014 |
label="Video Preprocessing",
|
| 1015 |
choices=[
|
| 1016 |
-
"Pose (
|
| 1017 |
"Canny Edge",
|
| 1018 |
"Depth (MiDaS)",
|
| 1019 |
"Raw (no preprocessing)",
|
| 1020 |
],
|
| 1021 |
-
value="Pose (
|
| 1022 |
info="Strips appearance from video → style comes from image/prompt instead",
|
| 1023 |
)
|
| 1024 |
input_audio = gr.Audio(
|
|
|
|
| 126 |
def _get_pose_processor():
|
| 127 |
global _pose_processor
|
| 128 |
if _pose_processor is None:
|
| 129 |
+
from controlnet_aux import OpenposeDetector
|
| 130 |
+
_pose_processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
|
| 131 |
+
print("[Preprocess] OpenPose processor loaded")
|
| 132 |
return _pose_processor
|
| 133 |
|
| 134 |
|
|
|
|
| 170 |
|
| 171 |
|
| 172 |
def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
|
| 173 |
+
"""Extract OpenPose skeletons from each frame. Returns float [0,1] frames."""
|
| 174 |
processor = _get_pose_processor()
|
| 175 |
result = []
|
| 176 |
for frame in frames:
|
| 177 |
pil = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
|
| 178 |
+
pose_img = processor(pil, hand_and_face=True)
|
| 179 |
if not isinstance(pose_img, Image.Image):
|
| 180 |
+
pose_img = Image.fromarray(np.array(pose_img).astype(np.uint8))
|
| 181 |
pose_img = pose_img.convert("RGB").resize((width, height), Image.BILINEAR)
|
| 182 |
result.append(np.array(pose_img).astype(np.float32) / 255.0)
|
| 183 |
return result
|
|
|
|
| 241 |
Image.fromarray(frames[0]).save(first_png)
|
| 242 |
|
| 243 |
# Process based on mode
|
| 244 |
+
if mode == "Pose (OpenPose)":
|
| 245 |
processed = preprocess_video_pose(frames, width, height)
|
| 246 |
elif mode == "Canny Edge":
|
| 247 |
processed = preprocess_video_canny(frames, width, height)
|
|
|
|
| 713 |
distilled_checkpoint_path=checkpoint_path,
|
| 714 |
spatial_upsampler_path=spatial_upsampler_path,
|
| 715 |
gemma_root=gemma_root,
|
| 716 |
+
ic_loras=ic_loras,
|
| 717 |
quantization=QuantizationPolicy.fp8_cast(),
|
| 718 |
)
|
| 719 |
|
|
|
|
| 1013 |
video_preprocess = gr.Dropdown(
|
| 1014 |
label="Video Preprocessing",
|
| 1015 |
choices=[
|
| 1016 |
+
"Pose (OpenPose)",
|
| 1017 |
"Canny Edge",
|
| 1018 |
"Depth (MiDaS)",
|
| 1019 |
"Raw (no preprocessing)",
|
| 1020 |
],
|
| 1021 |
+
value="Pose (OpenPose)",
|
| 1022 |
info="Strips appearance from video → style comes from image/prompt instead",
|
| 1023 |
)
|
| 1024 |
input_audio = gr.Audio(
|