linoyts HF Staff commited on
Commit
35a2452
·
verified ·
1 Parent(s): 9981aff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -126,9 +126,9 @@ _depth_processor = None
126
  def _get_pose_processor():
127
  global _pose_processor
128
  if _pose_processor is None:
129
- from controlnet_aux import DWposeDetector
130
- _pose_processor = DWposeDetector.from_pretrained_default()
131
- print("[Preprocess] DWPose processor loaded")
132
  return _pose_processor
133
 
134
 
@@ -170,14 +170,14 @@ def extract_first_frame(video_path: str) -> str:
170
 
171
 
172
  def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
173
- """Extract DWPose skeletons from each frame. Returns float [0,1] frames."""
174
  processor = _get_pose_processor()
175
  result = []
176
  for frame in frames:
177
  pil = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
178
- pose_img = processor(pil, include_body=True, include_hand=True, include_face=True)
179
  if not isinstance(pose_img, Image.Image):
180
- pose_img = Image.fromarray(pose_img.astype(np.uint8))
181
  pose_img = pose_img.convert("RGB").resize((width, height), Image.BILINEAR)
182
  result.append(np.array(pose_img).astype(np.float32) / 255.0)
183
  return result
@@ -241,7 +241,7 @@ def preprocess_conditioning_video(
241
  Image.fromarray(frames[0]).save(first_png)
242
 
243
  # Process based on mode
244
- if mode == "Pose (DWPose)":
245
  processed = preprocess_video_pose(frames, width, height)
246
  elif mode == "Canny Edge":
247
  processed = preprocess_video_canny(frames, width, height)
@@ -713,7 +713,7 @@ pipeline = LTX23UnifiedPipeline(
713
  distilled_checkpoint_path=checkpoint_path,
714
  spatial_upsampler_path=spatial_upsampler_path,
715
  gemma_root=gemma_root,
716
- # ic_loras=ic_loras,
717
  quantization=QuantizationPolicy.fp8_cast(),
718
  )
719
 
@@ -1013,12 +1013,12 @@ with gr.Blocks(title="LTX-2.3 Unified: V2V + I2V + A2V") as demo:
1013
  video_preprocess = gr.Dropdown(
1014
  label="Video Preprocessing",
1015
  choices=[
1016
- "Pose (DWPose)",
1017
  "Canny Edge",
1018
  "Depth (MiDaS)",
1019
  "Raw (no preprocessing)",
1020
  ],
1021
- value="Pose (DWPose)",
1022
  info="Strips appearance from video → style comes from image/prompt instead",
1023
  )
1024
  input_audio = gr.Audio(
 
126
def _get_pose_processor():
    """Return the process-wide OpenPose detector, constructing it on first use.

    The detector is cached in the module-level ``_pose_processor`` global so
    repeated calls reuse one instance.
    """
    global _pose_processor
    # Fast path: already constructed on an earlier call.
    if _pose_processor is not None:
        return _pose_processor
    # Deferred import: keeps controlnet_aux off the critical startup path.
    from controlnet_aux import OpenposeDetector
    _pose_processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
    print("[Preprocess] OpenPose processor loaded")
    return _pose_processor
133
 
134
 
 
170
 
171
 
172
def preprocess_video_pose(frames: list[np.ndarray], width: int, height: int) -> list[np.ndarray]:
    """Extract OpenPose skeletons from each frame. Returns float [0,1] frames."""
    detector = _get_pose_processor()

    def _skeleton(rgb: np.ndarray) -> np.ndarray:
        # Run the pose detector on one frame (body + hands + face).
        source = Image.fromarray(rgb.astype(np.uint8)).convert("RGB")
        pose = detector(source, hand_and_face=True)
        # The detector may hand back an ndarray instead of a PIL image;
        # normalize to a PIL image before resizing.
        if not isinstance(pose, Image.Image):
            pose = Image.fromarray(np.array(pose).astype(np.uint8))
        pose = pose.convert("RGB").resize((width, height), Image.BILINEAR)
        return np.array(pose).astype(np.float32) / 255.0

    return [_skeleton(frame) for frame in frames]
 
241
  Image.fromarray(frames[0]).save(first_png)
242
 
243
  # Process based on mode
244
+ if mode == "Pose (OpenPose)":
245
  processed = preprocess_video_pose(frames, width, height)
246
  elif mode == "Canny Edge":
247
  processed = preprocess_video_canny(frames, width, height)
 
713
  distilled_checkpoint_path=checkpoint_path,
714
  spatial_upsampler_path=spatial_upsampler_path,
715
  gemma_root=gemma_root,
716
+ ic_loras=ic_loras,
717
  quantization=QuantizationPolicy.fp8_cast(),
718
  )
719
 
 
1013
  video_preprocess = gr.Dropdown(
1014
  label="Video Preprocessing",
1015
  choices=[
1016
+ "Pose (OpenPose)",
1017
  "Canny Edge",
1018
  "Depth (MiDaS)",
1019
  "Raw (no preprocessing)",
1020
  ],
1021
+ value="Pose (OpenPose)",
1022
  info="Strips appearance from video → style comes from image/prompt instead",
1023
  )
1024
  input_audio = gr.Audio(