Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
#2
by
linoyts
HF Staff
- opened
app.py
CHANGED
|
@@ -11,6 +11,9 @@ from torchvision import transforms
|
|
| 11 |
import random
|
| 12 |
from controlnet_aux import CannyDetector
|
| 13 |
from image_gen_aux import DepthPreprocessor
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
dtype = torch.bfloat16
|
| 16 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -24,6 +27,11 @@ pipeline.vae.enable_tiling()
|
|
| 24 |
canny_processor = CannyDetector()
|
| 25 |
depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
CONTROL_LORAS = {
|
| 28 |
"canny": {
|
| 29 |
"repo": "Lightricks/LTX-Video-ICLoRA-canny-13b-0.9.7",
|
|
@@ -41,6 +49,7 @@ CONTROL_LORAS = {
|
|
| 41 |
"adapter_name": "pose_lora"
|
| 42 |
}
|
| 43 |
}
|
|
|
|
| 44 |
@spaces.GPU()
|
| 45 |
def read_video(video) -> torch.Tensor:
|
| 46 |
"""
|
|
@@ -110,20 +119,56 @@ def process_video_for_depth(video):
|
|
| 110 |
Process video for depth control.
|
| 111 |
"""
|
| 112 |
print("Processing video for depth control...")
|
| 113 |
-
|
| 114 |
for frame in video:
|
| 115 |
-
|
| 116 |
-
return
|
| 117 |
|
|
|
|
| 118 |
def process_video_for_pose(video):
|
| 119 |
"""
|
| 120 |
-
Process video for pose control.
|
| 121 |
-
|
| 122 |
-
TODO: Implement pose estimation processing
|
| 123 |
"""
|
| 124 |
print("Processing video for pose control...")
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
def process_video_for_control(video, control_type):
|
| 129 |
"""Process video based on the selected control type"""
|
|
|
|
| 11 |
import random
|
| 12 |
from controlnet_aux import CannyDetector
|
| 13 |
from image_gen_aux import DepthPreprocessor
|
| 14 |
+
import mediapipe as mp
|
| 15 |
+
from PIL import Image
|
| 16 |
+
import cv2
|
| 17 |
|
| 18 |
dtype = torch.bfloat16
|
| 19 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 27 |
canny_processor = CannyDetector()
|
| 28 |
depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
|
| 29 |
|
| 30 |
+
# Initialize MediaPipe pose estimation
|
| 31 |
+
mp_drawing = mp.solutions.drawing_utils
|
| 32 |
+
mp_drawing_styles = mp.solutions.drawing_styles
|
| 33 |
+
mp_pose = mp.solutions.pose
|
| 34 |
+
|
| 35 |
CONTROL_LORAS = {
|
| 36 |
"canny": {
|
| 37 |
"repo": "Lightricks/LTX-Video-ICLoRA-canny-13b-0.9.7",
|
|
|
|
| 49 |
"adapter_name": "pose_lora"
|
| 50 |
}
|
| 51 |
}
|
| 52 |
+
|
| 53 |
@spaces.GPU()
|
| 54 |
def read_video(video) -> torch.Tensor:
|
| 55 |
"""
|
|
|
|
| 119 |
Process video for depth control.
|
| 120 |
"""
|
| 121 |
print("Processing video for depth control...")
|
| 122 |
+
depth_video = []
|
| 123 |
for frame in video:
|
| 124 |
+
depth_video.append(depth_processor(frame)[0].convert("RGB"))
|
| 125 |
+
return depth_video
|
| 126 |
|
| 127 |
+
@spaces.GPU()
def process_video_for_pose(video):
    """
    Process video for pose control using MediaPipe pose estimation.

    Every input frame is run through MediaPipe Pose and the detected
    landmarks and their connections are drawn on a black canvas with the
    same shape as the frame. Frames in which no pose is detected produce an
    all-black image, so the output always has one image per input frame.

    Args:
        video: Iterable of frames convertible with ``np.array``.
            NOTE(review): assumed to be 3-channel RGB PIL images, as the
            original inline comment implied -- confirm against the caller.

    Returns:
        list: One PIL image per input frame with the pose skeleton drawn on
        a black background.
    """
    print("Processing video for pose control...")
    pose_video = []

    # Drawing specs do not depend on the frame, so resolve them once.
    landmark_spec = mp_drawing_styles.get_default_pose_landmarks_style()
    # Not every MediaPipe release ships a dedicated pose-connections style
    # helper; fall back to the default DrawingSpec instead of failing with
    # AttributeError on the first frame that contains a detected pose.
    connections_style = getattr(
        mp_drawing_styles, "get_default_pose_connections_style", None
    )
    if connections_style is not None:
        connection_spec = connections_style()
    else:
        connection_spec = mp_drawing.DrawingSpec()

    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=1,
        enable_segmentation=False,
        min_detection_confidence=0.5,
        # Kept from the original call; MediaPipe documents this as ignored
        # when static_image_mode is True.
        min_tracking_confidence=0.5,
    ) as pose:
        for frame in video:
            # Convert the frame to a numpy array.
            frame_np = np.array(frame)

            # MediaPipe's Python solution API expects RGB input, so the
            # array is passed as-is; the previous RGB->BGR conversion
            # swapped the channels the detector sees.
            results = pose.process(frame_np)

            # Black background with the same dimensions as the frame.
            pose_frame = np.zeros_like(frame_np)

            # Draw pose landmarks only when a pose was detected.
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    pose_frame,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=landmark_spec,
                    connection_drawing_spec=connection_spec,
                )

            # Convert back to a PIL image for the rest of the pipeline.
            pose_video.append(Image.fromarray(pose_frame))

    return pose_video
|
| 172 |
|
| 173 |
def process_video_for_control(video, control_type):
|
| 174 |
"""Process video based on the selected control type"""
|