import torch
import torch.nn.functional as F
import cv2
import numpy as np
from pathlib import Path
import asyncio
from concurrent.futures import ThreadPoolExecutor
import gc


class VideoProcessor:
    def __init__(self):
        # Fall back to CPU if no GPU is available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load MiDaS (small model for speed); torch.hub caches the weights
        # after the first download
        self.model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
        self.model.to(self.device)
        self.model.eval()

        # Load the input transforms that match the small model
        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        self.transform = midas_transforms.small_transform

        # A single worker serializes jobs so only one inference runs at a time
        self.executor = ThreadPoolExecutor(max_workers=1)

    def hex_to_rgb(self, hex_color: str):
        """Convert a "#rrggbb" hex string to an (r, g, b) tuple."""
        hex_color = hex_color.lstrip('#')
        return tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
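    # e.g. hex_to_rgb("#ff8800") -> (255, 136, 0)  (illustrative example)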

    async def process_video(self, input_path: str, threshold: float,
                            bg_color: str, session_id: str) -> str:
        """Process a video asynchronously and return the output path."""
        loop = asyncio.get_running_loop()
        output_path = str(Path("/tmp") / f"{session_id}_output.mp4")

        # Run the blocking OpenCV/PyTorch work in the thread pool
        await loop.run_in_executor(
            self.executor,
            self._process_video_sync,
            input_path, output_path, threshold, bg_color
        )
        return output_path

    def _process_video_sync(self, input_path: str, output_path: str,
                            threshold: float, bg_color: str):
        """Synchronous frame-by-frame processing (runs in the worker thread)."""
        cap = cv2.VideoCapture(input_path)
        fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # some containers report 0 fps
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Output writer with the same geometry as the input
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # OpenCV frames are BGR, so reverse the RGB tuple before painting it in
        bg_bgr = self.hex_to_rgb(bg_color)[::-1]

        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            processed = self.process_frame(frame, threshold, bg_bgr)
            out.write(processed)

            frame_count += 1
            if frame_count % 30 == 0:
                print(f"Progress: {frame_count}/{total_frames}")

            # Periodically release memory on long videos
            if frame_count % 100 == 0:
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

        cap.release()
        out.release()

    def process_frame(self, frame: np.ndarray, threshold: float,
                      bg_color: tuple) -> np.ndarray:
        """Replace the background of a single BGR frame; bg_color is a BGR tuple."""
        # Downscale for speed, preserving aspect ratio
        h, w = frame.shape[:2]
        new_h, new_w = 256, int(256 * w / h)
        frame_small = cv2.resize(frame, (new_w, new_h))
        frame_rgb = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)

        # Get the depth map; the MiDaS transforms expect an RGB numpy array
        input_batch = self.transform(frame_rgb).to(self.device)
        with torch.no_grad():
            depth = self.model(input_batch)
            depth = F.interpolate(
                depth.unsqueeze(1),
                size=(new_h, new_w),
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy()

        # MiDaS predicts relative inverse depth: larger values are closer.
        # Normalize to [0, 1] so the threshold is scale-independent.
        depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

        # Foreground mask: keep pixels closer than the threshold, then upscale
        mask = (depth_norm > threshold).astype(np.uint8) * 255
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_LINEAR)
        mask = mask > 127  # re-binarize after interpolation softens the edges

        # Paint the background color where the mask is off
        result = frame.copy()
        result[~mask] = bg_color
        return result
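

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). The input file,
# threshold, and session id below are illustrative assumptions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        processor = VideoProcessor()
        output = await processor.process_video(
            input_path="input.mp4",  # hypothetical local file
            threshold=0.5,           # keep the nearest half of the depth range
            bg_color="#00ff00",      # chroma-key green background
            session_id="demo",
        )
        print(f"Wrote {output}")

    asyncio.run(_demo())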