Update pipeline/video_pipeline.py
Browse files
- pipeline/video_pipeline.py +8 -24
pipeline/video_pipeline.py
CHANGED
|
@@ -32,6 +32,14 @@
|
|
| 32 |
logger = logging.getLogger(__name__)
|
| 33 |
logging.basicConfig(level=logging.INFO)
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# --- T4 GPU Optimizations ---
|
| 36 |
def setup_t4_environment():
|
| 37 |
"""Configure PyTorch and CUDA for Tesla T4"""
|
|
@@ -68,7 +76,6 @@ class VRAMAdaptiveController:
|
|
| 68 |
def __init__(self):
    """Set the controller's starting parameter values."""
    # Initial values; presumably the parameters adapt() later adjusts
    # based on VRAM availability -- TODO confirm against adapt().
    self.memory_window, self.cleanup_every = 96, 20
|
| 71 |
-
|
| 72 |
def adapt(self):
|
| 73 |
"""Adjust parameters based on current VRAM availability"""
|
| 74 |
if not torch.cuda.is_available():
|
|
@@ -138,12 +145,10 @@ def generate_first_frame_mask(video_path, predictor):
|
|
| 138 |
cap.release()
|
| 139 |
if not ret:
|
| 140 |
raise ValueError("Failed to read video frame")
|
| 141 |
-
|
| 142 |
h, w = frame.shape[:2]
|
| 143 |
if max(h, w) > 1080:
|
| 144 |
scale = 1080 / max(h, w)
|
| 145 |
frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
|
| 146 |
-
|
| 147 |
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
|
| 148 |
predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 149 |
masks, _, _ = predictor.predict(
|
|
@@ -160,11 +165,9 @@ def smooth_alpha_video(alpha_path, output_path, window_size=5):
|
|
| 160 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 161 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 162 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 163 |
-
|
| 164 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 165 |
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)
|
| 166 |
frame_buffer = deque(maxlen=window_size)
|
| 167 |
-
|
| 168 |
while True:
|
| 169 |
ret, frame = cap.read()
|
| 170 |
if not ret:
|
|
@@ -174,7 +177,6 @@ def smooth_alpha_video(alpha_path, output_path, window_size=5):
|
|
| 174 |
frame_buffer.append(frame.astype(np.float32))
|
| 175 |
smoothed = np.mean(frame_buffer, axis=0).astype(np.uint8)
|
| 176 |
out.write(smoothed)
|
| 177 |
-
|
| 178 |
cap.release()
|
| 179 |
out.release()
|
| 180 |
return output_path
|
|
@@ -195,7 +197,6 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
|
|
| 195 |
output_path
|
| 196 |
]
|
| 197 |
subprocess.run(cmd, check=True, capture_output=True)
|
| 198 |
-
|
| 199 |
# Verify alpha channel
|
| 200 |
cap = cv2.VideoCapture(output_path)
|
| 201 |
ret, frame = cap.read()
|
|
@@ -203,7 +204,6 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
|
|
| 203 |
logger.info(f"FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
|
| 204 |
cap.release()
|
| 205 |
return output_path
|
| 206 |
-
|
| 207 |
except Exception as e:
|
| 208 |
logger.error(f"FFmpeg MOV creation failed: {e}")
|
| 209 |
return None
|
|
@@ -214,28 +214,23 @@ def stage1_create_transparent_video(input_file):
|
|
| 214 |
logger.info("Stage 1: Creating transparent video")
|
| 215 |
heartbeat_flag = {"running": True}
|
| 216 |
threading.Thread(target=heartbeat_monitor, args=(heartbeat_flag,), daemon=True).start()
|
| 217 |
-
|
| 218 |
try:
|
| 219 |
# Load models
|
| 220 |
sam2_predictor = load_sam2_predictor()
|
| 221 |
matanyone_processor = load_matanyone_processor()
|
| 222 |
if not sam2_predictor or not matanyone_processor:
|
| 223 |
raise RuntimeError("Failed to load models")
|
| 224 |
-
|
| 225 |
# Process video
|
| 226 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 227 |
temp_dir = Path(temp_dir)
|
| 228 |
input_path = _normalize_input(input_file, temp_dir)
|
| 229 |
-
|
| 230 |
# Extract audio from input video
|
| 231 |
audio_path = str(temp_dir / "audio.aac")
|
| 232 |
extract_audio(input_path, audio_path)
|
| 233 |
-
|
| 234 |
# Generate first-frame mask
|
| 235 |
mask = generate_first_frame_mask(input_path, sam2_predictor)
|
| 236 |
mask_path = str(temp_dir / "mask.png")
|
| 237 |
cv2.imwrite(mask_path, mask)
|
| 238 |
-
|
| 239 |
# MatAnyone processing
|
| 240 |
foreground_path, alpha_path = matanyone_processor.process_video(
|
| 241 |
input_path=input_path,
|
|
@@ -243,22 +238,17 @@ def stage1_create_transparent_video(input_file):
|
|
| 243 |
output_path=str(temp_dir),
|
| 244 |
max_size=720
|
| 245 |
)
|
| 246 |
-
|
| 247 |
# Temporal smoothing
|
| 248 |
smoothed_alpha = smooth_alpha_video(alpha_path, str(temp_dir / "alpha_smoothed.mp4"))
|
| 249 |
-
|
| 250 |
# Create transparent MOV
|
| 251 |
transparent_path = create_transparent_mov(foreground_path, smoothed_alpha, temp_dir)
|
| 252 |
if not transparent_path:
|
| 253 |
raise RuntimeError("Transparent MOV creation failed")
|
| 254 |
-
|
| 255 |
# Save to persistent storage
|
| 256 |
persist_path = Path("tmp") / "transparent_video.mov"
|
| 257 |
shutil.copyfile(transparent_path, persist_path)
|
| 258 |
-
|
| 259 |
# Return both transparent video and audio paths for Stage 2
|
| 260 |
return str(persist_path), audio_path
|
| 261 |
-
|
| 262 |
except Exception as e:
|
| 263 |
logger.error(f"Stage 1 failed: {e}", exc_info=True)
|
| 264 |
st.error(f"Stage 1 Error: {str(e)}")
|
|
@@ -278,19 +268,16 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 278 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 279 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 280 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 281 |
-
|
| 282 |
# Prepare background
|
| 283 |
if bg_type == "image":
|
| 284 |
bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
|
| 285 |
else: # color
|
| 286 |
bg_array = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
|
| 287 |
bg_resized = cv2.resize(bg_array, (width, height))
|
| 288 |
-
|
| 289 |
# Composite frames (no audio yet)
|
| 290 |
temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
|
| 291 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 292 |
out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
|
| 293 |
-
|
| 294 |
while True:
|
| 295 |
ret, frame = cap.read()
|
| 296 |
if not ret:
|
|
@@ -301,10 +288,8 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 301 |
else:
|
| 302 |
composite = frame # Fallback: no alpha
|
| 303 |
out.write(composite)
|
| 304 |
-
|
| 305 |
cap.release()
|
| 306 |
out.release()
|
| 307 |
-
|
| 308 |
# Mux audio back into the final video
|
| 309 |
final_output_path = str(Path("tmp") / "final_output.mp4")
|
| 310 |
if audio_path and os.path.exists(audio_path):
|
|
@@ -317,7 +302,6 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 317 |
else:
|
| 318 |
logger.warning("No audio found, returning video without audio")
|
| 319 |
return temp_output_path
|
| 320 |
-
|
| 321 |
except Exception as e:
|
| 322 |
logger.error(f"Stage 2 failed: {e}", exc_info=True)
|
| 323 |
st.error(f"Stage 2 Error: {str(e)}")
|
|
|
|
| 32 |
logger = logging.getLogger(__name__)
|
| 33 |
logging.basicConfig(level=logging.INFO)
|
| 34 |
|
| 35 |
+
def check_gpu():
    """Report whether CUDA can be used, logging the outcome.

    Returns:
        True when ``torch.cuda.is_available()`` reports CUDA support,
        False otherwise.
    """
    if not torch.cuda.is_available():
        # Surface the missing GPU in the logs before reporting failure.
        logger.warning("CUDA is NOT available. Falling back to CPU.")
        return False

    # memory_allocated() is scaled by 1e9 so the log line reads in GB.
    logger.info(f"CUDA is available. Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    return True
|
| 42 |
+
|
| 43 |
# --- T4 GPU Optimizations ---
|
| 44 |
def setup_t4_environment():
|
| 45 |
"""Configure PyTorch and CUDA for Tesla T4"""
|
|
|
|
| 76 |
def __init__(self):
    """Set the controller's starting parameter values."""
    # Initial values; presumably the parameters adapt() later adjusts
    # based on VRAM availability -- TODO confirm against adapt().
    self.memory_window, self.cleanup_every = 96, 20
|
|
|
|
| 79 |
def adapt(self):
|
| 80 |
"""Adjust parameters based on current VRAM availability"""
|
| 81 |
if not torch.cuda.is_available():
|
|
|
|
| 145 |
cap.release()
|
| 146 |
if not ret:
|
| 147 |
raise ValueError("Failed to read video frame")
|
|
|
|
| 148 |
h, w = frame.shape[:2]
|
| 149 |
if max(h, w) > 1080:
|
| 150 |
scale = 1080 / max(h, w)
|
| 151 |
frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
|
|
|
|
| 152 |
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
|
| 153 |
predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 154 |
masks, _, _ = predictor.predict(
|
|
|
|
| 165 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 166 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 167 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
|
|
|
| 168 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 169 |
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)
|
| 170 |
frame_buffer = deque(maxlen=window_size)
|
|
|
|
| 171 |
while True:
|
| 172 |
ret, frame = cap.read()
|
| 173 |
if not ret:
|
|
|
|
| 177 |
frame_buffer.append(frame.astype(np.float32))
|
| 178 |
smoothed = np.mean(frame_buffer, axis=0).astype(np.uint8)
|
| 179 |
out.write(smoothed)
|
|
|
|
| 180 |
cap.release()
|
| 181 |
out.release()
|
| 182 |
return output_path
|
|
|
|
| 197 |
output_path
|
| 198 |
]
|
| 199 |
subprocess.run(cmd, check=True, capture_output=True)
|
|
|
|
| 200 |
# Verify alpha channel
|
| 201 |
cap = cv2.VideoCapture(output_path)
|
| 202 |
ret, frame = cap.read()
|
|
|
|
| 204 |
logger.info(f"FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
|
| 205 |
cap.release()
|
| 206 |
return output_path
|
|
|
|
| 207 |
except Exception as e:
|
| 208 |
logger.error(f"FFmpeg MOV creation failed: {e}")
|
| 209 |
return None
|
|
|
|
| 214 |
logger.info("Stage 1: Creating transparent video")
|
| 215 |
heartbeat_flag = {"running": True}
|
| 216 |
threading.Thread(target=heartbeat_monitor, args=(heartbeat_flag,), daemon=True).start()
|
|
|
|
| 217 |
try:
|
| 218 |
# Load models
|
| 219 |
sam2_predictor = load_sam2_predictor()
|
| 220 |
matanyone_processor = load_matanyone_processor()
|
| 221 |
if not sam2_predictor or not matanyone_processor:
|
| 222 |
raise RuntimeError("Failed to load models")
|
|
|
|
| 223 |
# Process video
|
| 224 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 225 |
temp_dir = Path(temp_dir)
|
| 226 |
input_path = _normalize_input(input_file, temp_dir)
|
|
|
|
| 227 |
# Extract audio from input video
|
| 228 |
audio_path = str(temp_dir / "audio.aac")
|
| 229 |
extract_audio(input_path, audio_path)
|
|
|
|
| 230 |
# Generate first-frame mask
|
| 231 |
mask = generate_first_frame_mask(input_path, sam2_predictor)
|
| 232 |
mask_path = str(temp_dir / "mask.png")
|
| 233 |
cv2.imwrite(mask_path, mask)
|
|
|
|
| 234 |
# MatAnyone processing
|
| 235 |
foreground_path, alpha_path = matanyone_processor.process_video(
|
| 236 |
input_path=input_path,
|
|
|
|
| 238 |
output_path=str(temp_dir),
|
| 239 |
max_size=720
|
| 240 |
)
|
|
|
|
| 241 |
# Temporal smoothing
|
| 242 |
smoothed_alpha = smooth_alpha_video(alpha_path, str(temp_dir / "alpha_smoothed.mp4"))
|
|
|
|
| 243 |
# Create transparent MOV
|
| 244 |
transparent_path = create_transparent_mov(foreground_path, smoothed_alpha, temp_dir)
|
| 245 |
if not transparent_path:
|
| 246 |
raise RuntimeError("Transparent MOV creation failed")
|
|
|
|
| 247 |
# Save to persistent storage
|
| 248 |
persist_path = Path("tmp") / "transparent_video.mov"
|
| 249 |
shutil.copyfile(transparent_path, persist_path)
|
|
|
|
| 250 |
# Return both transparent video and audio paths for Stage 2
|
| 251 |
return str(persist_path), audio_path
|
|
|
|
| 252 |
except Exception as e:
|
| 253 |
logger.error(f"Stage 1 failed: {e}", exc_info=True)
|
| 254 |
st.error(f"Stage 1 Error: {str(e)}")
|
|
|
|
| 268 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 269 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 270 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
|
|
|
| 271 |
# Prepare background
|
| 272 |
if bg_type == "image":
|
| 273 |
bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
|
| 274 |
else: # color
|
| 275 |
bg_array = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
|
| 276 |
bg_resized = cv2.resize(bg_array, (width, height))
|
|
|
|
| 277 |
# Composite frames (no audio yet)
|
| 278 |
temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
|
| 279 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 280 |
out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
|
|
|
|
| 281 |
while True:
|
| 282 |
ret, frame = cap.read()
|
| 283 |
if not ret:
|
|
|
|
| 288 |
else:
|
| 289 |
composite = frame # Fallback: no alpha
|
| 290 |
out.write(composite)
|
|
|
|
| 291 |
cap.release()
|
| 292 |
out.release()
|
|
|
|
| 293 |
# Mux audio back into the final video
|
| 294 |
final_output_path = str(Path("tmp") / "final_output.mp4")
|
| 295 |
if audio_path and os.path.exists(audio_path):
|
|
|
|
| 302 |
else:
|
| 303 |
logger.warning("No audio found, returning video without audio")
|
| 304 |
return temp_output_path
|
|
|
|
| 305 |
except Exception as e:
|
| 306 |
logger.error(f"Stage 2 failed: {e}", exc_info=True)
|
| 307 |
st.error(f"Stage 2 Error: {str(e)}")
|