# NOTE(review): the three lines that were here ("Spaces:" / "Paused" / "Paused")
# were page-scrape residue from a hosting/listing page, not Python source.
import asyncio
import gc
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
class VideoProcessor:
    """Video background remover driven by MiDaS monocular depth estimation.

    Each frame gets a depth map from the small MiDaS model; pixels whose
    normalized depth is at or below a threshold (far from the camera) are
    painted with a solid background color.
    """

    def __init__(self):
        # Fall back to CPU when no GPU is available.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        # Load MiDaS (small model for speed).
        self.model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
        self.model.to(self.device)
        self.model.eval()
        # Matching preprocessing transforms for the small model.
        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        self.transform = midas_transforms.small_transform
        # Single worker: one model instance, serialized GPU access.
        self.executor = ThreadPoolExecutor(max_workers=1)

    def hex_to_rgb(self, hex_color: str) -> tuple:
        """Convert a '#rrggbb' (or 'rrggbb') hex string to an (R, G, B) tuple."""
        hex_color = hex_color.lstrip('#')
        return tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))

    async def process_video(self, input_path: str, threshold: float,
                            bg_color: str, session_id: str) -> str:
        """Process a video without blocking the event loop.

        Args:
            input_path: Path of the source video.
            threshold: Normalized-depth cutoff in [0, 1] for foreground.
            bg_color: Hex color string for the replaced background.
            session_id: Used to name the output file under /tmp.

        Returns:
            Path of the written output video.
        """
        # get_running_loop() is the coroutine-safe replacement for
        # get_event_loop(), which is deprecated in coroutines since 3.10.
        loop = asyncio.get_running_loop()
        output_path = str(Path("/tmp") / f"{session_id}_output.mp4")
        # Run the blocking OpenCV/torch work in the dedicated thread pool.
        await loop.run_in_executor(
            self.executor,
            self._process_video_sync,
            input_path, output_path, threshold, bg_color
        )
        return output_path

    def _process_video_sync(self, input_path: str, output_path: str,
                            threshold: float, bg_color: str):
        """Synchronous frame-by-frame processing (runs in a worker thread).

        Raises:
            ValueError: If the input video cannot be opened.
        """
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {input_path}")
        try:
            # Some containers report 0 fps; fall back to a sane default so
            # VideoWriter does not produce a broken file.
            fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            try:
                # OpenCV frames are BGR; reverse the (R, G, B) tuple once so
                # the background is written in the correct channel order.
                # (Previously the RGB tuple was assigned directly into BGR
                # frames, swapping the red and blue channels.)
                bg_bgr = self.hex_to_rgb(bg_color)[::-1]
                frame_count = 0
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    out.write(self.process_frame(frame, threshold, bg_bgr))
                    frame_count += 1
                    if frame_count % 30 == 0:
                        print(f"Progress: {frame_count}/{total_frames}")
                    # Periodically reclaim memory during long videos.
                    if frame_count % 100 == 0:
                        gc.collect()
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
            finally:
                # Always flush/close the writer, even if a frame fails.
                out.release()
        finally:
            # Always release the capture handle.
            cap.release()

    def process_frame(self, frame: np.ndarray, threshold: float,
                      bg_color: tuple) -> np.ndarray:
        """Replace the background of one BGR frame with ``bg_color``.

        Args:
            frame: BGR uint8 frame as read by OpenCV.
            threshold: Normalized-depth cutoff in [0, 1]; pixels at or below
                it are treated as background.
            bg_color: Per-channel fill value in the frame's own (BGR) order.

        Returns:
            A new BGR frame with background pixels filled.
        """
        h, w = frame.shape[:2]
        # Downscale before depth inference for speed; preserve aspect ratio.
        new_h, new_w = 256, int(256 * w / h)
        frame_small = cv2.resize(frame, (new_w, new_h))
        frame_rgb = cv2.cvtColor(frame_small, cv2.COLOR_BGR2RGB)

        # MiDaS expects RGB input; the hub transform handles normalization.
        img = Image.fromarray(frame_rgb)
        input_batch = self.transform(img).to(self.device)
        with torch.no_grad():
            depth = self.model(input_batch)
            depth = F.interpolate(
                depth.unsqueeze(1),
                size=(new_h, new_w),
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy()

        # Normalize to [0, 1]; epsilon guards a constant depth map.
        depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

        # MiDaS outputs inverse depth (larger = closer), so values above the
        # threshold are kept as foreground.
        mask = (depth_norm > threshold).astype(np.uint8) * 255
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_LINEAR)
        mask = mask.astype(bool)

        result = frame.copy()
        result[~mask] = bg_color
        return result