Spaces:

itsluckysharma01
/

NETRA-AI_Video_Surveillance_Web_Application

Runtime error

App Files Files Community

NETRA-AI_Video_Surveillance_Web_Application / src /pipeline /video_capture.py

itsluckysharma01

Upload 22 files

0a84654 verified 14 days ago

raw

history blame contribute delete

16.1 kB


	"""
	Video Capture Module for AI Processing
	Captures frames from webcam or RTSP stream and preprocesses them for YOLOv8
	Uses motion detection (MOG2) to extract ROIs for faster inference
	"""

	import cv2
	import numpy as np
	import platform
	from typing import Optional, Tuple, Generator, List
	from dataclasses import dataclass


	@dataclass
	class ROI:
	    """Region of interest containing motion.

	    ``x``, ``y``, ``width`` and ``height`` describe the bounding box; the two
	    array fields hold the pixels cut out for that box.
	    """

	    x: int
	    y: int
	    width: int
	    height: int
	    cropped_frame: np.ndarray  # Original cropped region
	    preprocessed: np.ndarray  # Resized to 640x640 for YOLO

	    def __eq__(self, other: object) -> bool:
	        """Return True when both boxes and both pixel arrays match.

	        The ``__eq__`` that ``@dataclass`` would generate compares all fields
	        as one tuple. For two distinct multi-element arrays that ends in
	        ``ValueError`` because an element-wise ``==`` result has no single
	        truth value, so the arrays are compared with ``np.array_equal``
	        instead.
	        """
	        if other.__class__ is not self.__class__:
	            return NotImplemented
	        # Compare the cheap integer fields first so the arrays are only
	        # scanned when the boxes already agree.
	        return (
	            (self.x, self.y, self.width, self.height)
	            == (other.x, other.y, other.width, other.height)
	            and np.array_equal(self.cropped_frame, other.cropped_frame)
	            and np.array_equal(self.preprocessed, other.preprocessed)
	        )


	class MotionDetector:
	"""Detects motion using MOG2 background subtraction."""

	def __init__(self,
	history: int = 500,
	var_threshold: float = 16,
	detect_shadows: bool = True,
	min_contour_area: int = 500,
	merge_distance: int = 50):
	"""
	Initialize MOG2 background subtractor.

	Args:
	history: Number of frames for background model
	var_threshold: Variance threshold for background/foreground segmentation
	detect_shadows: Whether to detect shadows (marks them gray vs white)
	min_contour_area: Minimum area (pixels) to consider as valid motion
	merge_distance: Distance to merge nearby contours into single ROI
	"""
	self.bg_subtractor = cv2.createBackgroundSubtractorMOG2(
	history=history,
	varThreshold=var_threshold,
	detectShadows=detect_shadows
	)
	self.min_contour_area = min_contour_area
	self.merge_distance = merge_distance
	self.kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

	def get_foreground_mask(self, frame: np.ndarray) -> np.ndarray:
	"""
	Get binary mask of moving objects.

	Args:
	frame: Input BGR frame

	Returns:
	Binary mask where white = motion
	"""
	# Apply background subtraction
	fg_mask = self.bg_subtractor.apply(frame)

	# Remove shadows (gray pixels become black)
	_, fg_mask = cv2.threshold(fg_mask, 250, 255, cv2.THRESH_BINARY)

	# Morphological operations to clean up noise
	fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_OPEN, self.kernel)
	fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_CLOSE, self.kernel)
	fg_mask = cv2.dilate(fg_mask, self.kernel, iterations=2)

	return fg_mask

	def _merge_bounding_boxes(self, boxes: List[Tuple[int, int, int, int]]) -> List[Tuple[int, int, int, int]]:
	"""Merge nearby bounding boxes to reduce fragmentation."""
	if not boxes:
	return []

	merged = []
	used = [False] * len(boxes)

	for i, (x1, y1, w1, h1) in enumerate(boxes):
	if used[i]:
	continue

	# Start with current box
	min_x, min_y = x1, y1
	max_x, max_y = x1 + w1, y1 + h1
	used[i] = True

	# Find and merge nearby boxes
	changed = True
	while changed:
	changed = False
	for j, (x2, y2, w2, h2) in enumerate(boxes):
	if used[j]:
	continue

	# Check if boxes are close enough to merge
	if (x2 < max_x + self.merge_distance and
	x2 + w2 > min_x - self.merge_distance and
	y2 < max_y + self.merge_distance and
	y2 + h2 > min_y - self.merge_distance):

	min_x = min(min_x, x2)
	min_y = min(min_y, y2)
	max_x = max(max_x, x2 + w2)
	max_y = max(max_y, y2 + h2)
	used[j] = True
	changed = True

	merged.append((min_x, min_y, max_x - min_x, max_y - min_y))

	return merged

	def detect_motion_regions(self, frame: np.ndarray,
	padding: int = 20) -> List[Tuple[int, int, int, int]]:
	"""
	Detect regions with motion.

	Args:
	frame: Input BGR frame
	padding: Pixels to add around detected regions

	Returns:
	List of bounding boxes (x, y, width, height)
	"""
	fg_mask = self.get_foreground_mask(frame)

	# Find contours of moving objects
	contours, _ = cv2.findContours(fg_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	boxes = []
	h, w = frame.shape[:2]

	for contour in contours:
	area = cv2.contourArea(contour)
	if area < self.min_contour_area:
	continue

	x, y, bw, bh = cv2.boundingRect(contour)

	# Add padding and clamp to frame bounds
	x = max(0, x - padding)
	y = max(0, y - padding)
	bw = min(w - x, bw + 2 * padding)
	bh = min(h - y, bh + 2 * padding)

	boxes.append((x, y, bw, bh))

	# Merge nearby boxes
	return self._merge_bounding_boxes(boxes)


	class VideoCapture:
	"""Captures and preprocesses video frames for AI inference."""

	# YOLOv8 native input size
	TARGET_SIZE = (640, 640)

	def __init__(self, source: int \| str = 0, use_motion_detection: bool = True):
	"""
	Initialize video capture.

	Args:
	source: Camera index (0 for default webcam) or RTSP URL string
	Example RTSP: "rtsp://username:password@ip_address:port/stream"
	use_motion_detection: Enable MOG2 motion detection for ROI extraction
	"""
	self.source = self._normalize_source(source)
	self.cap: Optional[cv2.VideoCapture] = None
	self.use_motion_detection = use_motion_detection
	self.motion_detector: Optional[MotionDetector] = None
	self.active_source: Optional[int \| str] = None
	self.active_backend: Optional[int] = None

	@staticmethod
	def _normalize_source(source: int \| str) -> int \| str:
	"""Normalize source values so numeric strings map to camera indices."""
	if isinstance(source, str) and source.strip().isdigit():
	return int(source.strip())
	return source

	@staticmethod
	def _backend_name(backend: int) -> str:
	"""Get a readable backend name for diagnostics."""
	names = {
	cv2.CAP_ANY: "CAP_ANY",
	cv2.CAP_DSHOW: "CAP_DSHOW",
	cv2.CAP_MSMF: "CAP_MSMF",
	}
	return names.get(backend, str(backend))

	def _source_candidates(self) -> List[int \| str]:
	"""Return source candidates to try opening in order."""
	if isinstance(self.source, int):
	candidates = [self.source]
	if self.source == 0:
	candidates.extend([1, 2])
	return candidates
	return [self.source]

	def _backend_candidates(self) -> List[int]:
	"""Return backend candidates based on platform and source type."""
	if isinstance(self.source, str):
	return [cv2.CAP_ANY]

	if platform.system().lower().startswith("win"):
	return [cv2.CAP_DSHOW, cv2.CAP_MSMF, cv2.CAP_ANY]

	return [cv2.CAP_ANY]

	def start(self, verbose: bool = True) -> bool:
	"""
	Start the video capture.

	Returns:
	True if capture started successfully, False otherwise
	"""
	self.stop()

	open_attempts = []
	for source_candidate in self._source_candidates():
	for backend in self._backend_candidates():
	cap = cv2.VideoCapture(source_candidate, backend)
	open_attempts.append(f"{source_candidate} via {self._backend_name(backend)}")

	if cap.isOpened():
	self.cap = cap
	self.active_source = source_candidate
	self.active_backend = backend
	break

	cap.release()

	if self.cap is not None:
	break

	if self.cap is None:
	if verbose:
	print(f"Error: Could not open video source: {self.source}")
	print("Tried:")
	for attempt in open_attempts:
	print(f" - {attempt}")
	return False

	# Set buffer size to minimize latency (useful for RTSP streams)
	self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)

	# Initialize motion detector if enabled
	if self.use_motion_detection:
	self.motion_detector = MotionDetector()
	if verbose:
	print("Motion detection enabled (MOG2)")

	# Print capture info
	width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
	fps = self.cap.get(cv2.CAP_PROP_FPS)
	if verbose:
	backend_name = self._backend_name(self.active_backend) if self.active_backend is not None else "Unknown"
	print(f"Video capture started: source={self.active_source}, backend={backend_name}")
	print(f"Resolution: {width}x{height} @ {fps:.1f} FPS")

	return True

	def stop(self):
	"""Release the video capture resources."""
	if self.cap is not None:
	self.cap.release()
	self.cap = None
	self.active_source = None
	self.active_backend = None
	print("Video capture stopped")

	def preprocess_frame(self, frame: np.ndarray) -> np.ndarray:
	"""
	Preprocess frame for YOLOv8 inference.

	Args:
	frame: Raw BGR frame from camera

	Returns:
	Preprocessed frame resized to 640x640
	"""
	# Resize to YOLOv8 native size (640x640)
	resized = cv2.resize(frame, self.TARGET_SIZE, interpolation=cv2.INTER_LINEAR)
	return resized

	def read_frame(self) -> Tuple[bool, Optional[np.ndarray], Optional[np.ndarray]]:
	"""
	Read and preprocess a single frame.

	Returns:
	Tuple of (success, original_frame, preprocessed_frame)
	"""
	if self.cap is None:
	return False, None, None

	ret, frame = self.cap.read()

	if not ret or frame is None:
	return False, None, None

	preprocessed = self.preprocess_frame(frame)
	return True, frame, preprocessed

	def extract_rois(self, frame: np.ndarray) -> List[ROI]:
	"""
	Extract regions of interest (moving objects) from frame.

	Args:
	frame: Input BGR frame

	Returns:
	List of ROI objects containing cropped and preprocessed regions
	"""
	if self.motion_detector is None:
	# If no motion detection, return whole frame as single ROI
	preprocessed = self.preprocess_frame(frame)
	return [ROI(0, 0, frame.shape[1], frame.shape[0], frame, preprocessed)]

	boxes = self.motion_detector.detect_motion_regions(frame)

	if not boxes:
	return []

	rois = []
	for x, y, w, h in boxes:
	cropped = frame[y:y+h, x:x+w]
	preprocessed = cv2.resize(cropped, self.TARGET_SIZE, interpolation=cv2.INTER_LINEAR)
	rois.append(ROI(x, y, w, h, cropped, preprocessed))

	return rois

	def read_frame_with_rois(self) -> Tuple[bool, Optional[np.ndarray], List[ROI], Optional[np.ndarray]]:
	"""
	Read frame and extract ROIs for motion regions.

	Returns:
	Tuple of (success, original_frame, list_of_rois, foreground_mask)
	"""
	if self.cap is None:
	return False, None, [], None

	ret, frame = self.cap.read()

	if not ret or frame is None:
	return False, None, [], None

	rois = self.extract_rois(frame)

	# Get foreground mask for visualization
	fg_mask = None
	if self.motion_detector is not None:
	fg_mask = self.motion_detector.get_foreground_mask(frame)

	return True, frame, rois, fg_mask

	def stream_frames(self) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]:
	"""
	Generator that continuously yields frames (no motion detection).

	Yields:
	Tuple of (original_frame, preprocessed_frame)
	"""
	while True:
	success, original, preprocessed = self.read_frame()
	if not success:
	break
	yield original, preprocessed

	def stream_rois(self) -> Generator[Tuple[np.ndarray, List[ROI], Optional[np.ndarray]], None, None]:
	"""
	Generator that yields frames with motion-detected ROIs.

	Yields:
	Tuple of (original_frame, list_of_rois, foreground_mask)
	"""
	while True:
	success, original, rois, fg_mask = self.read_frame_with_rois()
	if not success:
	break
	yield original, rois, fg_mask

	def __enter__(self):
	"""Context manager entry."""
	if not self.start():
	raise RuntimeError(f"Could not open video source: {self.source}")
	return self

	def __exit__(self, exc_type, exc_val, exc_tb):
	"""Context manager exit."""
	self.stop()


	def main():
	    """Demo: Capture frames with motion detection from webcam.

	    Opens the default webcam, draws a labelled rectangle around every
	    motion ROI and shows the annotated frame plus the foreground mask
	    until 'q' is pressed or the stream ends.

	    Raises:
	        RuntimeError: If the video source cannot be opened
	    """

	    # Use 0 for default webcam, or provide RTSP URL for IP camera
	    # Example RTSP: "rtsp://admin:password@192.168.1.100:554/stream1"
	    source = 0

	    try:
	        with VideoCapture(source, use_motion_detection=True) as capture:
	            print("Press 'q' to quit")
	            print("Motion detection active - only moving regions will be processed")

	            for original, rois, fg_mask in capture.stream_rois():
	                # Draw on a copy so the captured frame stays untouched.
	                display_frame = original.copy()

	                for i, roi in enumerate(rois):
	                    # Draw green rectangle around ROI
	                    cv2.rectangle(display_frame,
	                                  (roi.x, roi.y),
	                                  (roi.x + roi.width, roi.y + roi.height),
	                                  (0, 255, 0), 2)

	                    # Label with ROI index and size. Keep the baseline inside
	                    # the frame: an ROI touching the top edge would otherwise
	                    # put its label at a negative y and hide it.
	                    label = f"ROI {i+1}: {roi.width}x{roi.height}"
	                    cv2.putText(display_frame, label, (roi.x, max(roi.y - 10, 15)),
	                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

	                # Show info
	                info = f"Motion ROIs: {len(rois)} | Press 'q' to quit"
	                cv2.putText(display_frame, info, (10, 30),
	                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

	                cv2.imshow("Video Capture - Motion Detection", display_frame)

	                # Show foreground mask
	                if fg_mask is not None:
	                    cv2.imshow("Foreground Mask", fg_mask)

	                # waitKey also pumps the GUI event loop so the windows refresh.
	                if cv2.waitKey(1) & 0xFF == ord('q'):
	                    break
	    finally:
	        # Close the preview windows even when the loop exits via an exception.
	        cv2.destroyAllWindows()

	    print("Done!")


	# Run the webcam demo only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()