import os

import cv2
import numpy as np
import torch
from torchvision import transforms


def _draw_detections(frame, prediction, score_threshold):
    """Draw boxes and ``label:score`` captions on *frame* in place.

    Only detections whose score is strictly greater than *score_threshold*
    are drawn.
    """
    # Convert each tensor once per frame; reading elements one at a time
    # from a device tensor forces a host/device sync per element.
    boxes = prediction["boxes"].tolist()
    scores = prediction["scores"].tolist()
    labels = prediction["labels"].tolist()

    for box, score, label in zip(boxes, scores, labels):
        if score <= score_threshold:
            continue
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # putText anchors at the text's bottom-left corner; clamp the
        # baseline so captions of boxes touching the top edge stay visible.
        caption_y = max(y1 - 10, 12)
        cv2.putText(
            frame,
            f"{label}:{score:.2f}",
            (x1, caption_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 0, 0),
            2,
        )


def process_video(
    video_path,
    model,
    output_path,
    device,
    *,
    frame_skip: int = 2,
    score_threshold: float = 0.5,
    inference_size: tuple = (640, 480),
):
    """Run a detection model over a video and write an annotated copy.

    Every frame is resized to *inference_size*; every ``frame_skip``-th frame
    is passed through *model* and its confident detections are drawn. The
    frame is then resized back to the source resolution and written to
    *output_path* with the XVID codec at the source frame rate.

    Args:
        video_path: Path of the input video (converted with ``str``).
        model: Callable invoked as ``model([tensor])`` whose first result is
            a mapping with ``"boxes"``, ``"scores"`` and ``"labels"`` tensors.
            The caller is responsible for putting it in inference mode and
            on *device*.
        output_path: Path of the video file to create.
        device: Device the input tensor is moved to before inference.
        frame_skip: Run inference on every ``frame_skip``-th frame (>= 1).
        score_threshold: Detections with a score at or below this value are
            ignored.
        inference_size: ``(width, height)`` frames are resized to before
            inference and drawing.

    Returns:
        *output_path*, unchanged.

    Raises:
        ValueError: If *frame_skip* is smaller than 1.
        IOError: If the input video or the output writer cannot be opened.
    """
    if frame_skip < 1:
        raise ValueError(f"frame_skip must be >= 1, got {frame_skip}")

    to_tensor = transforms.ToTensor()

    cap = cv2.VideoCapture(str(video_path))
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video: {video_path}")

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # VideoWriter expects a plain string filename; accept path-like too.
        out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
        if not out.isOpened():
            # Without this check a bad codec/path silently yields no output.
            raise IOError(f"Cannot open video writer: {output_path}")

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = cv2.resize(frame, inference_size)

            # NOTE(review): frames between inference steps are written
            # without boxes, so overlays appear only on every
            # ``frame_skip``-th frame. Confirm this is intended.
            if frame_count % frame_skip == 0:
                rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
                img_tensor = to_tensor(rgb_frame).to(device)
                with torch.no_grad():
                    prediction = model([img_tensor])[0]
                _draw_detections(resized_frame, prediction, score_threshold)

            # Restore the source resolution so the frame matches the size
            # the writer was opened with.
            out.write(cv2.resize(resized_frame, (width, height)))
            frame_count += 1
    finally:
        # Always release native handles, even when inference or I/O raised,
        # so the capture is closed and the output container is finalised.
        cap.release()
        if out is not None:
            out.release()

    return output_path